Add tree learners to strategy evaluation directory
This commit is contained in:
77
strategy_evaluation/AbstractTreeLearner.py
Normal file
77
strategy_evaluation/AbstractTreeLearner.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractTreeLearner:
    """Base class for array-backed decision-tree learners.

    The fitted tree lives in ``self.rel_tree``, a structured numpy array in
    which each row encodes one node as ``(factor, split_value, left, right)``:

    - ``factor``: feature index to split on, or ``LEAF`` for a leaf node
      (in which case ``split_value`` holds the predicted value).
    - ``split_value``: threshold; points with ``point[factor] <= split_value``
      descend left, all others descend right.
    - ``left`` / ``right``: row offsets *relative to the current row* of the
      child sub-trees, or ``NA`` on leaf rows.

    Subclasses must set ``self.leaf_size`` and implement
    ``get_i_and_split_value(xs, y)``.
    """

    LEAF = -1  # sentinel in the 'factor' field marking a leaf node
    NA = -1    # sentinel for "no child" offsets on leaf rows

    def author(self):
        """Return the author's Georgia Tech username."""
        return 'felixm'  # replace tb34 with your Georgia Tech username

    def create_node(self, factor, split_value, left, right):
        """Return a one-row structured array encoding a single tree node."""
        return np.array([(factor, split_value, left, right), ],
                        dtype='|i4, f4, i4, i4')

    def query_point(self, point):
        """Walk the tree for a single feature vector and return the leaf value.

        @param point: 1-D array of feature values for one query.
        @returns the value stored in the leaf this point descends to.
        """
        node_index = 0
        while self.rel_tree[node_index][0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = node[0]
            split_value = node[1]
            if point[split_factor] <= split_value:
                # Descend into the left sub-tree (relative offset).
                node_index += node[2]
            else:
                node_index += node[3]
        return self.rel_tree[node_index][1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        # The bound method can be passed straight to apply_along_axis;
        # the previous lambda wrapper (PEP 8 E731) was redundant.
        return np.apply_along_axis(self.query_point, 1, points)

    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: X values of data to add (2-D array, one row per sample).
        @param y: the Y training values (1-D array).
        @returns a structured array encoding the (sub-)tree rooted here.
        """
        assert xs.shape[0] == y.shape[0]
        assert xs.shape[0] > 0  # If this is 0 something went wrong.

        if xs.shape[0] <= self.leaf_size:
            # Discretize the mean response into a -1 / 0 / +1 signal.
            value = np.mean(y)
            if value < -0.3:
                value = -1
            elif value > 0.3:
                value = 1
            else:
                value = 0
            return self.create_node(self.LEAF, value, self.NA, self.NA)

        if np.all(y[0] == y):
            # All responses identical: splitting further is pointless.
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value
        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # Left child immediately follows the root row; the right child
        # follows the entire left sub-tree, hence offsets 1 and len(lt) + 1.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)
|
||||||
|
|
||||||
47
strategy_evaluation/BagLearner.py
Normal file
47
strategy_evaluation/BagLearner.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import numpy as np
|
||||||
|
from AbstractTreeLearner import AbstractTreeLearner
|
||||||
|
|
||||||
|
|
||||||
|
class BagLearner(AbstractTreeLearner):
    """Bootstrap-aggregating (bagging) ensemble around a tree learner class."""

    def __init__(self, learner, bags=9, boost=False, verbose=False, kwargs=None):
        """
        @summary: Create `bags` independent instances of `learner`.
        @param learner: learner class to instantiate (not an instance).
        @param bags: number of ensemble members.
        @param boost: accepted for API compatibility; not used here.
        @param verbose: stored; not used by this class.
        @param kwargs: dict of keyword arguments forwarded to each learner's
                       constructor. Defaults to an empty dict.
        """
        # A `None` sentinel avoids the mutable-default-argument pitfall the
        # previous `kwargs={}` default suffered from.
        if kwargs is None:
            kwargs = {}
        self.learner = learner
        self.verbose = verbose
        self.bags = bags
        self.learners = [learner(**kwargs) for _ in range(bags)]

    def get_bag(self, data_x, data_y):
        """Sample rows with replacement and return one bootstrap bag.

        @returns (bag_x, bag_y) with 50% as many rows as the input.
        """
        num_items = int(data_x.shape[0] * 0.5)  # 50% of samples
        bag_x, bag_y = [], []
        for _ in range(num_items):
            i = np.random.randint(0, data_x.shape[0])
            bag_x.append(data_x[i, :])
            bag_y.append(data_y[i])
        return np.array(bag_x), np.array(bag_y)

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        # Each member trains on its own independently drawn bag.
        for learner in self.learners:
            x, y = self.get_bag(data_x, data_y)
            learner.addEvidence(x, y)

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: numpy array with each row corresponding to a query.
        @returns the per-point mean of the ensemble members' estimates.
        """
        return np.mean([l.query(points) for l in self.learners], axis=0)
|
||||||
|
|
||||||
30
strategy_evaluation/RTLearner.py
Normal file
30
strategy_evaluation/RTLearner.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import numpy as np
|
||||||
|
from AbstractTreeLearner import AbstractTreeLearner
|
||||||
|
|
||||||
|
|
||||||
|
class RTLearner(AbstractTreeLearner):
    """Random tree learner: splits on a random feature at a random threshold."""

    def __init__(self, leaf_size=1, verbose=False):
        """
        @summary: Create a random tree learner.
        @param leaf_size: maximum number of samples aggregated into one leaf.
        @param verbose: stored; not used by this class.
        """
        self.leaf_size = leaf_size
        self.verbose = verbose

    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.

        Make sure that not all X are the same for i and also pick
        different values to average the split_value from.

        NOTE(review): this loop assumes at least one column of xs is
        non-constant; otherwise it would spin forever.
        """
        i = np.random.randint(0, xs.shape[1])
        while np.all(xs[0, i] == xs[:, i]):
            i = np.random.randint(0, xs.shape[1])

        # np.unique gives the distinct values directly (and in deterministic
        # sorted order), replacing the roundabout np.array(list(set(...))).
        # The loop above guarantees at least two distinct values, so drawing
        # two without replacement always terminates with r1 != r2.
        candidates = np.unique(xs[:, i])
        r1, r2 = np.random.choice(candidates, size=2, replace=False)
        assert r1 != r2
        split_value = (r1 + r2) / 2.0
        return i, split_value
|
||||||
|
|
||||||
Reference in New Issue
Block a user