Fix DTLearner. The issue was that I took the length of the wrong subtree (right instead of left) for the root. Also avoid code duplication via an abstract tree learner class, because why not.
This commit is contained in:
@@ -1,83 +1,30 @@
|
||||
import numpy as np
|
||||
from AbstractTreeLearner import AbstractTreeLearner
|
||||
|
||||
|
||||
class RTLearner(object):
|
||||
|
||||
LEAF = -1
|
||||
NA = -1
|
||||
class RTLearner(AbstractTreeLearner):
|
||||
|
||||
def __init__(self, leaf_size = 1, verbose = False):
    """
    @summary: Remember the learner configuration on the instance.
    @param leaf_size: stored as self.leaf_size
    @param verbose: stored as self.verbose
    """
    self.leaf_size, self.verbose = leaf_size, verbose
|
||||
|
||||
def author(self):
    """
    @summary: Report who wrote this learner.
    @returns the author's Georgia Tech username
    """
    username = 'felixm'
    return username
|
||||
|
||||
def create_node(self, factor, split_value, left, right):
    """
    @summary: Pack one tree node into a single-row 2-D array.
    @param factor: first column of the node row
    @param split_value: second column of the node row
    @param left: third column of the node row
    @param right: fourth column of the node row
    @returns array of shape (1, 4) holding the four values in that order
    """
    row = [factor, split_value, left, right]
    return np.array([row])
|
||||
|
||||
def build_tree(self, xs, y):
    """
    @summary: Recursively build a regression tree stored as a 2-D array.

    Every row is one node as produced by create_node:
    [factor, split_value, left, right]. For an inner node, left and right
    are row offsets relative to the node itself. For a leaf, factor is
    self.LEAF and split_value carries the predicted value. Nodes are laid
    out as [root, left subtree, right subtree].
    @param xs: 2-D array of features, one row per sample
    @param y: target values, one entry per row of xs
    @returns the tree as an array with one row per node
    @raises ValueError: if xs and y disagree on the row count or are empty
    """
    if xs.shape[0] != y.shape[0]:
        raise ValueError("xs and y must have the same number of rows")
    if xs.shape[0] == 0:
        # An empty subset means a split put every sample on one side.
        raise ValueError("cannot build a tree from zero samples")

    if xs.shape[0] <= self.leaf_size:
        return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)

    if np.all(y[0] == y):
        return self.create_node(self.LEAF, y[0], self.NA, self.NA)

    # Identical rows cannot be separated by any column; make them a leaf
    # instead of searching for a split that does not exist.
    if np.all(xs == xs[0]):
        return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)

    i, split_value = self.get_i_and_split_value(xs, y)

    select_lt = xs[:, i] <= split_value
    select_rt = xs[:, i] > split_value
    # Defensive fallback for a split value that leaves one side empty.
    if select_lt.all() or select_rt.all():
        select_lt = xs[:, i] < split_value
        select_rt = xs[:, i] >= split_value

    lt = self.build_tree(xs[select_lt], y[select_lt])
    rt = self.build_tree(xs[select_rt], y[select_rt])

    # The right subtree starts after the root row and the whole LEFT
    # subtree, so its offset is derived from the left subtree's length.
    root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
    return np.concatenate([root, lt, rt])


def get_i_and_split_value(self, xs, y):
    """
    @summary: Pick a random i and split value.

    Only columns whose values are not all the same are candidates for i.
    The split value is the mean of two different values drawn from that
    column, so a "<= split_value" test always leaves at least one sample
    on each side.
    @param xs: 2-D array of features, one row per sample
    @param y: target values (not used by this random strategy)
    @returns tuple (i, split_value)
    @raises ValueError: if every column of xs is constant
    """
    # Choose directly among the non-constant columns: same distribution as
    # retrying random columns, but it cannot loop forever.
    varying = np.flatnonzero(np.any(xs != xs[0], axis=0))
    if varying.size == 0:
        raise ValueError("no column of xs has two different values")
    i = int(np.random.choice(varying))

    distinct = np.unique(xs[:, i])
    lo, hi = np.sort(np.random.choice(distinct, size=2, replace=False))
    split_value = (lo + hi) / 2.0
    # If the mean rounds up to hi (adjacent floats), split at lo instead so
    # that lo <= split_value < hi still holds.
    if split_value >= hi:
        split_value = lo
    return i, split_value
|
||||
|
||||
def addEvidence(self, data_x, data_y):
    """
    @summary: Train the learner by building its tree from the given data.
    @param data_x: X values of the training data
    @param data_y: Y values of the training data
    """
    tree = self.build_tree(data_x, data_y)
    self.rel_tree = tree
|
||||
|
||||
def query_point(self, point):
    """
    @summary: Walk the stored tree for a single sample.

    Starting at row 0 of self.rel_tree, follow the left offset when the
    sample's value for the node's factor is <= the node's split value and
    the right offset otherwise, until a row marked self.LEAF is reached.
    @param point: one sample, indexable by factor
    @returns the value stored in the second column of the leaf reached
    """
    tree = self.rel_tree
    cursor = 0
    while True:
        row = tree[cursor]
        if row[0] == self.LEAF:
            return tree[cursor, 1]
        goes_left = point[int(row[0])] <= row[1]
        step = row[2] if goes_left else row[3]
        cursor += int(step)
|
||||
|
||||
def query(self, points):
    """
    @summary: Predict a value for every row of a set of query points.
    @param points: numpy array in which each row is one query
    @returns array with the result of self.query_point for each row
    """
    return np.apply_along_axis(self.query_point, 1, points)
|
||||
|
||||
# Script entry point: running this module directly only prints the clue.
if __name__ == "__main__":
    print("the secret clue is 'zzyzx'")
|
||||
|
||||
Reference in New Issue
Block a user