Finish all learners, but they don't pass the tests yet. I have to figure out why they perform so badly.
This commit is contained in:
83
assess_learners/RTLearner.py
Normal file
83
assess_learners/RTLearner.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class RTLearner(object):
    """Random-tree regression learner stored as a flat NumPy table.

    The fitted tree lives in ``self.rel_tree``, a 2-D array with one row per
    node laid out as ``[factor, split_value, left, right]``:

    * internal node: ``factor`` is the feature column used for the split,
      ``split_value`` is the threshold, and ``left`` / ``right`` are row
      offsets *relative to the node's own row* of the two children;
    * leaf node: ``factor`` is ``LEAF``, ``split_value`` holds the predicted
      value, and both offsets are ``NA``.

    Nodes are stored in pre-order: a node, then its whole left subtree, then
    its whole right subtree.
    """

    LEAF = -1  # marker stored in the factor column of leaf rows
    NA = -1    # filler for the unused child offsets of a leaf

    def __init__(self, leaf_size=1, verbose=False):
        """
        @summary: Create an untrained learner.
        @param leaf_size: node sizes at or below this become leaves
        @param verbose: stored on the instance; not read by this class
        """
        self.leaf_size = leaf_size
        self.verbose = verbose

    def author(self):
        """Return the author's username."""
        return 'felixm'

    def create_node(self, factor, split_value, left, right):
        """Return a single tree row as a ``(1, 4)`` array."""
        return np.array([[factor, split_value, left, right], ])

    def build_tree(self, xs, y):
        """
        @summary: Recursively build the tree table for the given samples.
        @param xs: 2-D array of features, one row per sample
        @param y: 1-D array of targets, one entry per row of xs
        @returns 2-D array of tree rows in pre-order (see class docstring)
        """
        assert xs.shape[0] == y.shape[0]
        assert xs.shape[0] > 0  # If this is 0 something went wrong.

        if xs.shape[0] <= self.leaf_size:
            return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)

        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        # Only features that actually vary can separate the samples. Picking
        # among those (instead of re-drawing until one varies) also avoids an
        # endless loop when every row of xs is identical.
        candidates = np.flatnonzero(np.any(xs != xs[0], axis=0))
        if candidates.size == 0:
            return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)
        i = candidates[np.random.randint(0, candidates.size)]
        column = xs[:, i]

        r1, r2 = np.random.randint(0, xs.shape[0], size=2)
        split_value = (column[r1] + column[r2]) / 2.0

        select_lt = column <= split_value
        if select_lt.all():
            # The drawn threshold is the column maximum, so nothing would go
            # right. Lower it to the largest value strictly below it; the
            # split is then non-trivial and still uses "<=", which is the
            # comparison query_point applies.
            split_value = column[column < split_value].max()
            select_lt = column <= split_value
        select_rt = ~select_lt

        lt = self.build_tree(xs[select_lt], y[select_lt])
        rt = self.build_tree(xs[select_rt], y[select_rt])

        # The left child directly follows this node; the right child starts
        # after the entire left subtree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)

    def query_point(self, point):
        """
        @summary: Predict the value for a single sample.
        @param point: 1-D array of feature values
        @returns the value stored in the leaf the sample ends up in
        """
        node_index = 0
        while self.rel_tree[node_index, 0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = int(node[0])
            split_value = node[1]
            # Child offsets are relative to the current row.
            if point[split_factor] <= split_value:
                node_index += int(node[2])
            else:
                node_index += int(node[3])
        return self.rel_tree[node_index, 1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)
|
||||
|
||||
if __name__ == "__main__":
    # Running the module directly only prints this message; no learner is
    # created or trained here.
    print("the secret clue is 'zzyzx'")
|
||||
Reference in New Issue
Block a user