Add tree learners to strategy evaluation directory
This commit is contained in:
parent
05db89e8c2
commit
10d87aefd3
77
strategy_evaluation/AbstractTreeLearner.py
Normal file
77
strategy_evaluation/AbstractTreeLearner.py
Normal file
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class AbstractTreeLearner:
    """Shared machinery for decision-tree learners stored as a flat array.

    The tree is a 1-D structured numpy array in which every row is one node
    ``(factor, split_value, left, right)``. ``left`` and ``right`` are row
    offsets relative to the node's own row. A leaf carries ``LEAF`` in the
    factor slot and its prediction in the split_value slot.

    Subclasses must set ``self.leaf_size`` and implement
    ``get_i_and_split_value``.
    """

    # Marker stored in the factor slot of a leaf node.
    LEAF = -1
    # Placeholder stored in the unused child-offset slots of a leaf node.
    NA = -1

    def author(self):
        """Return the user name of the author."""
        return 'felixm'

    def create_node(self, factor, split_value, left, right):
        """Return a one-row structured array describing a single node.

        @param factor: column index to split on, or LEAF for a leaf
        @param split_value: split threshold, or the prediction for a leaf
        @param left: row offset from this node to its left child
        @param right: row offset from this node to its right child
        """
        return np.array([(factor, split_value, left, right), ],
                        dtype='|i4, f4, i4, i4')

    def get_i_and_split_value(self, xs, y):
        """Return ``(column_index, split_value)`` for the given samples.

        The splitting strategy is what distinguishes concrete learners, so
        subclasses have to provide it.
        """
        raise NotImplementedError(
            "subclasses must implement get_i_and_split_value")

    def query_point(self, point):
        """Walk the tree built by addEvidence and return the leaf value.

        @param point: one sample, indexable by feature column
        @returns the value stored in the leaf the sample ends up in
        """
        node_index = 0
        while self.rel_tree[node_index][0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = node[0]
            split_value = node[1]
            # Child positions are stored as offsets from the current row.
            if point[split_factor] <= split_value:
                node_index += node[2]
            else:
                node_index += node[3]
        return self.rel_tree[node_index][1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: numpy array with each row corresponding to a query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)

    def _make_leaf(self, y):
        """Return a leaf whose value is mean(y) snapped to -1, 0 or 1."""
        mean = np.mean(y)
        if mean < -0.3:
            value = -1
        elif mean > 0.3:
            value = 1
        else:
            value = 0
        return self.create_node(self.LEAF, value, self.NA, self.NA)

    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: 2-D numpy array of features, one row per sample
        @param y: numpy array with one target value per row of xs
        @returns structured array of nodes with the root in row 0
        @raises ValueError: if xs and y differ in length or are empty
        """
        if xs.shape[0] != y.shape[0]:
            raise ValueError("xs and y must contain the same number of rows")
        if xs.shape[0] == 0:
            raise ValueError("cannot build a tree from zero samples")

        if xs.shape[0] <= self.leaf_size:
            return self._make_leaf(y)

        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        # Identical feature rows cannot be separated by any split; asking the
        # subclass for one would never make progress, so aggregate instead.
        if np.all(xs == xs[0]):
            return self._make_leaf(y)

        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value

        # A split that leaves one side empty does not partition the data;
        # recursing on it would fail or repeat forever.
        if not select_l.any() or not select_r.any():
            return self._make_leaf(y)

        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # The left subtree starts right after the root; the right subtree
        # follows all rows of the left subtree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)
|
||||
|
47
strategy_evaluation/BagLearner.py
Normal file
47
strategy_evaluation/BagLearner.py
Normal file
@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
from AbstractTreeLearner import AbstractTreeLearner
|
||||
|
||||
|
||||
class BagLearner(AbstractTreeLearner):
    """Ensemble that trains several learners on random bags of the data."""

    def __init__(self, learner, bags=9, boost=False, verbose=False,
                 kwargs=None):
        """
        @param learner: class (or factory) used to create each bag's learner
        @param bags: number of learners to create
        @param boost: accepted for interface compatibility; not used here
        @param verbose: stored on the instance; not otherwise used here
        @param kwargs: keyword arguments passed to every learner constructor
        """
        self.learner = learner
        self.verbose = verbose
        self.bags = bags
        # A None default avoids sharing one mutable dict between instances.
        learner_kwargs = {} if kwargs is None else kwargs
        self.learners = [learner(**learner_kwargs) for _ in range(bags)]

    def get_bag(self, data_x, data_y):
        """Draw a random sample, with replacement, of half the rows.

        At least one row is always drawn so that very small data sets still
        produce a usable bag.

        @param data_x: numpy array of features, one row per sample
        @param data_y: numpy array with one target value per row of data_x
        @returns tuple (bag_x, bag_y) of the sampled rows and targets
        @raises ValueError: if data_x has no rows
        """
        total = data_x.shape[0]
        if total == 0:
            raise ValueError("cannot draw a bag from zero samples")
        num_items = max(1, int(total * 0.5))  # 50% of samples
        picks = np.random.randint(0, total, size=num_items)
        return data_x[picks], data_y[picks]

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        for learner in self.learners:
            x, y = self.get_bag(data_x, data_y)
            learner.addEvidence(x, y)

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: numpy array with each row corresponding to a query.
        @returns the mean of the individual learners' estimates per query.
        """
        return np.mean([l.query(points) for l in self.learners], axis=0)
|
||||
|
30
strategy_evaluation/RTLearner.py
Normal file
30
strategy_evaluation/RTLearner.py
Normal file
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
from AbstractTreeLearner import AbstractTreeLearner
|
||||
|
||||
|
||||
class RTLearner(AbstractTreeLearner):
    """Random tree learner: splits on a random feature at a random value."""

    def __init__(self, leaf_size=1, verbose=False):
        """
        @param leaf_size: stored as self.leaf_size for the tree builder
        @param verbose: stored on the instance; not otherwise used here
        """
        self.leaf_size = leaf_size
        self.verbose = verbose

    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.

        Only columns holding at least two distinct values are eligible. The
        split value is the midpoint of two different values drawn from the
        chosen column.

        @param xs: 2-D numpy array of features, one row per sample
        @param y: target values (unused by the random strategy)
        @returns tuple (i, split_value)
        @raises ValueError: if every column of xs is constant
        """
        # Visiting the columns in random order terminates even when some (or
        # all) columns are constant, unlike re-drawing until one works.
        for i in np.random.permutation(xs.shape[1]):
            distinct = np.unique(xs[:, i])
            if distinct.size < 2:
                continue
            r1, r2 = np.random.choice(distinct, size=2, replace=False)
            lo, hi = min(r1, r2), max(r1, r2)
            split_value = (lo + hi) / 2.0
            # For adjacent floats the midpoint can round onto hi, which
            # would leave nothing strictly greater than the split value.
            if not lo <= split_value < hi:
                split_value = lo
            return int(i), split_value
        raise ValueError(
            "cannot split: every feature is constant across all rows")
|
||||
|
Loading…
Reference in New Issue
Block a user