Finish project 4
parent
381670705b
commit
d0c40f9af5
|
@ -49,34 +49,14 @@ def best4DT(seed=1489683273):
|
|||
"""
|
||||
This function should return a dataset that will work better for decision
|
||||
trees than linear regression.
|
||||
|
||||
Decision trees are better for categorizing discrete data. So if we set the
|
||||
output values to integers, that should help. Additionally, the smaller the
|
||||
dataset, the harder it is for the LR to create a nice curve.
|
||||
"""
|
||||
|
||||
# Z = np.append(X, Y.reshape(Y.shape[0], 1), 1)
|
||||
# pd.DataFrame(Z).to_csv("Z.csv", header=None, index=None)
|
||||
# np.random.seed(seed)
|
||||
# X = np.random.random(size=(100, 10))*1000-100
|
||||
# Y = np.random.random(size=(100,))*1000-100
|
||||
|
||||
np.random.seed(seed)
|
||||
# X_1 = np.random.random(size=(100, 1))*200-100
|
||||
# X_2 = np.random.random(size=(100, 1))*200-100
|
||||
# X_3 = np.random.random(size=(100, 1))*200-100
|
||||
# X_4 = np.random.random(size=(100, 1))*200-100
|
||||
# X = np.concatenate([X_1, X_2, X_3, X_4], 1)
|
||||
|
||||
# XXX: I honestly don't know how to help the DTLearner, yet.
|
||||
|
||||
X_1 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
|
||||
X_2 = np.asarray([i for i in range(100, 1100, 10)]).reshape(100, 1)
|
||||
X_3 = np.asarray([i for i in range(200, 300)]).reshape(100, 1)
|
||||
X_4 = np.asarray([i for i in range(300, 400)]).reshape(100, 1)
|
||||
X_5 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
|
||||
X_6 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
|
||||
X_7 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
|
||||
X_8 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
|
||||
X = np.concatenate([X_1, X_2, X_3, X_4, X_5, X_6, X_7, X_8], 1)
|
||||
# Y = X[:, 0] * 2 + X[:, 1] * 3
|
||||
Y = np.random.random(size=(100,)) * 200 - 100
|
||||
X = np.random.random(size=(10, 10)) * 200 - 100
|
||||
Y = np.asarray([i for i in range(0, 10)])
|
||||
return X, Y
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue