1
0
Fork 0

Finish project 4

master
Felix Martin 2020-10-05 20:01:29 -04:00
parent 381670705b
commit d0c40f9af5
1 changed files with 6 additions and 26 deletions

View File

@ -49,34 +49,14 @@ def best4DT(seed=1489683273):
"""
This function should return a dataset that will work better for decision
trees than linear regression.
Decision trees are better at categorizing discrete data, so setting the
output values to integers should help. Additionally, the smaller the
dataset, the harder it is for linear regression to fit a nice curve.
"""
# Z = np.append(X, Y.reshape(Y.shape[0], 1), 1)
# pd.DataFrame(Z).to_csv("Z.csv", header=None, index=None)
# np.random.seed(seed)
# X = np.random.random(size=(100, 10))*1000-100
# Y = np.random.random(size=(100,))*1000-100
np.random.seed(seed)
# X_1 = np.random.random(size=(100, 1))*200-100
# X_2 = np.random.random(size=(100, 1))*200-100
# X_3 = np.random.random(size=(100, 1))*200-100
# X_4 = np.random.random(size=(100, 1))*200-100
# X = np.concatenate([X_1, X_2, X_3, X_4], 1)
# XXX: I honestly don't know how to help the DTLearner, yet.
X_1 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
X_2 = np.asarray([i for i in range(100, 1100, 10)]).reshape(100, 1)
X_3 = np.asarray([i for i in range(200, 300)]).reshape(100, 1)
X_4 = np.asarray([i for i in range(300, 400)]).reshape(100, 1)
X_5 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
X_6 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
X_7 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
X_8 = np.asarray([i for i in range(1, 101)]).reshape(100, 1)
X = np.concatenate([X_1, X_2, X_3, X_4, X_5, X_6, X_7, X_8], 1)
# Y = X[:, 0] * 2 + X[:, 1] * 3
Y = np.random.random(size=(100,)) * 200 - 100
X = np.random.random(size=(10, 10)) * 200 - 100
Y = np.asarray([i for i in range(0, 10)])
return X, Y