From d0c40f9af5036ab2848e39e5571c89b39c4b8747 Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Mon, 5 Oct 2020 20:01:29 -0400 Subject: [PATCH] Finish project 4 --- defeat_learners/gen_data.py | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/defeat_learners/gen_data.py b/defeat_learners/gen_data.py index c411662..11feedf 100644 --- a/defeat_learners/gen_data.py +++ b/defeat_learners/gen_data.py @@ -49,34 +49,14 @@ def best4DT(seed=1489683273): """ This function should return a dataset that will work better for decision trees than linear regression. + + Decision trees are better at categorizing discrete data, so setting the + output values to integers should help. Additionally, the smaller the + dataset, the harder it is for linear regression (LR) to produce a good fit. """ - - # Z = np.append(X, Y.reshape(Y.shape[0], 1), 1) - # pd.DataFrame(Z).to_csv("Z.csv", header=None, index=None) - # np.random.seed(seed) - # X = np.random.random(size=(100, 10))*1000-100 - # Y = np.random.random(size=(100,))*1000-100 - np.random.seed(seed) - # X_1 = np.random.random(size=(100, 1))*200-100 - # X_2 = np.random.random(size=(100, 1))*200-100 - # X_3 = np.random.random(size=(100, 1))*200-100 - # X_4 = np.random.random(size=(100, 1))*200-100 - # X = np.concatenate([X_1, X_2, X_3, X_4], 1) - - # XXX: I honestly don't know how to help the DTLearner, yet. 
- - X_1 = np.asarray([i for i in range(1, 101)]).reshape(100, 1) - X_2 = np.asarray([i for i in range(100, 1100, 10)]).reshape(100, 1) - X_3 = np.asarray([i for i in range(200, 300)]).reshape(100, 1) - X_4 = np.asarray([i for i in range(300, 400)]).reshape(100, 1) - X_5 = np.asarray([i for i in range(1, 101)]).reshape(100, 1) - X_6 = np.asarray([i for i in range(1, 101)]).reshape(100, 1) - X_7 = np.asarray([i for i in range(1, 101)]).reshape(100, 1) - X_8 = np.asarray([i for i in range(1, 101)]).reshape(100, 1) - X = np.concatenate([X_1, X_2, X_3, X_4, X_5, X_6, X_7, X_8], 1) - # Y = X[:, 0] * 2 + X[:, 1] * 3 - Y = np.random.random(size=(100,)) * 200 - 100 + X = np.random.random(size=(10, 10)) * 200 - 100 + Y = np.asarray([i for i in range(0, 10)]) return X, Y