Start working on defeat learners assignment.

2020-09-27 16:22:16 -04:00
parent 8ee47c9a1d
commit db537d7043
6 changed files with 503 additions and 0 deletions
--- a/defeat_learners/testbest4.py
+++ b/defeat_learners/testbest4.py
@@ -0,0 +1,100 @@
+"""  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+Test best4 data generator.  (c) 2016 Tucker Balch  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+Copyright 2018, Georgia Institute of Technology (Georgia Tech)  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+Atlanta, Georgia 30332  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+All Rights Reserved  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+Template code for CS 4646/7646  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+Georgia Tech asserts copyright ownership of this template and all derivative  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+works, including solutions to the projects assigned in this course. Students  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+and other users of this template code are advised not to share it with others  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+or to make it available on publicly viewable websites including repositories  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+such as github and gitlab.  This copyright statement should not be removed  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+or edited.  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+We do grant permission to share solutions privately with non-students such  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+as potential employers. However, sharing with other current or future  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+students of CS 7646 is prohibited and subject to being investigated as a  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+GT honor code violation.  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+-----do not edit anything above this line---  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+"""  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+import numpy as np  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+import math  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+import LinRegLearner as lrl  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+import DTLearner as dt  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+from gen_data import best4LinReg, best4DT  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+# compare two learners' rmse out of sample  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+def compare_os_rmse(learner1, learner2, X, Y):
+    """Train two learners on one random split and score both out of sample.
+
+    A random 60% of the rows of X (rounded down) is used for training and
+    the remaining rows are held out for testing.
+
+    :param learner1, learner2: objects providing addEvidence(X, Y), query(X)
+    :param X: feature array, indexed as X[rows, :]
+    :param Y: target array, indexed by the same row numbers as X
+    :return: tuple (rmse1, rmse2), the test-set RMSE of each learner
+    """
+    n_rows = X.shape[0]
+    n_train = int(math.floor(0.6 * n_rows))
+
+    # draw training rows without replacement; all other rows are the test set
+    train_idx = np.random.choice(n_rows, size=n_train, replace=False)
+    test_idx = np.setdiff1d(np.array(range(n_rows)), train_idx)
+    x_train, y_train = X[train_idx, :], Y[train_idx]
+    x_test, y_test = X[test_idx, :], Y[test_idx]
+
+    learner1.addEvidence(x_train, y_train)
+    learner2.addEvidence(x_train, y_train)
+    scores = []
+    for model in (learner1, learner2):
+        residual = y_test - model.query(x_test)
+        scores.append(math.sqrt((residual ** 2).sum() / y_test.shape[0]))
+    return scores[0], scores[1]
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+def test_code():
+    """Score LinRegLearner against DTLearner on both generated data sets.
+
+    For each generator the out-of-sample RMSE of both learners is printed,
+    followed by a pass/fail line: the favoured learner must score below
+    0.9 times the other learner's RMSE to pass.
+    """
+    # first comparison: data from best4LinReg(), linear regression should win
+    lrlearner = lrl.LinRegLearner(verbose = False)
+    dtlearner = dt.DTLearner(verbose = False, leaf_size = 1)
+    X, Y = best4LinReg()
+    rmseLR, rmseDT = compare_os_rmse(lrlearner, dtlearner, X, Y)
+
+    print()
+    print("best4LinReg() results")
+    print(f"RMSE LR    : {rmseLR}")
+    print(f"RMSE DT    : {rmseDT}")
+    if rmseLR < 0.9 * rmseDT:
+        print("LR < 0.9 DT:  pass")
+    else:
+        print("LR >= 0.9 DT:  fail")
+    print()  # trailing blank line; print must be called to emit it
+
+    # second comparison: data from best4DT(), the decision tree should win;
+    # new learner instances are created for it
+    lrlearner = lrl.LinRegLearner(verbose = False)
+    dtlearner = dt.DTLearner(verbose = False, leaf_size = 1)
+    X, Y = best4DT()
+    rmseLR, rmseDT = compare_os_rmse(lrlearner, dtlearner, X, Y)
+
+    print()
+    print("best4DT() results")  # label names the generator actually called
+    print(f"RMSE LR    : {rmseLR}")
+    print(f"RMSE DT    : {rmseDT}")
+    if rmseDT < 0.9 * rmseLR:
+        print("DT < 0.9 LR:  pass")
+    else:
+        print("DT >= 0.9 LR:  fail")
+    print()  # trailing blank line; print must be called to emit it
+  		  	   		     			  		 			     			  	  		 	  	 		 			  		  			
+if __name__=="__main__":  # run the comparison only when executed as a script
+    test_code()