Fix DTLearner. The issue was that I took the length of the wrong tree (right instead of left) for the root. Also avoid code duplication via an abstract tree learner class because why not.

This commit is contained in:
2020-09-24 22:15:41 -04:00
parent 3f2d2f4df3
commit 7007bc7514
6 changed files with 145 additions and 523 deletions

View File

@@ -36,8 +36,6 @@ if __name__=="__main__":
print("Usage: python testlearner.py <filename>")
sys.exit(1)
inf = open(sys.argv[1])
# data = np.array([list(map(float,s.strip().split(',')[1:]))
# for s in inf.readlines()[1:]])
data = np.array([list(map(float,s.strip().split(',')[1:]))
for s in inf.readlines()[1:]])
@@ -51,32 +49,44 @@ if __name__=="__main__":
testX = data[train_rows:,0:-1]
testY = data[train_rows:,-1]
# trainX = data[:, 0:-1]
# trainY = data[:, -1]
# testX = data[:, 0:-1]
# testY = data[:, -1]
print(f"{testX.shape}")
print(f"{testY.shape}")
# create a learner and train it
# learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner
learner = dtl.DTLearner(verbose = True) # create a LinRegLearner
# learner = rtl.RTLearner(verbose = True) # create an RTLearner
# learner = bgl.BagLearner(dtl.DTLearner, bags=50) # create a BagLearner of DTLearners
def test_learner(learner_class, **kwargs):
    """Build one learner, train it, and print its evaluation report.

    Instantiates ``learner_class(**kwargs)``, passes the enclosing scope's
    ``trainX``/``trainY`` to ``addEvidence``, prints ``author()``, and then
    prints RMSE and correlation of ``query`` predictions, first against the
    training targets and then against ``testX``/``testY``.

    Args:
        learner_class: Class to instantiate; must provide ``addEvidence``,
            ``author`` and ``query``.
        **kwargs: Forwarded unchanged to the ``learner_class`` constructor.
    """
    print("\n-----------")
    print(f"name={learner_class.__name__} {kwargs=}")
    model = learner_class(**kwargs)
    model.addEvidence(trainX, trainY)
    print(model.author())

    # The same report is produced twice: once on the data handed to
    # addEvidence, once on the rows kept aside as testX/testY.
    splits = (
        ("In sample results", trainX, trainY),
        ("Out of sample results", testX, testY),
    )
    for heading, features, targets in splits:
        predicted = model.query(features)  # get the predictions
        squared_error = ((targets - predicted) ** 2).sum()
        rmse = math.sqrt(squared_error / targets.shape[0])
        print()
        print(heading)
        print(f"RMSE: {rmse}")
        corr = np.corrcoef(predicted, y=targets)
        print(f"corr: {corr[0,1]}")
    print()
# test_learner(lrl.LinRegLearner)
test_learner(dtl.DTLearner, leaf_size=1)
test_learner(rtl.RTLearner)
test_learner(rtl.RTLearner, leaf_size=5)
# test_learner(bgl.BagLearner, learner=dtl.DTLearner, bags=20)
# learner = isl.InsaneLearner()
learner.addEvidence(trainX, trainY)
print(learner.author())
# evaluate in sample
predY = learner.query(trainX) # get the predictions
rmse = math.sqrt(((trainY - predY) ** 2).sum()/trainY.shape[0])
print()
print("In sample results")
print(f"RMSE: {rmse}")
c = np.corrcoef(predY, y=trainY)
print(f"corr: {c[0,1]}")
# evaluate out of sample
predY = learner.query(testX) # get the predictions
rmse = math.sqrt(((testY - predY) ** 2).sum()/testY.shape[0])
print()
print("Out of sample results")
print(f"RMSE: {rmse}")
c = np.corrcoef(predY, y=testY)
print(f"corr: {c[0,1]}")