508 lines
20 KiB
Python
508 lines
20 KiB
Python
"""MC3-P1: Assess learners - grading script.
|
|
|
|
Usage:
|
|
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
|
|
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
|
|
PYTHONPATH=ml4t:MC3-P1/jdoe7 python ml4t/mc3_p1_grading/grade_learners.py
|
|
|
|
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
|
|
Atlanta, Georgia 30332
|
|
All Rights Reserved
|
|
|
|
Template code for CS 4646/7646
|
|
|
|
Georgia Tech asserts copyright ownership of this template and all derivative
|
|
works, including solutions to the projects assigned in this course. Students
|
|
and other users of this template code are advised not to share it with others
|
|
or to make it available on publicly viewable websites including repositories
|
|
such as github and gitlab. This copyright statement should not be removed
|
|
or edited.
|
|
|
|
We do grant permission to share solutions privately with non-students such
|
|
as potential employers. However, sharing with other current or future
|
|
students of CS 7646 is prohibited and subject to being investigated as a
|
|
GT honor code violation.
|
|
|
|
-----do not edit anything above this line---
|
|
"""
|
|
|
|
import pytest
|
|
from grading.grading import grader, GradeResult, time_limit, run_with_timeout, IncorrectOutput
|
|
import util
|
|
|
|
import os
|
|
import sys
|
|
import traceback as tb
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from collections import namedtuple
|
|
|
|
import math
|
|
|
|
import string
|
|
|
|
import time
|
|
|
|
import random
|
|
|
|
# Grading parameters
|
|
# rmse_margins = dict(KNNLearner=1.10, BagLearner=1.10) # 1.XX = +XX% margin of RMS error
|
|
# points_per_test_case = dict(KNNLearner=3.0, BagLearner=2.0) # points per test case for each group
|
|
# seconds_per_test_case = 10 # execution time limit
|
|
# seconds_per_test_case = 6
|
|
|
|
# More grading parameters (picked up by module-level grading fixtures)
|
|
max_points = 50.0 # 3.0*5 + 3.0*5 + 2.0*10 = 50
|
|
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
|
|
|
|
# Test cases
|
|
LearningTestCase = namedtuple('LearningTestCase', ['description', 'group', 'datafile', 'seed', 'outputs'])
|
|
learning_test_cases = [
|
|
########################
|
|
# DTLearner test cases #
|
|
########################
|
|
LearningTestCase(
|
|
description="Test Case 01: Deterministic Tree",
|
|
group='DTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090001,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 02: Deterministic Tree",
|
|
group='DTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090002,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 03: Deterministic Tree",
|
|
group='DTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090003,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 04: Deterministic Tree",
|
|
group='DTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090004,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
########################
|
|
# RTLearner test cases #
|
|
########################
|
|
LearningTestCase(
|
|
description="Test Case 01: Random Tree",
|
|
group='RTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090001,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 02: Random Tree",
|
|
group='RTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090002,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 03: Random Tree",
|
|
group='RTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090003,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 04: Random Tree",
|
|
group='RTLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090004,
|
|
outputs=dict(
|
|
insample_corr_min=0.95,
|
|
outsample_corr_min=0.15,
|
|
insample_corr_max=0.95
|
|
)
|
|
),
|
|
|
|
######################
|
|
# Bagging test cases #
|
|
######################
|
|
LearningTestCase(
|
|
description="Test Case 01: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090001,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 02: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090002,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 03: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090003,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 04: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090004,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 05: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090005,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 06: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090006,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 07: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090007,
|
|
outputs=None
|
|
),
|
|
LearningTestCase(
|
|
description="Test Case 08: Bagging",
|
|
group='BagLearner',
|
|
datafile='Istanbul.csv',
|
|
seed=1481090008,
|
|
outputs=None
|
|
),
|
|
##############################
|
|
# RandomName + InsaneLearner #
|
|
##############################
|
|
LearningTestCase(
|
|
description="InsaneLearner Test Case",
|
|
group='InsaneLearner',
|
|
datafile='simple.csv',
|
|
seed=1498076428,
|
|
outputs=None,
|
|
),
|
|
LearningTestCase(
|
|
description="Random Classname Test Case",
|
|
group='RandomName',
|
|
datafile='simple.csv',
|
|
seed=1498076428,
|
|
outputs=None),
|
|
]
|
|
|
|
|
|
# Test functon(s)
|
|
@pytest.mark.parametrize("description,group,datafile,seed,outputs", learning_test_cases)
|
|
def test_learners(description, group, datafile, seed, outputs, grader):
|
|
"""Test ML models returns correct predictions.
|
|
|
|
Requires test description, test case group, inputs, expected outputs, and a grader fixture.
|
|
"""
|
|
|
|
points_earned = 0.0 # initialize points for this test case
|
|
try:
|
|
learner_class = None
|
|
kwargs = {'verbose':False}
|
|
|
|
# (BPH) Copied from grade_strategy_qlearning.py
|
|
#Set fixed seed for repetability
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
#remove ability to seed either np.random or python random
|
|
tmp_numpy_seed = np.random.seed
|
|
tmp_random_seed = random.seed
|
|
np.random.seed = fake_seed
|
|
random.seed = fake_rseed
|
|
|
|
# Try to import KNNLearner (only once)
|
|
# if not 'KNNLearner' in globals():
|
|
# from KNNLearner import KNNLearner
|
|
if not 'RTLearner' in globals():
|
|
from RTLearner import RTLearner
|
|
if not 'DTLearner' in globals():
|
|
from DTLearner import DTLearner
|
|
if (group is 'BagLearner') or (group is 'InsaneLearner') or (group is 'RandomName') and (not 'BagLearner' in globals()):
|
|
from BagLearner import BagLearner
|
|
#put seeds back for the moment
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
# Tweak kwargs
|
|
# kwargs.update(inputs.get('kwargs', {}))
|
|
|
|
# Read separate training and testing data files
|
|
# with open(inputs['train_file']) as f:
|
|
# data_partitions=list()
|
|
testX,testY,trainX,trainY = None,None, None,None
|
|
permutation = None
|
|
author = None
|
|
with util.get_learner_data_file(datafile) as f:
|
|
alldata = np.genfromtxt(f,delimiter=',')
|
|
# Skip the date column and header row if we're working on Istanbul data
|
|
if datafile == 'Istanbul.csv':
|
|
alldata = alldata[1:,1:]
|
|
datasize = alldata.shape[0]
|
|
cutoff = int(datasize*0.6)
|
|
permutation = np.random.permutation(alldata.shape[0])
|
|
col_permutation = np.random.permutation(alldata.shape[1]-1)
|
|
train_data = alldata[permutation[:cutoff],:]
|
|
# trainX = train_data[:,:-1]
|
|
trainX = train_data[:,col_permutation]
|
|
trainY = train_data[:,-1]
|
|
test_data = alldata[permutation[cutoff:],:]
|
|
# testX = test_data[:,:-1]
|
|
testX = test_data[:,col_permutation]
|
|
testY = test_data[:,-1]
|
|
msgs = []
|
|
|
|
if (group is "RTLearner") or (group is "DTLearner"):
|
|
clss_name = RTLearner if group is "RTLearner" else DTLearner
|
|
tree_sptc = 3 if group is "RTLearner" else 10
|
|
corr_in, corr_out, corr_in_50 = None,None,None
|
|
def oneleaf():
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
np.random.seed = fake_seed
|
|
random.seed = fake_rseed
|
|
learner = clss_name(leaf_size=1,verbose=False)
|
|
learner.addEvidence(trainX,trainY)
|
|
insample = learner.query(trainX)
|
|
outsample = learner.query(testX)
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
author_rv = None
|
|
try:
|
|
author_rv = learner.author()
|
|
except:
|
|
pass
|
|
return insample, outsample, author_rv
|
|
def fiftyleaves():
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
np.random.seed = fake_seed
|
|
random.seed = fake_rseed
|
|
learner = clss_name(leaf_size=50,verbose=False)
|
|
learner.addEvidence(trainX,trainY)
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
return learner.query(trainX)
|
|
|
|
predY_in, predY_out, author = run_with_timeout(oneleaf,tree_sptc,(),{})
|
|
predY_in_50 = run_with_timeout(fiftyleaves,tree_sptc,(),{})
|
|
corr_in = np.corrcoef(predY_in,y=trainY)[0,1]
|
|
corr_out = np.corrcoef(predY_out,y=testY)[0,1]
|
|
corr_in_50 = np.corrcoef(predY_in_50,y=trainY)[0,1]
|
|
incorrect = False
|
|
|
|
if corr_in < outputs['insample_corr_min'] or np.isnan(corr_in):
|
|
incorrect = True
|
|
msgs.append(" In-sample with leaf_size=1 correlation less than allowed: got {} expected {}".format(corr_in,outputs['insample_corr_min']))
|
|
else:
|
|
points_earned += 1.0
|
|
if corr_out < outputs['outsample_corr_min'] or np.isnan(corr_out):
|
|
incorrect = True
|
|
msgs.append(" Out-of-sample correlation less than allowed: got {} expected {}".format(corr_out,outputs['outsample_corr_min']))
|
|
else:
|
|
points_earned += 1.0
|
|
if corr_in_50 > outputs['insample_corr_max'] or np.isnan(corr_in_50):
|
|
incorrect = True
|
|
msgs.append(" In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}".format(corr_in_50,outputs['insample_corr_max']))
|
|
else:
|
|
points_earned += 1.0
|
|
# Check author string
|
|
if (author is None) or (author =='tb34'):
|
|
incorrect = True
|
|
msgs.append(" Invalid author: {}".format(author))
|
|
points_earned += -2.0
|
|
|
|
elif group is "BagLearner":
|
|
corr1, corr20 = None,None
|
|
bag_sptc = 10
|
|
def onebag():
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
np.random.seed = fake_seed
|
|
random.seed = fake_rseed
|
|
learner1 = BagLearner(learner=RTLearner,kwargs={"leaf_size":1},bags=1,boost=False,verbose=False)
|
|
learner1.addEvidence(trainX,trainY)
|
|
q_rv = learner1.query(testX)
|
|
a_rv = learner1.author()
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
return q_rv,a_rv
|
|
def twentybags():
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
np.random.seed = fake_seed
|
|
random.seed = fake_rseed
|
|
learner20 = BagLearner(learner=RTLearner,kwargs={"leaf_size":1},bags=20,boost=False,verbose=False)
|
|
learner20.addEvidence(trainX,trainY)
|
|
q_rv = learner20.query(testX)
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
return q_rv
|
|
predY1,author = run_with_timeout(onebag,bag_sptc,pos_args=(),keyword_args={})
|
|
predY20 = run_with_timeout(twentybags,bag_sptc,(),{})
|
|
|
|
corr1 = np.corrcoef(predY1,testY)[0,1]
|
|
corr20 = np.corrcoef(predY20,testY)[0,1]
|
|
incorrect = False
|
|
# msgs = []
|
|
if corr20 <= corr1:
|
|
incorrect = True
|
|
msgs.append(" Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}".format(corr20,corr1))
|
|
else:
|
|
points_earned += 2.0
|
|
# Check author string
|
|
if (author is None) or (author=='tb34'):
|
|
incorrect = True
|
|
msgs.append(" Invalid author: {}".format(author))
|
|
points_earned += -1.0
|
|
elif group is "InsaneLearner":
|
|
try:
|
|
def insane():
|
|
import InsaneLearner as it
|
|
learner = it.InsaneLearner(verbose=False)
|
|
learner.addEvidence(trainX,trainY)
|
|
Y = learner.query(testX)
|
|
run_with_timeout(insane,10,pos_args=(),keyword_args={})
|
|
incorrect = False
|
|
except Exception as e:
|
|
incorrect = True
|
|
msgs.append(" Exception calling InsaneLearner: {}".format(e))
|
|
points_earned = -10
|
|
elif group is "RandomName":
|
|
try:
|
|
il_name,il_code = gen_class()
|
|
exec(il_code) in globals(), locals()
|
|
il_cobj = eval(il_name)
|
|
def rnd_name():
|
|
np.random.seed(seed)
|
|
random.seed(seed)
|
|
np.random.seed=fake_seed
|
|
random.seed = fake_rseed
|
|
learner = BagLearner(learner=il_cobj,kwargs={'verbose':False},bags=20,boost=False,verbose=False)
|
|
learner.addEvidence(trainX,trainY)
|
|
Y = learner.query(testX)
|
|
np.random.seed = tmp_numpy_seed
|
|
random.seed = tmp_random_seed
|
|
return il_cobj.init_callcount_dict, il_cobj.add_callcount_dict, il_cobj.query_callcount_dict
|
|
iccd, accd, qccd = run_with_timeout(rnd_name,10,pos_args=(),keyword_args={})
|
|
incorrect = False
|
|
if (len(iccd)!=20) or (any([v!=1 for v in iccd.values()])):
|
|
incorrect = True
|
|
msgs.append(" Unexpected number of calls to __init__, sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(iccd),max(iccd.values()),min(iccd.values())))
|
|
points_earned = -10
|
|
if (len(accd)!=20) or (any([v!=1 for v in accd.values()])):
|
|
incorrect = True
|
|
msgs.append(" Unexpected number of calls to addEvidence sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(accd),max(accd.values()),min(accd.values())))
|
|
points_earned = -10
|
|
if (len(qccd)!=20) or (any([v!=1 for v in qccd.values()])):
|
|
incorrect = True
|
|
msgs.append(" Unexpected number of calls to query, sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(qccd),max(qccd.values()),min(qccd.values())))
|
|
points_earned = -10
|
|
except Exception as e:
|
|
incorrect = True
|
|
msgs.append(" Exception calling BagLearner: {}".format(e))
|
|
points_earned = -10
|
|
if incorrect:
|
|
inputs_str = " data file: {}\n" \
|
|
" permutation: {}".format(datafile, permutation)
|
|
raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
|
|
except Exception as e:
|
|
# Test result: failed
|
|
msg = "Description: {} (group: {})\n".format(description, group)
|
|
|
|
# Generate a filtered stacktrace, only showing erroneous lines in student file(s)
|
|
tb_list = tb.extract_tb(sys.exc_info()[2])
|
|
for i in range(len(tb_list)):
|
|
row = tb_list[i]
|
|
tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
|
|
tb_list = [row for row in tb_list if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')]
|
|
if tb_list:
|
|
msg += "Traceback:\n"
|
|
msg += ''.join(tb.format_list(tb_list)) # contains newlines
|
|
msg += "{}: {}".format(e.__class__.__name__, str(e))
|
|
|
|
# Report failure result to grader, with stacktrace
|
|
grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
|
|
raise
|
|
else:
|
|
# Test result: passed (no exceptions)
|
|
grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
|
|
|
|
def gen_class():
|
|
c_def = "class {}(object):\n"
|
|
c_def+= " foo=4\n"
|
|
c_def+= " init_callcount_dict=dict()\n"
|
|
c_def+= " add_callcount_dict=dict()\n"
|
|
c_def+= " query_callcount_dict=dict()\n"
|
|
c_def+= " def __init__(self,**kwargs):\n"
|
|
c_def+= " self.ctor_args = kwargs\n"
|
|
c_def+= " self.init_callcount_dict[str(self)] = self.init_callcount_dict.get(str(self),0)+1\n"
|
|
c_def+= " if ('verbose' in self.ctor_args) and (self.ctor_args['verbose']==True):\n"
|
|
c_def+= " print('creating class')\n"
|
|
c_def+= " def addEvidence(self,trainX,trainY):\n"
|
|
c_def+= " self.trainX = trainX\n"
|
|
c_def+= " self.trainY = trainY\n"
|
|
c_def+= " self.add_callcount_dict[str(self)] = self.add_callcount_dict.get(str(self),0)+1\n"
|
|
c_def+= " if ('verbose' in self.ctor_args) and (self.ctor_args['verbose']==True):\n"
|
|
c_def+= " print('addEvidence()')\n"
|
|
c_def+= " def query(self,testX):\n"
|
|
c_def+= " rv = np.zeros(len(testX))\n"
|
|
c_def+= " rv[:] = self.trainY.mean()\n"
|
|
c_def+= " self.query_callcount_dict[str(self)] = self.query_callcount_dict.get(str(self),0)+1\n"
|
|
c_def+= " if ('verbose' in self.ctor_args) and (self.ctor_args['verbose']==True):\n"
|
|
c_def+= " print('query()')\n"
|
|
c_def+= " return rv"
|
|
c_name = ''.join(np.random.permutation(np.array(tuple(string.ascii_letters)))[:10].tolist())
|
|
return c_name,c_def.format(c_name)
|
|
|
|
def fake_seed(*args):
|
|
pass
|
|
def fake_rseed(*args):
|
|
pass
|
|
|
|
if __name__ == "__main__":
|
|
pytest.main(["-s", __file__])
|