Implement first version of strategy learner

This version does not pass the automatic test.
This commit is contained in:
2020-11-04 15:14:27 -05:00
parent c40ffcf84b
commit 05db89e8c2
3 changed files with 423 additions and 411 deletions

View File

@@ -1,88 +1,94 @@
"""
Template for implementing StrategyLearner (c) 2016 Tucker Balch
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import datetime as dt import datetime as dt
import pandas as pd import pandas as pd
import util as ut import util
import indicators
from BagLearner import BagLearner
from RTLearner import RTLearner
class StrategyLearner(object): class StrategyLearner(object):
# constructor def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
def __init__(self, verbose = False, impact=0.0, commission=0.0):
self.verbose = verbose self.verbose = verbose
self.impact = impact self.impact = impact
self.commission = commission self.commission = commission
self.testing = testing
# this method should create a QLearner, and train it for trading def _get_volume(self):
def addEvidence(self, symbol = "IBM", \ """For reference."""
sd=dt.datetime(2008,1,1), \ volume_all = ut.get_data(syms, dates, colname="Volume")
ed=dt.datetime(2009,1,1), \
sv = 10000):
# add your code to do learning here
# example usage of the old backward compatible util function
syms=[symbol]
dates = pd.date_range(sd, ed)
prices_all = ut.get_data(syms, dates) # automatically adds SPY
prices = prices_all[syms] # only portfolio symbols
# prices_SPY = prices_all['SPY'] # only SPY, for comparison later
if self.verbose: print(prices)
# example use with new colname
volume_all = ut.get_data(syms, dates, colname = "Volume") # automatically adds SPY
volume = volume_all[syms] # only portfolio symbols volume = volume_all[syms] # only portfolio symbols
# volume_SPY = volume_all['SPY'] # only SPY, for comparison later # volume_SPY = volume_all['SPY'] # only SPY, for comparison later
if self.verbose: print(volume) if self.verbose:
print(volume)
# this method should use the existing policy and test it against new data def _add_indicators(self, df, symbol):
def testPolicy(self, symbol = "IBM", \ """Add indicators for learning to DataFrame."""
sd=dt.datetime(2009,1,1), \ df.drop(columns=["SPY"], inplace=True)
ed=dt.datetime(2010,1,1), \ indicators.macd(df, symbol)
sv = 10000): indicators.rsi(df, symbol)
indicators.price_sma(df, symbol, [8])
indicators.price_delta(df, symbol, 3)
df.dropna(inplace=True)
# here we build a fake set of trades def addEvidence(self, symbol="IBM",
# your code should return the same sort of data sd=dt.datetime(2008, 1, 1),
dates = pd.date_range(sd, ed) ed=dt.datetime(2009, 1, 1),
prices_all = ut.get_data([symbol], dates) # automatically adds SPY sv=10000):
trades = prices_all[[symbol,]] # only portfolio symbols
# trades_SPY = prices_all['SPY'] # only SPY, for comparison later self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
trades.values[:,:] = 0 # set them all to nothing df = util.get_data([symbol], pd.date_range(sd, ed))
trades.values[0,:] = 1000 # add a BUY at the start self._add_indicators(df, symbol)
trades.values[40,:] = -1000 # add a SELL
trades.values[41,:] = 1000 # add a BUY def classify_y(row):
trades.values[60,:] = -2000 # go short from long if row > 0.1:
trades.values[61,:] = 2000 # go long from short return 1
trades.values[-1,:] = -1000 #exit on the last day elif row < -0.1:
if self.verbose: print(type(trades)) # it better be a DataFrame! return -1
if self.verbose: print(trades) return 0
if self.verbose: print(prices_all)
return trades self.learner = RTLearner(leaf_size = 7)
# self.learner = BagLearner(RTLearner, 5, {'leaf_size': 5})
data_x = df[self.indicators].to_numpy()
y = df['pct_3'].apply(classify_y)
self.learner.addEvidence(data_x, y.to_numpy())
return y
def strat(self, data_y, orders):
    """Translate per-day classifier signals into share orders, in place.

    Walks ``orders`` in index order while tracking the running position
    and writes the trade for every day into ``orders["Shares"]``:

    * flat (0) and signal +1       -> buy 1000
    * short (-1000) and signal +1  -> buy 2000 (flip to long)
    * flat (0) and signal -1       -> sell 1000
    * long (+1000) and signal -1   -> sell 2000 (flip to short)
    * any other combination        -> no trade (0)

    :param data_y: DataFrame whose first column holds the signal for
        every label found in ``orders.index``.
    :param orders: DataFrame to update; its "Shares" column is
        (re)written with one signed share count per row.
    :return: None. ``self.holding`` is left at the final position.
    """
    # (current position, signal) -> signed number of shares to trade.
    transitions = {
        (0, 1): 1000,
        (-1000, 1): 2000,
        (0, -1): -1000,
        (1000, -1): -2000,
    }

    signals = data_y.iloc[:, 0]
    holding = 0
    shares_per_day = []

    # The position is carried from one row to the next, so every row must
    # be visited exactly once and in order.  A stateful callback passed to
    # DataFrame.apply(axis=1) does not guarantee that: pandas releases
    # before 1.1 documented that apply may evaluate the first row twice,
    # which would apply the first trade's side effect two times.
    for day in orders.index:
        # int() truncates toward zero: a fractional signal such as 0.9
        # is treated as 0 (no trade), exactly as before.
        signal = int(signals.loc[day])
        shares = transitions.get((holding, signal), 0)
        holding += shares
        shares_per_day.append(shares)

    self.holding = holding
    orders["Shares"] = shares_per_day
def testPolicy(self, symbol="IBM",
               sd=dt.datetime(2009, 1, 1),
               ed=dt.datetime(2010, 1, 1),
               sv=10000):
    """Query the trained learner for a date range and return its trades.

    Relies on ``self.indicators`` and ``self.learner``, both of which are
    assigned by ``addEvidence``, so that method has to run first.

    :param symbol: ticker to load prices for and to trade.
    :param sd: first date of the range passed to ``util.get_data``.
    :param ed: last date of the range passed to ``util.get_data``.
    :param sv: starting value; accepted but not used here.
    :return: the full orders frame ("Symbol", "Order", "Shares") when
        ``self.testing`` is truthy, otherwise a one-column frame holding
        only "Shares".
    """
    frame = util.get_data([symbol], pd.date_range(sd, ed))
    self._add_indicators(frame, symbol)

    # One prediction per remaining row, keyed by the same index.
    features = frame[self.indicators].to_numpy()
    predictions = self.learner.query(features)
    data_y = pd.DataFrame(index=frame.index, data=predictions)

    orders = pd.DataFrame(index=frame.index)
    orders["Symbol"] = symbol
    orders["Order"] = ""
    orders["Shares"] = 0
    self.strat(data_y, orders)  # fills orders["Shares"] in place

    return orders if self.testing else orders[["Shares"]]
if __name__ == "__main__":
    # Running the module directly only prints this message.
    print("One does not simply think up a strategy")

View File

@@ -93,29 +93,29 @@ def compare_manual_strategies(symbol, sv, sd, ed):
def experiment1(): def experiment1():
symbol = "JPM" symbol = "JPM"
start_value = 10000 sv = 10000
sd = dt.datetime(2008, 1, 1) # in-sample sd = dt.datetime(2008, 1, 1) # in-sample
ed = dt.datetime(2009, 12, 31) # in-sample ed = dt.datetime(2009, 12, 31) # in-sample
# sd = dt.datetime(2010, 1, 1) # out-sample sd_out = dt.datetime(2010, 1, 1) # out-sample
# ed = dt.datetime(2011, 12, 31) # out-sample ed_out = dt.datetime(2011, 12, 31) # out-sample
df = util.get_data([symbol], pd.date_range(sd, ed_out))
df.drop(columns=["SPY"], inplace=True)
# visualize_correlations(symbol, df) # visualize_correlations(symbol, df)
# plot_indicators(symbol, df) # plot_indicators(symbol, df)
# compare_manual_strategies(symbol, start_value, sd, ed) # compare_manual_strategies(symbol, sv, sd, ed)
df = util.get_data([symbol], pd.date_range(sd, ed))
df.drop(columns=["SPY"], inplace=True)
bs = BenchmarkStrategy() bs = BenchmarkStrategy()
orders = bs.testPolicy(symbol, sd, ed, start_value) orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
df["Benchmark"] = marketsim.compute_portvals(orders, start_value) df["Benchmark"] = marketsim.compute_portvals(orders, sv)
df["Orders Benchmark"] = orders["Shares"] df["Orders Benchmark"] = orders["Shares"]
sl = StrategyLearner() sl = StrategyLearner(testing=True)
orders = ms.testPolicy(symbol, sd, ed, start_value) sl.addEvidence(symbol, sd, ed, sv)
df["SL"] = marketsim.compute_portvals(orders, start_value) orders = sl.testPolicy(symbol, sd_out, ed_out, sv)
df["SL"] = marketsim.compute_portvals(orders, sv)
df["Orders SL"] = orders["Shares"] df["Orders SL"] = orders["Shares"]
# df["Holding Manual"] = orders["Shares"].cumsum()
fig, ax = plt.subplots(3, sharex=True) fig, ax = plt.subplots(3, sharex=True)
df[[symbol]].plot(ax=ax[0]) df[[symbol]].plot(ax=ax[0])
@@ -127,6 +127,12 @@ def experiment1():
MultiCursor(fig.canvas, ax, color='r', lw=0.5) MultiCursor(fig.canvas, ax, color='r', lw=0.5)
plt.show() plt.show()
# For debugging the classification learner:
# df["y_train"] = sl.addEvidence(symbol, sd, ed, sv)
# df["y_query"] = sl.testPolicy(symbol, sd, ed, sv)
# df[["y_train", "y_query"]].plot(ax=ax[1])
if __name__ == "__main__": if __name__ == "__main__":
experiment1() experiment1()

View File

@@ -1,242 +1,242 @@
"""MC3-P3: Strategy Learner - grading script. """MC3-P3: Strategy Learner - grading script.
Usage: Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd). - Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.: - Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2017, Georgia Tech Research Corporation Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415 Atlanta, Georgia 30332-0415
All Rights Reserved All Rights Reserved
Template code for CS 4646/7646 Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed such as github and gitlab. This copyright statement should not be removed
or edited. or edited.
We do grant permission to share solutions privately with non-students such We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation. GT honor code violation.
-----do not edit anything above this line--- -----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name) Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID) GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID) GT ID: 900897987 (replace with your GT ID)
""" """
import pytest import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os import os
import sys import sys
import traceback as tb import traceback as tb
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from collections import namedtuple from collections import namedtuple
import time import time
import util import util
import random import random
# Test cases # Test cases
StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed']) StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed'])
strategy_test_cases = [ strategy_test_cases = [
StrategyTestCase( StrategyTestCase(
description="ML4T-220", description="ML4T-220",
insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='clean', benchmark_type='clean',
benchmark=1.0, #benchmark updated Apr 24 2017 benchmark=1.0, #benchmark updated Apr 24 2017
impact=0.0, impact=0.0,
train_time=25, train_time=25,
test_time=5, test_time=5,
max_time=60, max_time=60,
seed=1481090000 seed=1481090000
), ),
StrategyTestCase( StrategyTestCase(
description="AAPL", description="AAPL",
insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='stock', benchmark_type='stock',
benchmark=0.1581999999999999, #benchmark computed Nov 22 2017 benchmark=0.1581999999999999, #benchmark computed Nov 22 2017
impact=0.0, impact=0.0,
train_time=25, train_time=25,
test_time=5, test_time=5,
max_time=60, max_time=60,
seed=1481090000 seed=1481090000
), ),
StrategyTestCase( StrategyTestCase(
description="SINE_FAST_NOISE", description="SINE_FAST_NOISE",
insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='noisy', benchmark_type='noisy',
benchmark=2.0, #benchmark updated Apr 24 2017 benchmark=2.0, #benchmark updated Apr 24 2017
impact=0.0, impact=0.0,
train_time=25, train_time=25,
test_time=5, test_time=5,
max_time=60, max_time=60,
seed=1481090000 seed=1481090000
), ),
StrategyTestCase( StrategyTestCase(
description="UNH - In sample", description="UNH - In sample",
insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='stock', benchmark_type='stock',
benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017 benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017
impact=0.0, impact=0.0,
train_time=25, train_time=25,
test_time=5, test_time=5,
max_time=60, max_time=60,
seed=1481090000 seed=1481090000
), ),
] ]
max_points = 60.0 max_points = 60.0
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field) html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
MAX_HOLDINGS = 1000 MAX_HOLDINGS = 1000
# Test function(s) # Test function(s)
@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases) @pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader): def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
"""Test StrategyLearner. """Test StrategyLearner.
Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float) Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float)
max time (seconds), points for this test case (int), random seed (long), and a grader fixture. max time (seconds), points for this test case (int), random seed (long), and a grader fixture.
""" """
points_earned = 0.0 # initialize points for this test case points_earned = 0.0 # initialize points for this test case
try: try:
incorrect = True incorrect = True
if not 'StrategyLearner' in globals(): if not 'StrategyLearner' in globals():
import importlib import importlib
m = importlib.import_module('StrategyLearner') m = importlib.import_module('StrategyLearner')
globals()['StrategyLearner'] = m globals()['StrategyLearner'] = m
outsample_cr_to_beat = None outsample_cr_to_beat = None
if benchmark_type == 'clean': if benchmark_type == 'clean':
outsample_cr_to_beat = benchmark outsample_cr_to_beat = benchmark
def timeoutwrapper_strategylearner(): def timeoutwrapper_strategylearner():
#Set fixed seed for repeatability #Set fixed seed for repeatability
np.random.seed(seed) np.random.seed(seed)
random.seed(seed) random.seed(seed)
learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact) learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
tmp = time.time() tmp = time.time()
learner.addEvidence(**insample_args) learner.addEvidence(**insample_args)
train_t = time.time()-tmp train_t = time.time()-tmp
tmp = time.time() tmp = time.time()
insample_trades_1 = learner.testPolicy(**insample_args) insample_trades_1 = learner.testPolicy(**insample_args)
test_t = time.time()-tmp test_t = time.time()-tmp
insample_trades_2 = learner.testPolicy(**insample_args) insample_trades_2 = learner.testPolicy(**insample_args)
tmp = time.time() tmp = time.time()
outsample_trades = learner.testPolicy(**outsample_args) outsample_trades = learner.testPolicy(**outsample_args)
out_test_t = time.time()-tmp out_test_t = time.time()-tmp
return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
msgs = [] msgs = []
in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{}) in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
incorrect = False incorrect = False
if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1: if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
incorrect=True incorrect=True
msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape)) msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1: elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
incorrect=True incorrect=True
msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape)) msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1: elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
incorrect=True incorrect=True
msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape)) msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
else: else:
tmp_csum=0.0 tmp_csum=0.0
for date,trade in in_trades_1.iterrows(): for date,trade in in_trades_1.iterrows():
tmp_csum+= trade.iloc[0] tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\ if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\ (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS): (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True incorrect=True
msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade)) msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break break
elif abs(tmp_csum)>MAX_HOLDINGS: elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True incorrect=True
msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade)) msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break break
tmp_csum=0.0 tmp_csum=0.0
for date,trade in in_trades_2.iterrows(): for date,trade in in_trades_2.iterrows():
tmp_csum+= trade.iloc[0] tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\ if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\ (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS): (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True incorrect=True
msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade)) msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break break
elif abs(tmp_csum)>MAX_HOLDINGS: elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True incorrect=True
msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade)) msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break break
tmp_csum=0.0 tmp_csum=0.0
for date,trade in out_trades.iterrows(): for date,trade in out_trades.iterrows():
tmp_csum+= trade.iloc[0] tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\ if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\ (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS): (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True incorrect=True
msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade)) msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break break
elif abs(tmp_csum)>MAX_HOLDINGS: elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True incorrect=True
msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade)) msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break break
# if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\ # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
# ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\ # ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
# ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()): # ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
# incorrect = True # incorrect = True
# msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS)) # msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
# if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]): # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
# incorrect = True # incorrect = True
# msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS)) # msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS))
if not(incorrect): if not(incorrect):
if train_t>train_time: if train_t>train_time:
incorrect=True incorrect=True
msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time)) msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
else: else:
points_earned += 1.0 points_earned += 1.0
if test_t > test_time: if test_t > test_time:
incorrect = True incorrect = True
msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time)) msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
else: else:
points_earned += 2.0 points_earned += 2.0
if not((in_trades_1 == in_trades_2).all()[0]): if not((in_trades_1 == in_trades_2).all()[0]):
incorrect = True incorrect = True
mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2') mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]] mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:") msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
msgs.append(" Mismatched trades:\n {}".format(mismatches)) msgs.append(" Mismatched trades:\n {}".format(mismatches))
else: else:
points_earned += 2.0 points_earned += 2.0
student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0) student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0) student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
if student_insample_cr <= benchmark: if student_insample_cr <= benchmark:
incorrect = True incorrect = True
msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark)) msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
else: else:
points_earned += 5.0 points_earned += 5.0
if outsample_cr_to_beat is None: if outsample_cr_to_beat is None:
if out_test_t > test_time: if out_test_t > test_time:
incorrect = True incorrect = True
msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time)) msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time))
else: else:
points_earned += 5.0 points_earned += 5.0
else: else:
if student_outsample_cr < outsample_cr_to_beat: if student_outsample_cr < outsample_cr_to_beat:
incorrect = True incorrect = True
msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat)) msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
else: else:
points_earned += 5.0 points_earned += 5.0
if incorrect: if incorrect:
inputs_str = " insample_args: {}\n" \ inputs_str = " insample_args: {}\n" \
" outsample_args: {}\n" \ " outsample_args: {}\n" \
" benchmark_type: {}\n" \ " benchmark_type: {}\n" \
@@ -244,96 +244,96 @@ def test_strategy(description, insample_args, outsample_args, benchmark_type, be
" train_time: {}\n" \ " train_time: {}\n" \
" test_time: {}\n" \ " test_time: {}\n" \
" max_time: {}\n" \ " max_time: {}\n" \
" seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed) " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs))) raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
except Exception as e: except Exception as e:
# Test result: failed # Test result: failed
msg = "Test case description: {}\n".format(description) msg = "Test case description: {}\n".format(description)
# Generate a filtered stacktrace, only showing erroneous lines in student file(s) # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
tb_list = tb.extract_tb(sys.exc_info()[2]) tb_list = tb.extract_tb(sys.exc_info()[2])
for i in range(len(tb_list)): for i in range(len(tb_list)):
row = tb_list[i] row = tb_list[i]
tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
# tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']] # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
if tb_list: if tb_list:
msg += "Traceback:\n" msg += "Traceback:\n"
msg += ''.join(tb.format_list(tb_list)) # contains newlines msg += ''.join(tb.format_list(tb_list)) # contains newlines
elif 'grading_traceback' in dir(e): elif 'grading_traceback' in dir(e):
msg += "Traceback:\n" msg += "Traceback:\n"
msg += ''.join(tb.format_list(e.grading_traceback)) msg += ''.join(tb.format_list(e.grading_traceback))
msg += "{}: {}".format(e.__class__.__name__, str(e)) msg += "{}: {}".format(e.__class__.__name__, str(e))
# Report failure result to grader, with stacktrace # Report failure result to grader, with stacktrace
grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg)) grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
raise raise
else: else:
# Test result: passed (no exceptions) # Test result: passed (no exceptions)
grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None)) grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings): def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index
orders = pd.DataFrame(index=date_idx) orders = pd.DataFrame(index=date_idx)
orders['orders'] = 0; orders['orders'][0] = max_holdings; orders['orders'][-1] = -max_holdings orders['orders'] = 0; orders['orders'][0] = max_holdings; orders['orders'][-1] = -max_holdings
return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost) return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
def evalPolicy(student_trades,sym_prices,startval): def evalPolicy(student_trades,sym_prices,startval):
ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum() ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum()
ending_stocks = student_trades.sum()*sym_prices.iloc[-1] ending_stocks = student_trades.sum()*sym_prices.iloc[-1]
return float((ending_cash+ending_stocks)/startval)-1.0 return float((ending_cash+ending_stocks)/startval)-1.0
def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost): def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
orders_df = pd.DataFrame(columns=['Shares','Order','Symbol']) orders_df = pd.DataFrame(columns=['Shares','Order','Symbol'])
for row_idx in student_trades.index: for row_idx in student_trades.index:
nshares = student_trades.loc[row_idx][0] nshares = student_trades.loc[row_idx][0]
if nshares == 0: if nshares == 0:
continue continue
order = 'sell' if nshares < 0 else 'buy' order = 'sell' if nshares < 0 else 'buy'
new_row = pd.DataFrame([[abs(nshares),order,symbol],],columns=['Shares','Order','Symbol'],index=[row_idx,]) new_row = pd.DataFrame([[abs(nshares),order,symbol],],columns=['Shares','Order','Symbol'],index=[row_idx,])
orders_df = orders_df.append(new_row) orders_df = orders_df.append(new_row)
portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost) portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
return float(portvals[-1]/portvals[0])-1 return float(portvals[-1]/portvals[0])-1
def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0): def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
"""Simulate the market for the given date range and orders file.""" """Simulate the market for the given date range and orders file."""
symbols = [] symbols = []
orders = [] orders = []
orders_df = orders_df.sort_index() orders_df = orders_df.sort_index()
for date, order in orders_df.iterrows(): for date, order in orders_df.iterrows():
shares = order['Shares'] shares = order['Shares']
action = order['Order'] action = order['Order']
symbol = order['Symbol'] symbol = order['Symbol']
if action.lower() == 'sell': if action.lower() == 'sell':
shares *= -1 shares *= -1
order = (date, symbol, shares) order = (date, symbol, shares)
orders.append(order) orders.append(order)
symbols.append(symbol) symbols.append(symbol)
symbols = list(set(symbols)) symbols = list(set(symbols))
dates = pd.date_range(start_date, end_date) dates = pd.date_range(start_date, end_date)
prices_all = util.get_data(symbols, dates) prices_all = util.get_data(symbols, dates)
prices = prices_all[symbols] prices = prices_all[symbols]
prices = prices.fillna(method='ffill').fillna(method='bfill') prices = prices.fillna(method='ffill').fillna(method='bfill')
prices['_CASH'] = 1.0 prices['_CASH'] = 1.0
trades = pd.DataFrame(index=prices.index, columns=symbols) trades = pd.DataFrame(index=prices.index, columns=symbols)
trades = trades.fillna(0) trades = trades.fillna(0)
cash = pd.Series(index=prices.index) cash = pd.Series(index=prices.index)
cash = cash.fillna(0) cash = cash.fillna(0)
cash.iloc[0] = startval cash.iloc[0] = startval
for date, symbol, shares in orders: for date, symbol, shares in orders:
price = prices[symbol][date] price = prices[symbol][date]
val = shares * price val = shares * price
# transaction cost model # transaction cost model
val += commission_cost + (pd.np.abs(shares)*price*market_impact) val += commission_cost + (pd.np.abs(shares)*price*market_impact)
positions = prices.loc[date] * trades.sum() positions = prices.loc[date] * trades.sum()
totalcash = cash.sum() totalcash = cash.sum()
if (date < prices.index.min()) or (date > prices.index.max()): if (date < prices.index.min()) or (date > prices.index.max()):
continue continue
trades[symbol][date] += shares trades[symbol][date] += shares
cash[date] -= val cash[date] -= val
trades['_CASH'] = cash trades['_CASH'] = cash
holdings = trades.cumsum() holdings = trades.cumsum()
df_portvals = (prices * holdings).sum(axis=1) df_portvals = (prices * holdings).sum(axis=1)
return df_portvals return df_portvals
if __name__ == "__main__": if __name__ == "__main__":
pytest.main(["-s", __file__]) pytest.main(["-s", __file__])