diff --git a/strategy_evaluation/StrategyLearner.py b/strategy_evaluation/StrategyLearner.py index d0708be..0d4d3ec 100644 --- a/strategy_evaluation/StrategyLearner.py +++ b/strategy_evaluation/StrategyLearner.py @@ -1,88 +1,94 @@ -""" -Template for implementing StrategyLearner (c) 2016 Tucker Balch - -Copyright 2018, Georgia Institute of Technology (Georgia Tech) -Atlanta, Georgia 30332 -All Rights Reserved - -Template code for CS 4646/7646 - -Georgia Tech asserts copyright ownership of this template and all derivative -works, including solutions to the projects assigned in this course. Students -and other users of this template code are advised not to share it with others -or to make it available on publicly viewable websites including repositories -such as github and gitlab. This copyright statement should not be removed -or edited. - -We do grant permission to share solutions privately with non-students such -as potential employers. However, sharing with other current or future -students of CS 7646 is prohibited and subject to being investigated as a -GT honor code violation. 
- ------do not edit anything above this line--- - -Student Name: Tucker Balch (replace with your name) -GT User ID: tb34 (replace with your User ID) -GT ID: 900897987 (replace with your GT ID) -""" - import datetime as dt import pandas as pd -import util as ut +import util +import indicators +from BagLearner import BagLearner +from RTLearner import RTLearner + class StrategyLearner(object): - # constructor - def __init__(self, verbose = False, impact=0.0, commission=0.0): + def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False): self.verbose = verbose self.impact = impact self.commission = commission + self.testing = testing - # this method should create a QLearner, and train it for trading - def addEvidence(self, symbol = "IBM", \ - sd=dt.datetime(2008,1,1), \ - ed=dt.datetime(2009,1,1), \ - sv = 10000): - - # add your code to do learning here - - # example usage of the old backward compatible util function - syms=[symbol] - dates = pd.date_range(sd, ed) - prices_all = ut.get_data(syms, dates) # automatically adds SPY - prices = prices_all[syms] # only portfolio symbols - # prices_SPY = prices_all['SPY'] # only SPY, for comparison later - if self.verbose: print(prices) - - # example use with new colname - volume_all = ut.get_data(syms, dates, colname = "Volume") # automatically adds SPY + def _get_volume(self): + """For reference.""" + volume_all = ut.get_data(syms, dates, colname="Volume") volume = volume_all[syms] # only portfolio symbols # volume_SPY = volume_all['SPY'] # only SPY, for comparison later - if self.verbose: print(volume) + if self.verbose: + print(volume) - # this method should use the existing policy and test it against new data - def testPolicy(self, symbol = "IBM", \ - sd=dt.datetime(2009,1,1), \ - ed=dt.datetime(2010,1,1), \ - sv = 10000): + def _add_indicators(self, df, symbol): + """Add indicators for learning to DataFrame.""" + df.drop(columns=["SPY"], inplace=True) + indicators.macd(df, symbol) + indicators.rsi(df, 
symbol) + indicators.price_sma(df, symbol, [8]) + indicators.price_delta(df, symbol, 3) + df.dropna(inplace=True) - # here we build a fake set of trades - # your code should return the same sort of data - dates = pd.date_range(sd, ed) - prices_all = ut.get_data([symbol], dates) # automatically adds SPY - trades = prices_all[[symbol,]] # only portfolio symbols - # trades_SPY = prices_all['SPY'] # only SPY, for comparison later - trades.values[:,:] = 0 # set them all to nothing - trades.values[0,:] = 1000 # add a BUY at the start - trades.values[40,:] = -1000 # add a SELL - trades.values[41,:] = 1000 # add a BUY - trades.values[60,:] = -2000 # go short from long - trades.values[61,:] = 2000 # go long from short - trades.values[-1,:] = -1000 #exit on the last day - if self.verbose: print(type(trades)) # it better be a DataFrame! - if self.verbose: print(trades) - if self.verbose: print(prices_all) - return trades + def addEvidence(self, symbol="IBM", + sd=dt.datetime(2008, 1, 1), + ed=dt.datetime(2009, 1, 1), + sv=10000): + + self.indicators = ['macd_diff', 'rsi', 'price_sma_8'] + df = util.get_data([symbol], pd.date_range(sd, ed)) + self._add_indicators(df, symbol) + + def classify_y(row): + if row > 0.1: + return 1 + elif row < -0.1: + return -1 + return 0 + + self.learner = RTLearner(leaf_size = 7) + # self.learner = BagLearner(RTLearner, 5, {'leaf_size': 5}) + data_x = df[self.indicators].to_numpy() + y = df['pct_3'].apply(classify_y) + self.learner.addEvidence(data_x, y.to_numpy()) + return y + + def strat(self, data_y, orders): + self.holding = 0 + + def strat(row): + y = int(data_y.loc[row.name][0]) + shares = 0 + if self.holding == 0 and y == 1: + shares = 1000 + elif self.holding == -1000 and y == 1: + shares = 2000 + elif self.holding == 0 and y == -1: + shares = -1000 + elif self.holding == 1000 and y == -1: + shares = -2000 + self.holding += shares + return shares + + orders["Shares"] = orders.apply(strat, axis=1) + + def testPolicy(self, symbol="IBM", + 
sd=dt.datetime(2009, 1, 1), + ed=dt.datetime(2010, 1, 1), + sv=10000): + df = util.get_data([symbol], pd.date_range(sd, ed)) + self._add_indicators(df, symbol) + data_x = df[self.indicators].to_numpy() + data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x)) + + orders = pd.DataFrame(index=df.index) + orders["Symbol"] = symbol + orders["Order"] = "" + orders["Shares"] = 0 + self.strat(data_y, orders) + if self.testing: + return orders + else: + return orders[["Shares"]] -if __name__=="__main__": - print("One does not simply think up a strategy") diff --git a/strategy_evaluation/experiment1.py b/strategy_evaluation/experiment1.py index 8e09aae..65aa2b8 100644 --- a/strategy_evaluation/experiment1.py +++ b/strategy_evaluation/experiment1.py @@ -93,29 +93,29 @@ def compare_manual_strategies(symbol, sv, sd, ed): def experiment1(): symbol = "JPM" - start_value = 10000 + sv = 10000 sd = dt.datetime(2008, 1, 1) # in-sample ed = dt.datetime(2009, 12, 31) # in-sample - # sd = dt.datetime(2010, 1, 1) # out-sample - # ed = dt.datetime(2011, 12, 31) # out-sample + sd_out = dt.datetime(2010, 1, 1) # out-sample + ed_out = dt.datetime(2011, 12, 31) # out-sample + + df = util.get_data([symbol], pd.date_range(sd, ed_out)) + df.drop(columns=["SPY"], inplace=True) # visualize_correlations(symbol, df) # plot_indicators(symbol, df) - # compare_manual_strategies(symbol, start_value, sd, ed) - - df = util.get_data([symbol], pd.date_range(sd, ed)) - df.drop(columns=["SPY"], inplace=True) + # compare_manual_strategies(symbol, sv, sd, ed) bs = BenchmarkStrategy() - orders = bs.testPolicy(symbol, sd, ed, start_value) - df["Benchmark"] = marketsim.compute_portvals(orders, start_value) + orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + df["Benchmark"] = marketsim.compute_portvals(orders, sv) df["Orders Benchmark"] = orders["Shares"] - sl = StrategyLearner() - orders = ms.testPolicy(symbol, sd, ed, start_value) - df["SL"] = marketsim.compute_portvals(orders, start_value) + 
sl = StrategyLearner(testing=True) + sl.addEvidence(symbol, sd, ed, sv) + orders = sl.testPolicy(symbol, sd_out, ed_out, sv) + df["SL"] = marketsim.compute_portvals(orders, sv) df["Orders SL"] = orders["Shares"] - # df["Holding Manual"] = orders["Shares"].cumsum() fig, ax = plt.subplots(3, sharex=True) df[[symbol]].plot(ax=ax[0]) @@ -127,6 +127,12 @@ def experiment1(): MultiCursor(fig.canvas, ax, color='r', lw=0.5) plt.show() + # For debugging the classification learner: + # df["y_train"] = sl.addEvidence(symbol, sd, ed, sv) + # df["y_query"] = sl.testPolicy(symbol, sd, ed, sv) + # df[["y_train", "y_query"]].plot(ax=ax[1]) + + if __name__ == "__main__": experiment1() diff --git a/strategy_evaluation/grade_strategy_learner.py b/strategy_evaluation/grade_strategy_learner.py index 18c567d..0dc0e20 100644 --- a/strategy_evaluation/grade_strategy_learner.py +++ b/strategy_evaluation/grade_strategy_learner.py @@ -1,242 +1,242 @@ -"""MC3-P3: Strategy Learner - grading script. - -Usage: -- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd). -- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.: - PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py - -Copyright 2017, Georgia Tech Research Corporation -Atlanta, Georgia 30332-0415 -All Rights Reserved - -Template code for CS 4646/7646 - -Georgia Tech asserts copyright ownership of this template and all derivative -works, including solutions to the projects assigned in this course. Students -and other users of this template code are advised not to share it with others -or to make it available on publicly viewable websites including repositories -such as github and gitlab. This copyright statement should not be removed -or edited. - -We do grant permission to share solutions privately with non-students such -as potential employers. 
However, sharing with other current or future -students of CS 7646 is prohibited and subject to being investigated as a -GT honor code violation. - ------do not edit anything above this line--- - -Student Name: Tucker Balch (replace with your name) -GT User ID: tb34 (replace with your User ID) -GT ID: 900897987 (replace with your GT ID) -""" - -import pytest -from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput - -import os -import sys -import traceback as tb - -import datetime as dt -import numpy as np -import pandas as pd -from collections import namedtuple - -import time -import util -import random - -# Test cases -StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed']) -strategy_test_cases = [ - StrategyTestCase( - description="ML4T-220", - insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), - outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), - benchmark_type='clean', - benchmark=1.0, #benchmark updated Apr 24 2017 - impact=0.0, - train_time=25, - test_time=5, - max_time=60, - seed=1481090000 - ), - StrategyTestCase( - description="AAPL", - insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), - outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), - benchmark_type='stock', - benchmark=0.1581999999999999, #benchmark computed Nov 22 2017 - impact=0.0, - train_time=25, - test_time=5, - max_time=60, - seed=1481090000 - ), - StrategyTestCase( - description="SINE_FAST_NOISE", - insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), - outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), - benchmark_type='noisy', - benchmark=2.0, 
#benchmark updated Apr 24 2017 - impact=0.0, - train_time=25, - test_time=5, - max_time=60, - seed=1481090000 - ), - StrategyTestCase( - description="UNH - In sample", - insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), - outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), - benchmark_type='stock', - benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017 - impact=0.0, - train_time=25, - test_time=5, - max_time=60, - seed=1481090000 - ), -] - -max_points = 60.0 -html_pre_block = True # surround comments with HTML
tag (for T-Square comments field)
-
-MAX_HOLDINGS = 1000
-
-# Test functon(s)
-@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
-def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
- """Test StrategyLearner.
-
- Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float)
- max time (seconds), points for this test case (int), random seed (long), and a grader fixture.
- """
- points_earned = 0.0 # initialize points for this test case
- try:
- incorrect = True
- if not 'StrategyLearner' in globals():
- import importlib
- m = importlib.import_module('StrategyLearner')
- globals()['StrategyLearner'] = m
- outsample_cr_to_beat = None
- if benchmark_type == 'clean':
- outsample_cr_to_beat = benchmark
- def timeoutwrapper_strategylearner():
- #Set fixed seed for repetability
- np.random.seed(seed)
- random.seed(seed)
- learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
- tmp = time.time()
- learner.addEvidence(**insample_args)
- train_t = time.time()-tmp
- tmp = time.time()
- insample_trades_1 = learner.testPolicy(**insample_args)
- test_t = time.time()-tmp
- insample_trades_2 = learner.testPolicy(**insample_args)
- tmp = time.time()
- outsample_trades = learner.testPolicy(**outsample_args)
- out_test_t = time.time()-tmp
- return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
- msgs = []
- in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
- incorrect = False
- if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
- incorrect=True
- msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
- elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
- incorrect=True
- msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
- elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
- incorrect=True
- msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
- else:
- tmp_csum=0.0
- for date,trade in in_trades_1.iterrows():
- tmp_csum+= trade.iloc[0]
+"""MC3-P3: Strategy Learner - grading script.
+
+Usage:
+- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
+- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
+ PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
+
+Copyright 2017, Georgia Tech Research Corporation
+Atlanta, Georgia 30332-0415
+All Rights Reserved
+
+Template code for CS 4646/7646
+
+Georgia Tech asserts copyright ownership of this template and all derivative
+works, including solutions to the projects assigned in this course. Students
+and other users of this template code are advised not to share it with others
+or to make it available on publicly viewable websites including repositories
+such as github and gitlab. This copyright statement should not be removed
+or edited.
+
+We do grant permission to share solutions privately with non-students such
+as potential employers. However, sharing with other current or future
+students of CS 7646 is prohibited and subject to being investigated as a
+GT honor code violation.
+
+-----do not edit anything above this line---
+
+Student Name: Tucker Balch (replace with your name)
+GT User ID: tb34 (replace with your User ID)
+GT ID: 900897987 (replace with your GT ID)
+"""
+
+import pytest
+from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
+
+import os
+import sys
+import traceback as tb
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from collections import namedtuple
+
+import time
+import util
+import random
+
+# Test cases
+StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed'])
+strategy_test_cases = [
+ StrategyTestCase(
+ description="ML4T-220",
+ insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
+ outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
+ benchmark_type='clean',
+ benchmark=1.0, #benchmark updated Apr 24 2017
+ impact=0.0,
+ train_time=25,
+ test_time=5,
+ max_time=60,
+ seed=1481090000
+ ),
+ StrategyTestCase(
+ description="AAPL",
+ insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
+ outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
+ benchmark_type='stock',
+ benchmark=0.1581999999999999, #benchmark computed Nov 22 2017
+ impact=0.0,
+ train_time=25,
+ test_time=5,
+ max_time=60,
+ seed=1481090000
+ ),
+ StrategyTestCase(
+ description="SINE_FAST_NOISE",
+ insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
+ outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
+ benchmark_type='noisy',
+ benchmark=2.0, #benchmark updated Apr 24 2017
+ impact=0.0,
+ train_time=25,
+ test_time=5,
+ max_time=60,
+ seed=1481090000
+ ),
+ StrategyTestCase(
+ description="UNH - In sample",
+ insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
+ outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
+ benchmark_type='stock',
+ benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017
+ impact=0.0,
+ train_time=25,
+ test_time=5,
+ max_time=60,
+ seed=1481090000
+ ),
+]
+
+max_points = 60.0
+html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
+
+MAX_HOLDINGS = 1000
+
+# Test function(s)
+@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
+def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
+ """Test StrategyLearner.
+
+ Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float)
+ max time (seconds), points for this test case (int), random seed (long), and a grader fixture.
+ """
+ points_earned = 0.0 # initialize points for this test case
+ try:
+ incorrect = True
+ if not 'StrategyLearner' in globals():
+ import importlib
+ m = importlib.import_module('StrategyLearner')
+ globals()['StrategyLearner'] = m
+ outsample_cr_to_beat = None
+ if benchmark_type == 'clean':
+ outsample_cr_to_beat = benchmark
+ def timeoutwrapper_strategylearner():
+ #Set fixed seed for repeatability
+ np.random.seed(seed)
+ random.seed(seed)
+ learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
+ tmp = time.time()
+ learner.addEvidence(**insample_args)
+ train_t = time.time()-tmp
+ tmp = time.time()
+ insample_trades_1 = learner.testPolicy(**insample_args)
+ test_t = time.time()-tmp
+ insample_trades_2 = learner.testPolicy(**insample_args)
+ tmp = time.time()
+ outsample_trades = learner.testPolicy(**outsample_args)
+ out_test_t = time.time()-tmp
+ return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
+ msgs = []
+ in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
+ incorrect = False
+ if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
+ incorrect=True
+ msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
+ elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
+ incorrect=True
+ msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
+ elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
+ incorrect=True
+ msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
+ else:
+ tmp_csum=0.0
+ for date,trade in in_trades_1.iterrows():
+ tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
- (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
- incorrect=True
- msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
- break
- elif abs(tmp_csum)>MAX_HOLDINGS:
- incorrect=True
- msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
- break
- tmp_csum=0.0
- for date,trade in in_trades_2.iterrows():
- tmp_csum+= trade.iloc[0]
+ (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+ incorrect=True
+ msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+ break
+ elif abs(tmp_csum)>MAX_HOLDINGS:
+ incorrect=True
+ msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+ break
+ tmp_csum=0.0
+ for date,trade in in_trades_2.iterrows():
+ tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
- (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
- incorrect=True
- msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
- break
- elif abs(tmp_csum)>MAX_HOLDINGS:
- incorrect=True
- msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
- break
- tmp_csum=0.0
- for date,trade in out_trades.iterrows():
- tmp_csum+= trade.iloc[0]
+ (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+ incorrect=True
+ msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+ break
+ elif abs(tmp_csum)>MAX_HOLDINGS:
+ incorrect=True
+ msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+ break
+ tmp_csum=0.0
+ for date,trade in out_trades.iterrows():
+ tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
- (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
- incorrect=True
- msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
- break
- elif abs(tmp_csum)>MAX_HOLDINGS:
- incorrect=True
- msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
- break
+ (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+ incorrect=True
+ msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+ break
+ elif abs(tmp_csum)>MAX_HOLDINGS:
+ incorrect=True
+ msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+ break
# if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
# ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
- # ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
- # incorrect = True
- # msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
- # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
- # incorrect = True
- # msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS))
- if not(incorrect):
- if train_t>train_time:
- incorrect=True
- msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
- else:
- points_earned += 1.0
- if test_t > test_time:
- incorrect = True
- msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
- else:
- points_earned += 2.0
- if not((in_trades_1 == in_trades_2).all()[0]):
- incorrect = True
- mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
- mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
- msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
- msgs.append(" Mismatched trades:\n {}".format(mismatches))
- else:
- points_earned += 2.0
- student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
- student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
- if student_insample_cr <= benchmark:
- incorrect = True
- msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
- else:
- points_earned += 5.0
- if outsample_cr_to_beat is None:
- if out_test_t > test_time:
- incorrect = True
- msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time))
- else:
- points_earned += 5.0
- else:
- if student_outsample_cr < outsample_cr_to_beat:
- incorrect = True
- msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
- else:
- points_earned += 5.0
- if incorrect:
+ # ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
+ # incorrect = True
+ # msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
+ # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
+ # incorrect = True
+ # msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS))
+ if not(incorrect):
+ if train_t>train_time:
+ incorrect=True
+ msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
+ else:
+ points_earned += 1.0
+ if test_t > test_time:
+ incorrect = True
+ msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
+ else:
+ points_earned += 2.0
+ if not((in_trades_1 == in_trades_2).all()[0]):
+ incorrect = True
+ mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
+ mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
+ msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
+ msgs.append(" Mismatched trades:\n {}".format(mismatches))
+ else:
+ points_earned += 2.0
+ student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
+ student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
+ if student_insample_cr <= benchmark:
+ incorrect = True
+ msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
+ else:
+ points_earned += 5.0
+ if outsample_cr_to_beat is None:
+ if out_test_t > test_time:
+ incorrect = True
+ msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time))
+ else:
+ points_earned += 5.0
+ else:
+ if student_outsample_cr < outsample_cr_to_beat:
+ incorrect = True
+ msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
+ else:
+ points_earned += 5.0
+ if incorrect:
inputs_str = " insample_args: {}\n" \
" outsample_args: {}\n" \
" benchmark_type: {}\n" \
@@ -244,96 +244,96 @@ def test_strategy(description, insample_args, outsample_args, benchmark_type, be
" train_time: {}\n" \
" test_time: {}\n" \
" max_time: {}\n" \
- " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
- raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
- except Exception as e:
- # Test result: failed
- msg = "Test case description: {}\n".format(description)
-
- # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
- tb_list = tb.extract_tb(sys.exc_info()[2])
- for i in range(len(tb_list)):
- row = tb_list[i]
- tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
- # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
- if tb_list:
- msg += "Traceback:\n"
- msg += ''.join(tb.format_list(tb_list)) # contains newlines
- elif 'grading_traceback' in dir(e):
- msg += "Traceback:\n"
- msg += ''.join(tb.format_list(e.grading_traceback))
- msg += "{}: {}".format(e.__class__.__name__, str(e))
-
- # Report failure result to grader, with stacktrace
- grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
- raise
- else:
- # Test result: passed (no exceptions)
- grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
-
-def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
- date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index
- orders = pd.DataFrame(index=date_idx)
- orders['orders'] = 0; orders['orders'][0] = max_holdings; orders['orders'][-1] = -max_holdings
- return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
-
-def evalPolicy(student_trades,sym_prices,startval):
- ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum()
- ending_stocks = student_trades.sum()*sym_prices.iloc[-1]
- return float((ending_cash+ending_stocks)/startval)-1.0
-
-def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
- orders_df = pd.DataFrame(columns=['Shares','Order','Symbol'])
- for row_idx in student_trades.index:
- nshares = student_trades.loc[row_idx][0]
- if nshares == 0:
- continue
- order = 'sell' if nshares < 0 else 'buy'
- new_row = pd.DataFrame([[abs(nshares),order,symbol],],columns=['Shares','Order','Symbol'],index=[row_idx,])
- orders_df = orders_df.append(new_row)
- portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
- return float(portvals[-1]/portvals[0])-1
-
-def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
- """Simulate the market for the given date range and orders file."""
- symbols = []
- orders = []
- orders_df = orders_df.sort_index()
- for date, order in orders_df.iterrows():
- shares = order['Shares']
- action = order['Order']
- symbol = order['Symbol']
- if action.lower() == 'sell':
- shares *= -1
- order = (date, symbol, shares)
- orders.append(order)
- symbols.append(symbol)
- symbols = list(set(symbols))
- dates = pd.date_range(start_date, end_date)
- prices_all = util.get_data(symbols, dates)
- prices = prices_all[symbols]
- prices = prices.fillna(method='ffill').fillna(method='bfill')
- prices['_CASH'] = 1.0
- trades = pd.DataFrame(index=prices.index, columns=symbols)
- trades = trades.fillna(0)
- cash = pd.Series(index=prices.index)
- cash = cash.fillna(0)
- cash.iloc[0] = startval
- for date, symbol, shares in orders:
- price = prices[symbol][date]
- val = shares * price
- # transaction cost model
- val += commission_cost + (pd.np.abs(shares)*price*market_impact)
- positions = prices.loc[date] * trades.sum()
- totalcash = cash.sum()
- if (date < prices.index.min()) or (date > prices.index.max()):
- continue
- trades[symbol][date] += shares
- cash[date] -= val
- trades['_CASH'] = cash
- holdings = trades.cumsum()
- df_portvals = (prices * holdings).sum(axis=1)
- return df_portvals
-
-if __name__ == "__main__":
- pytest.main(["-s", __file__])
+ " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
+ raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
+ except Exception as e:
+ # Test result: failed
+ msg = "Test case description: {}\n".format(description)
+
+ # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
+ tb_list = tb.extract_tb(sys.exc_info()[2])
+ for i in range(len(tb_list)):
+ row = tb_list[i]
+ tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
+ # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
+ if tb_list:
+ msg += "Traceback:\n"
+ msg += ''.join(tb.format_list(tb_list)) # contains newlines
+ elif 'grading_traceback' in dir(e):
+ msg += "Traceback:\n"
+ msg += ''.join(tb.format_list(e.grading_traceback))
+ msg += "{}: {}".format(e.__class__.__name__, str(e))
+
+ # Report failure result to grader, with stacktrace
+ grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
+ raise
+ else:
+ # Test result: passed (no exceptions)
+ grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
+
+def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
+ date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index
+ orders = pd.DataFrame(index=date_idx)
+ orders['orders'] = 0; orders['orders'][0] = max_holdings; orders['orders'][-1] = -max_holdings
+ return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
+
+def evalPolicy(student_trades,sym_prices,startval):
+ ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum()
+ ending_stocks = student_trades.sum()*sym_prices.iloc[-1]
+ return float((ending_cash+ending_stocks)/startval)-1.0
+
+def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
+ orders_df = pd.DataFrame(columns=['Shares','Order','Symbol'])
+ for row_idx in student_trades.index:
+ nshares = student_trades.loc[row_idx][0]
+ if nshares == 0:
+ continue
+ order = 'sell' if nshares < 0 else 'buy'
+ new_row = pd.DataFrame([[abs(nshares),order,symbol],],columns=['Shares','Order','Symbol'],index=[row_idx,])
+ orders_df = orders_df.append(new_row)
+ portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
+ return float(portvals[-1]/portvals[0])-1
+
+def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
+ """Simulate the market for the given date range and orders file."""
+ symbols = []
+ orders = []
+ orders_df = orders_df.sort_index()
+ for date, order in orders_df.iterrows():
+ shares = order['Shares']
+ action = order['Order']
+ symbol = order['Symbol']
+ if action.lower() == 'sell':
+ shares *= -1
+ order = (date, symbol, shares)
+ orders.append(order)
+ symbols.append(symbol)
+ symbols = list(set(symbols))
+ dates = pd.date_range(start_date, end_date)
+ prices_all = util.get_data(symbols, dates)
+ prices = prices_all[symbols]
+ prices = prices.fillna(method='ffill').fillna(method='bfill')
+ prices['_CASH'] = 1.0
+ trades = pd.DataFrame(index=prices.index, columns=symbols)
+ trades = trades.fillna(0)
+ cash = pd.Series(index=prices.index)
+ cash = cash.fillna(0)
+ cash.iloc[0] = startval
+ for date, symbol, shares in orders:
+ price = prices[symbol][date]
+ val = shares * price
+ # transaction cost model
+ val += commission_cost + (pd.np.abs(shares)*price*market_impact)
+ positions = prices.loc[date] * trades.sum()
+ totalcash = cash.sum()
+ if (date < prices.index.min()) or (date > prices.index.max()):
+ continue
+ trades[symbol][date] += shares
+ cash[date] -= val
+ trades['_CASH'] = cash
+ holdings = trades.cumsum()
+ df_portvals = (prices * holdings).sum(axis=1)
+ return df_portvals
+
+if __name__ == "__main__":
+ pytest.main(["-s", __file__])