1
0
Fork 0

Add template for project 8

master
Felix Martin 2020-10-15 16:46:50 -04:00
parent cefc6f7893
commit f5e91eba0a
3 changed files with 428 additions and 0 deletions

View File

@ -0,0 +1,89 @@
"""
Template for implementing StrategyLearner (c) 2016 Tucker Balch
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import datetime as dt
import pandas as pd
import util as ut
import random
class StrategyLearner(object):
# constructor
def __init__(self, verbose = False, impact=0.0, commission=0.0):
self.verbose = verbose
self.impact = impact
self.commission = commission
# this method should create a QLearner, and train it for trading
def addEvidence(self, symbol = "IBM", \
sd=dt.datetime(2008,1,1), \
ed=dt.datetime(2009,1,1), \
sv = 10000):
# add your code to do learning here
# example usage of the old backward compatible util function
syms=[symbol]
dates = pd.date_range(sd, ed)
prices_all = ut.get_data(syms, dates) # automatically adds SPY
prices = prices_all[syms] # only portfolio symbols
prices_SPY = prices_all['SPY'] # only SPY, for comparison later
if self.verbose: print(prices)
# example use with new colname
volume_all = ut.get_data(syms, dates, colname = "Volume") # automatically adds SPY
volume = volume_all[syms] # only portfolio symbols
volume_SPY = volume_all['SPY'] # only SPY, for comparison later
if self.verbose: print(volume)
# this method should use the existing policy and test it against new data
def testPolicy(self, symbol = "IBM", \
sd=dt.datetime(2009,1,1), \
ed=dt.datetime(2010,1,1), \
sv = 10000):
# here we build a fake set of trades
# your code should return the same sort of data
dates = pd.date_range(sd, ed)
prices_all = ut.get_data([symbol], dates) # automatically adds SPY
trades = prices_all[[symbol,]] # only portfolio symbols
trades_SPY = prices_all['SPY'] # only SPY, for comparison later
trades.values[:,:] = 0 # set them all to nothing
trades.values[0,:] = 1000 # add a BUY at the start
trades.values[40,:] = -1000 # add a SELL
trades.values[41,:] = 1000 # add a BUY
trades.values[60,:] = -2000 # go short from long
trades.values[61,:] = 2000 # go long from short
trades.values[-1,:] = -1000 #exit on the last day
if self.verbose: print(type(trades)) # it better be a DataFrame!
if self.verbose: print(trades)
if self.verbose: print(prices_all)
return trades
if __name__=="__main__":
print("One does not simply think up a strategy")

View File

@ -0,0 +1,339 @@
"""MC3-P3: Strategy Learner - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os
import sys
import traceback as tb
import datetime as dt
import numpy as np
import pandas as pd
from collections import namedtuple
import time
import util
import random
# Test cases
StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed'])
strategy_test_cases = [
StrategyTestCase(
description="ML4T-220",
insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='clean',
benchmark=1.0, #benchmark updated Apr 24 2017
impact=0.0,
train_time=25,
test_time=5,
max_time=60,
seed=1481090000
),
StrategyTestCase(
description="AAPL",
insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='stock',
benchmark=0.1581999999999999, #benchmark computed Nov 22 2017
impact=0.0,
train_time=25,
test_time=5,
max_time=60,
seed=1481090000
),
StrategyTestCase(
description="SINE_FAST_NOISE",
insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='noisy',
benchmark=2.0, #benchmark updated Apr 24 2017
impact=0.0,
train_time=25,
test_time=5,
max_time=60,
seed=1481090000
),
StrategyTestCase(
description="UNH - In sample",
insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
benchmark_type='stock',
benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017
impact=0.0,
train_time=25,
test_time=5,
max_time=60,
seed=1481090000
),
]
max_points = 60.0
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
MAX_HOLDINGS = 1000
# Test functon(s)
@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
"""Test StrategyLearner.
Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float)
max time (seconds), points for this test case (int), random seed (long), and a grader fixture.
"""
points_earned = 0.0 # initialize points for this test case
try:
incorrect = True
if not 'StrategyLearner' in globals():
import importlib
m = importlib.import_module('StrategyLearner')
globals()['StrategyLearner'] = m
outsample_cr_to_beat = None
if benchmark_type == 'clean':
outsample_cr_to_beat = benchmark
def timeoutwrapper_strategylearner():
#Set fixed seed for repetability
np.random.seed(seed)
random.seed(seed)
learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
tmp = time.time()
learner.addEvidence(**insample_args)
train_t = time.time()-tmp
tmp = time.time()
insample_trades_1 = learner.testPolicy(**insample_args)
test_t = time.time()-tmp
insample_trades_2 = learner.testPolicy(**insample_args)
tmp = time.time()
outsample_trades = learner.testPolicy(**outsample_args)
out_test_t = time.time()-tmp
return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
msgs = []
in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
incorrect = False
if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
incorrect=True
msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
incorrect=True
msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
incorrect=True
msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
else:
tmp_csum=0.0
for date,trade in in_trades_1.iterrows():
tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True
msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break
elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True
msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break
tmp_csum=0.0
for date,trade in in_trades_2.iterrows():
tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True
msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break
elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True
msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break
tmp_csum=0.0
for date,trade in out_trades.iterrows():
tmp_csum+= trade.iloc[0]
if (trade.iloc[0]!=0) and\
(trade.abs().iloc[0]!=MAX_HOLDINGS) and\
(trade.abs().iloc[0]!=2*MAX_HOLDINGS):
incorrect=True
msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
break
elif abs(tmp_csum)>MAX_HOLDINGS:
incorrect=True
msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
break
# if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
# ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
# ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
# incorrect = True
# msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
# if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
# incorrect = True
# msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS))
if not(incorrect):
if train_t>train_time:
incorrect=True
msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
else:
points_earned += 1.0
if test_t > test_time:
incorrect = True
msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
else:
points_earned += 2.0
if not((in_trades_1 == in_trades_2).all()[0]):
incorrect = True
mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
msgs.append(" Mismatched trades:\n {}".format(mismatches))
else:
points_earned += 2.0
student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
if student_insample_cr <= benchmark:
incorrect = True
msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
else:
points_earned += 5.0
if outsample_cr_to_beat is None:
if out_test_t > test_time:
incorrect = True
msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time))
else:
points_earned += 5.0
else:
if student_outsample_cr < outsample_cr_to_beat:
incorrect = True
msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
else:
points_earned += 5.0
if incorrect:
inputs_str = " insample_args: {}\n" \
" outsample_args: {}\n" \
" benchmark_type: {}\n" \
" benchmark: {}\n" \
" train_time: {}\n" \
" test_time: {}\n" \
" max_time: {}\n" \
" seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
except Exception as e:
# Test result: failed
msg = "Test case description: {}\n".format(description)
# Generate a filtered stacktrace, only showing erroneous lines in student file(s)
tb_list = tb.extract_tb(sys.exc_info()[2])
for i in range(len(tb_list)):
row = tb_list[i]
tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
# tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
if tb_list:
msg += "Traceback:\n"
msg += ''.join(tb.format_list(tb_list)) # contains newlines
elif 'grading_traceback' in dir(e):
msg += "Traceback:\n"
msg += ''.join(tb.format_list(e.grading_traceback))
msg += "{}: {}".format(e.__class__.__name__, str(e))
# Report failure result to grader, with stacktrace
grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
raise
else:
# Test result: passed (no exceptions)
grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index
orders = pd.DataFrame(index=date_idx)
orders['orders'] = 0; orders['orders'][0] = max_holdings; orders['orders'][-1] = -max_holdings
return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
def evalPolicy(student_trades,sym_prices,startval):
ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum()
ending_stocks = student_trades.sum()*sym_prices.iloc[-1]
return float((ending_cash+ending_stocks)/startval)-1.0
def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
orders_df = pd.DataFrame(columns=['Shares','Order','Symbol'])
for row_idx in student_trades.index:
nshares = student_trades.loc[row_idx][0]
if nshares == 0:
continue
order = 'sell' if nshares < 0 else 'buy'
new_row = pd.DataFrame([[abs(nshares),order,symbol],],columns=['Shares','Order','Symbol'],index=[row_idx,])
orders_df = orders_df.append(new_row)
portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
return float(portvals[-1]/portvals[0])-1
def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
"""Simulate the market for the given date range and orders file."""
symbols = []
orders = []
orders_df = orders_df.sort_index()
for date, order in orders_df.iterrows():
shares = order['Shares']
action = order['Order']
symbol = order['Symbol']
if action.lower() == 'sell':
shares *= -1
order = (date, symbol, shares)
orders.append(order)
symbols.append(symbol)
symbols = list(set(symbols))
dates = pd.date_range(start_date, end_date)
prices_all = util.get_data(symbols, dates)
prices = prices_all[symbols]
prices = prices.fillna(method='ffill').fillna(method='bfill')
prices['_CASH'] = 1.0
trades = pd.DataFrame(index=prices.index, columns=symbols)
trades = trades.fillna(0)
cash = pd.Series(index=prices.index)
cash = cash.fillna(0)
cash.iloc[0] = startval
for date, symbol, shares in orders:
price = prices[symbol][date]
val = shares * price
# transaction cost model
val += commission_cost + (pd.np.abs(shares)*price*market_impact)
positions = prices.loc[date] * trades.sum()
totalcash = cash.sum()
if (date < prices.index.min()) or (date > prices.index.max()):
continue
trades[symbol][date] += shares
cash[date] -= val
trades['_CASH'] = cash
holdings = trades.cumsum()
df_portvals = (prices * holdings).sum(axis=1)
return df_portvals
if __name__ == "__main__":
pytest.main(["-s", __file__])

Binary file not shown.