Compare commits

...

25 Commits

Author SHA1 Message Date
e577ce34d2 Add support for trading view data 2021-06-20 15:04:30 -04:00
71f9b58803 Add util because I cannot remember why I would ignore it 2021-06-19 09:47:35 -04:00
0af5a9885d Add project 8 report to readme 2020-11-10 12:41:50 -05:00
063d9a75ae Finish project 8 and course! 2020-11-10 12:33:42 -05:00
6e1f70bcba Improve QLearner so that commission is considered 2020-11-10 11:01:21 -05:00
761a0366e4 Finish first version of Q trader 2020-11-09 15:07:52 -05:00
169dd8278d Implement binning and state calculation 2020-11-07 12:39:37 -05:00
889bcf68ca Finish experiment 1 and start with Q trader 2020-11-05 14:34:48 -05:00
5fbbc26929 Update StrategyLearner to pass tests 2020-11-04 17:32:02 -05:00
10d87aefd3 Add tree learners to strategy evaluation directory 2020-11-04 15:15:24 -05:00
05db89e8c2 Implement first version of strategy learner
This version does not pass the automatic test.
2020-11-04 15:14:27 -05:00
c40ffcf84b Show both MACD and indicator strat on figure
Prepare for strategy learner.
2020-11-04 09:23:42 -05:00
0519ae9336 Finish manual strategy for project 8
I struggled with the manual strategy, mostly because I tried to read
good triggers from the price action charts. Finally, I had the ingenious
(hmm) idea to scatter plot the 1, 3, and 5 day percentage returns over
different indicators. I can also use this information to train my Q
learner.
2020-11-03 19:05:43 -05:00
43e297c075 Change indicators to return their results and work on three indicator strat 2020-11-02 09:10:01 -05:00
4679910374 Implement first version of manual strategy 2020-10-27 19:57:46 -04:00
1798e9569e Make marketsim support buying and selling via sign of shares 2020-10-27 19:53:55 -04:00
85a9c4fcb3 Start working on strategy evaluation 2020-10-26 21:44:18 -04:00
d112dce5f5 Implement dyna-q to finish project 7 2020-10-19 08:56:24 -04:00
22022c3780 Update readme for project 7 2020-10-18 14:48:15 -04:00
d5aa22e9dd Implement Q learner 2020-10-18 14:44:32 -04:00
f5e91eba0a Add template for project 8 2020-10-15 16:46:50 -04:00
cefc6f7893 Add files for qlearning assignment 2020-10-15 16:44:21 -04:00
6a9e762012 Fix picture link in project 6 report 2020-10-15 13:11:40 -04:00
66ffe6d11b Add figures to report to finish project 6 2020-10-15 13:09:57 -04:00
9464943f75 Implement remaining indicators and add figures 2020-10-15 13:05:46 -04:00
56 changed files with 3806 additions and 21 deletions

1
.gitignore vendored
View File

@@ -2,4 +2,3 @@ __pycache__
assess_learners/Data assess_learners/Data
data data
grading grading
util.py

View File

@@ -38,6 +38,6 @@ unzip -n zips/*.zip -d ./
- [Report 3](./assess_learners/assess_learners.md) - [Report 3](./assess_learners/assess_learners.md)
- No reports for projects 4 (defeat learners) and 5 (marketsim) - No reports for projects 4 (defeat learners) and 5 (marketsim)
- [Report 6](./manual_strategy/manual_strategy.md) - [Report 6](./manual_strategy/manual_strategy.md)
- [Report 7](#) - No report for project 7
- [Report 8](#) - [Report 8](./strategy_evaluation/strategy_evaluation.md)

View File

@@ -0,0 +1,77 @@
import numpy as np
class AbstractTreeLearner:
    """Base class for decision-tree learners stored as a flat NumPy array.

    The tree is a 1-D structured array (``self.rel_tree``). Every node holds
    ``(factor, split_value, left, right)`` where ``left``/``right`` are offsets
    relative to the node's own index. A leaf has ``factor == LEAF`` and keeps
    its prediction in the ``split_value`` slot.

    Subclasses must set ``self.leaf_size`` and implement
    ``get_i_and_split_value(xs, y)`` returning ``(factor_index, split_value)``.
    """

    LEAF = -1
    NA = -1
    # The split value is stored as float64. Partitioning in build_tree happens
    # with float64 thresholds; storing them as float32 would round the
    # threshold and could send a training point down the wrong branch when it
    # is queried later.
    NODE_DTYPE = 'i4, f8, i4, i4'

    def author(self):
        """Return the author's user name."""
        return 'felixm'

    def create_node(self, factor, split_value, left, right):
        """Return a one-element structured array describing a single node."""
        return np.array([(factor, split_value, left, right), ],
                        dtype=self.NODE_DTYPE)

    def query_point(self, point):
        """Walk the tree for one sample and return the value of its leaf."""
        node_index = 0
        while self.rel_tree[node_index][0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = node[0]
            split_value = node[1]
            if point[split_factor] <= split_value:
                # Descend into the left sub-tree (relative offset).
                node_index += node[2]
            else:
                # Descend into the right sub-tree (relative offset).
                node_index += node[3]
        return self.rel_tree[node_index][1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)

    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: 2-D array of feature rows
        @param y: 1-D array with one target value per row of xs
        @returns structured array holding the (sub-)tree in pre-order
        """
        assert(xs.shape[0] == y.shape[0])
        assert(xs.shape[0] > 0)  # If this is 0 something went wrong.

        if xs.shape[0] <= self.leaf_size:
            # Small partitions become leaves. The mean target is snapped to
            # one of the three class labels -1 / 0 / 1.
            value = np.mean(y)
            if value < -0.2:
                value = -1
            elif value > 0.2:
                value = 1
            else:
                value = 0
            return self.create_node(self.LEAF, value, self.NA, self.NA)

        if np.all(y[0] == y):
            # All targets agree, further splitting would gain nothing.
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value
        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # The left child directly follows the root; the right child follows
        # the complete left sub-tree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)

View File

@@ -0,0 +1,36 @@
import pandas as pd
import util as ut
import datetime as dt
class BenchmarkStrategy:
    """Buy-and-hold reference strategy used to judge the other strategies."""

    def __init__(self, verbose=False, impact=0.0, commission=0.0, units=1000):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.units = units

    def addEvidence(self, symbol=0, sd=0, ed=0, sv=0):
        """No-op; only present so the class offers the learner API."""
        return None

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Buy ``self.units`` on the first row and sell them on the last.

        Returns an orders DataFrame with the columns Symbol, Order and Shares,
        indexed like the price data loaded for ``symbol``.
        """
        date_range = pd.date_range(sd, ed)
        price_frame = ut.get_data([symbol], date_range, addSPY=False,
                                  colname='close', datecol='time')

        orders = pd.DataFrame(index=price_frame.index)
        for column, default in (("Symbol", symbol), ("Order", ""), ("Shares", 0)):
            orders[column] = default

        # Enter on the first available row, exit on the last one.
        orders.iloc[0] = [symbol, "BUY", self.units]
        orders.iloc[-1] = [symbol, "SELL", -self.units]

        if self.verbose:
            print(type(orders))  # it better be a DataFrame!
            print(orders)
        return orders

View File

@@ -0,0 +1,114 @@
import datetime as dt
import pandas as pd
import util
import indicators
class ManualStrategy:
    """Hand-written trading rules based on MACD, RSI and price/SMA ratio.

    ``self.holding`` tracks the current position (-1000, 0 or 1000 shares)
    while a strategy walks over the order frame.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        # Start flat so the strategy methods can be used without going
        # through testPolicy first.
        self.holding = 0

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Load prices and volume for ``symbol``; nothing is learned.

        The manual strategy has no trainable state. The method exists for API
        compatibility and prints the loaded data when ``verbose`` is set.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = util.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        if self.verbose:
            print(prices)

        # automatically adds SPY
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)

    def macd_strat(self, macd, orders):
        """Strategy based on MACD cross.

        Writes the number of shares traded per day into ``orders['Shares']``.
        The first row has no predecessor and therefore ends up as NaN.
        """

        def strat(ser):
            # ser is a two-row window; compare previous and current values.
            m = macd.loc[ser.index]
            prev_macd, prev_signal, _ = m.iloc[0]
            cur_macd, cur_signal, _ = m.iloc[1]
            shares = 0
            if cur_macd < -1 and prev_macd < prev_signal \
                    and cur_macd > cur_signal:
                # MACD crossed above its signal line: go long.
                if self.holding == 0:
                    shares = 1000
                elif self.holding == -1000:
                    shares = 2000
            elif cur_macd > 1 and prev_macd > prev_signal \
                    and cur_macd < cur_signal:
                # MACD crossed below its signal line: go short.
                if self.holding == 0:
                    shares = -1000
                elif self.holding == 1000:
                    shares = -2000
            self.holding += shares
            return shares

        orders['Shares'] = orders['Shares'].rolling(2).apply(strat)

    def three_indicator_strat(self, macd, rsi, price_sma, orders):
        """Strategy based on three indicators. Thresholds selected based on
        scatter plots.

        ``macd`` must have three columns with the MACD difference last;
        ``rsi`` and ``price_sma`` are read from their first column. The
        shares traded per row are written into ``orders['Shares']``.
        """

        def strat(row):
            shares = 0
            _, _, macd_diff = macd.loc[row.name]
            # Explicit positional access; series[0] on a labelled Series is
            # deprecated in recent pandas versions.
            cur_rsi = rsi.loc[row.name].iloc[0]
            cur_price_sma = price_sma.loc[row.name].iloc[0]
            if self.holding == -1000 and cur_price_sma < 0.9:
                shares = 2000
            elif self.holding == 0 and cur_price_sma < 0.9:
                shares = 1000
            elif self.holding == -1000 and cur_rsi > 80:
                shares = 2000
            elif self.holding == 0 and cur_rsi > 80:
                shares = 1000
            elif self.holding == -1000 and macd_diff < -0.5:
                shares = 2000
            elif self.holding == 0 and macd_diff < -0.5:
                shares = 1000
            elif self.holding == 1000 and cur_price_sma > 1.1:
                shares = -2000
            elif self.holding == 0 and cur_price_sma > 1.1:
                shares = -1000
            self.holding += shares
            return shares

        orders['Shares'] = orders.apply(strat, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000, macd_strat=False):
        """Return the orders of the manual strategy for the given period.

        With ``macd_strat=True`` the pure MACD-cross rule is used, otherwise
        the three-indicator rule.
        """
        self.holding = 0
        df = util.get_data([symbol], pd.date_range(sd, ed))
        df.drop(columns=["SPY"], inplace=True)

        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0

        macd = indicators.macd(df, symbol)
        rsi = indicators.rsi(df, symbol)
        price_sma = indicators.price_sma(df, symbol, [8])

        if macd_strat:
            self.macd_strat(macd, orders)
        else:
            self.three_indicator_strat(macd, rsi, price_sma, orders)
        return orders

170
crypto_eval/QLearner.py Normal file
View File

@@ -0,0 +1,170 @@
import datetime as dt
import pandas as pd
import util
import indicators
from qlearning_robot.QLearner import QLearner as Learner
from dataclasses import dataclass
@dataclass
class Holding:
    """Mutable snapshot of the simulated account during training."""

    # Cash and equity become fractional as soon as prices, impact or
    # commission are applied, hence float rather than int.
    cash: float
    shares: int
    equity: float
class QLearner(object):
    """Trading strategy that feeds binned indicator states into a Q-learner.

    A state encodes the current position (short, flat, long) together with
    one bin index per indicator. The three actions are 0 = buy, 1 = sell and
    2 = hold.
    """

    def __init__(self, verbose=False, impact=0.0, units=1000, commission=0.0,
                 testing=False, n_bins=5):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.n_bins = n_bins
        self.bins = {}
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
        self.learner = Learner(self.num_states, self.num_actions)
        self.units = units

    def row_to_state(self, holding, df_row):
        """Transforms a row into a state value.

        ``holding`` is the current position in shares (-units, 0 or units);
        ``df_row`` must provide a value for every name in self.indicators.
        """
        holding = (holding + self.units) // self.units
        assert(holding in [0, 1, 2])

        # For each indicator that goes into the state the interval becomes
        # smaller based on how many bins the indicator has. The first
        # 'indicator' is the information about how many shares we are
        # currently holding. So for example, if I have 450 states then the
        # interval (aka remaining_states) is 150 because there are three
        # values for holding:
        #   holding = 0 -> state = 0 * 150 = 0
        #   holding = 1 -> state = 1 * 150 = 150
        #   holding = 2 -> state = 2 * 150 = 300
        remaining_states = self.num_states // 3
        state = holding * remaining_states

        for indicator in self.indicators:
            value = df_row[indicator]
            bin_n = self.indicator_value_to_bin(indicator, value)
            remaining_states //= self.n_bins
            state += bin_n * remaining_states
        return state

    def indicator_value_to_bin(self, indicator, value):
        """Return the index of the bin that ``value`` falls into.

        ``self.bins[indicator]`` holds the ascending inner boundaries. The
        result is the index of the first boundary greater than ``value``, or
        the number of boundaries when there is none (including the case of an
        indicator without any boundary).
        """
        upper_bounds = self.bins[indicator]
        for i, upper_bound in enumerate(upper_bounds):
            if value < upper_bound:
                return i
        return len(upper_bounds)

    def add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        for indicator in self.indicators:
            if indicator == "macd_diff":
                indicators.macd(df, symbol)
                df.drop(columns=["macd", "macd_signal"], inplace=True)
            elif indicator == "rsi":
                indicators.rsi(df, symbol)
            elif indicator.startswith("price_sma_"):
                period = int(indicator.replace("price_sma_", ""))
                indicators.price_sma(df, symbol, [period])
        df.drop(columns=["SPY"], inplace=True)
        df.dropna(inplace=True)

    def bin_indicators(self, df):
        """Create bins for indicators."""
        for indicator in self.indicators:
            _, bins = pd.qcut(df[indicator], self.n_bins, retbins=True)
            # Keep only the inner boundaries; the outer edges are implied.
            self.bins[indicator] = bins[1:self.n_bins]

    def get_num_states(self):
        """Return the total num of states."""
        num_states = 3  # Three states holding (1000, 0, -1000)
        for _ in self.indicators:
            num_states *= self.n_bins
        return num_states

    def handle_order(self, action, holding, adj_closing_price):
        """Apply ``action`` to ``holding`` at the given price.

        An action that would push the position beyond +/- self.units is
        ignored. Commission and market impact are charged only when shares
        actually change hands. ``holding`` is updated in place.
        """
        shares = 0
        if action == 0:  # buy
            if holding.shares == 0 or holding.shares == -self.units:
                shares = self.units
        elif action == 1:  # sell
            if holding.shares == 0 or holding.shares == self.units:
                shares = -self.units
        elif action == 2:  # hold
            shares = 0

        cost = shares * adj_closing_price
        if shares != 0:
            # Charge commission and deduct impact penalty
            holding.cash -= self.commission
            holding.cash -= (self.impact * adj_closing_price * abs(shares))
        holding.cash -= cost
        holding.shares += shares
        holding.equity = holding.cash + holding.shares * adj_closing_price

    def get_reward(self, equity, new_equity):
        """Return 1 when equity grew since the last step, otherwise -1."""
        if new_equity > equity:
            return 1
        return -1

    def train(self, df, symbol, sv):
        """Run one training pass over ``df`` starting with ``sv`` cash."""
        holding = Holding(sv, 0, sv)

        row = df.iloc[0]
        state = self.row_to_state(holding.shares, row)
        action = self.learner.querysetstate(state)
        adj_closing_price = row[symbol]
        equity = holding.equity
        self.handle_order(action, holding, adj_closing_price)

        for index, row in df.iloc[1:].iterrows():
            adj_closing_price = row[symbol]
            # Value the position at today's price before acting; that change
            # is the reward for yesterday's action.
            new_equity = holding.cash + holding.shares * adj_closing_price
            r = self.get_reward(equity, new_equity)
            s_prime = self.row_to_state(holding.shares, row)
            a = self.learner.query(s_prime, r)
            equity = new_equity
            self.handle_order(a, holding, adj_closing_price)
            if self.verbose:
                print(f"{holding=} {s_prime=} {r=} {a=}")

    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1), sv=10000):
        """Compute indicator bins for the period and train for 15 passes."""
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        self.bin_indicators(df)

        for _ in range(15):
            self.train(df, symbol, sv)

    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1), sv=10000):
        """Query the trained learner for every day and return the orders.

        Returns the full orders frame when ``self.testing`` is set, otherwise
        only its ``Shares`` column as a one-column DataFrame.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.add_indicators(df, symbol)
        holding = 0

        for index, row in df.iterrows():
            state = self.row_to_state(holding, row)
            action = self.learner.querysetstate(state)

            if action == 0:  # buy
                if holding == 0 or holding == -self.units:
                    holding += self.units
                    orders.loc[index, "Shares"] = self.units
            elif action == 1:  # sell
                if holding == 0 or holding == self.units:
                    holding -= self.units
                    orders.loc[index, "Shares"] = -self.units
            elif action == 2:  # hold
                pass

        if self.testing:
            return orders
        else:
            return orders[["Shares"]]

30
crypto_eval/RTLearner.py Normal file
View File

@@ -0,0 +1,30 @@
import numpy as np
from AbstractTreeLearner import AbstractTreeLearner
class RTLearner(AbstractTreeLearner):
    """Random tree learner: splits on a random factor at a random midpoint."""

    def __init__(self, leaf_size=1, verbose=False):
        self.leaf_size = leaf_size
        self.verbose = verbose

    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.

        Make sure that not all X are the same for i and also pick
        different values to average the split_value from.

        @raises ValueError: if every column of xs is constant, because no
                            split can separate such rows.
        """
        # Without a varying column the rejection sampling below would never
        # terminate, so fail loudly instead.
        if not np.any(xs != xs[0]):
            raise ValueError("cannot split: all feature rows are identical")

        i = np.random.randint(0, xs.shape[1])
        while np.all(xs[0, i] == xs[:, i]):
            i = np.random.randint(0, xs.shape[1])

        # I don't know about the performance of this, but at least it
        # terminates reliably. If the two elements are the same something is
        # wrong.
        a = np.array(list(set(xs[:, i])))
        r1, r2 = np.random.choice(a, size=2, replace=False)
        assert(r1 != r2)
        split_value = (r1 + r2) / 2.0
        # For neighbouring floats the midpoint can round up to the larger
        # value, which would leave the right partition empty. Fall back to
        # the smaller value so both sides keep at least one row.
        if split_value >= max(r1, r2):
            split_value = min(r1, r2)
        return i, split_value

View File

@@ -0,0 +1,106 @@
import datetime as dt
import pandas as pd
import util
import indicators
from RTLearner import RTLearner
class StrategyLearner(object):
    """Trading strategy that classifies 3-day returns with a random tree.

    The learner is trained to predict -1 (sell), 0 (hold) or 1 (buy) from the
    indicator columns listed in ``self.indicators``.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing
        # Defined here so that the attributes exist before addEvidence runs.
        self.y_threshold = 0.2
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']

    def _get_volume(self, syms, dates):
        """For reference: load and return the volume of ``syms`` for ``dates``."""
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)
        return volume

    def _add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        df.drop(columns=["SPY"], inplace=True)
        indicators.macd(df, symbol)
        indicators.rsi(df, symbol)
        indicators.price_sma(df, symbol, [8])
        indicators.price_delta(df, symbol, 3)
        df.dropna(inplace=True)

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Train the tree learner on the given period.

        Returns the class labels (-1, 0, 1) derived from the ``pct_3`` column.
        """
        self.y_threshold = 0.2
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)

        def classify_y(row):
            if row > self.y_threshold:
                return 1
            elif row < -self.y_threshold:
                return -1
            return 0

        def set_y_threshold(pct):
            if max(pct) < 0.2:
                self.y_threshold = 0.02

        self.learner = RTLearner(leaf_size=5)
        data_x = df[self.indicators].to_numpy()
        pct = df['pct_3']

        # This is a hack to get a low enough buy/sell threshold for the
        # cyclic test 'ML4T-220' where the max pct_3 is 0.0268.
        set_y_threshold(pct)
        y = pct.apply(classify_y)

        self.learner.addEvidence(data_x, y.to_numpy())
        return y

    def strat(self, data_y, orders):
        """Turn predicted labels into trades and store them in ``orders``.

        ``data_y`` holds one label per row of ``orders`` in its first column.
        The position is kept within -1000 and 1000 shares.
        """
        self.holding = 0

        def strat(row):
            y = int(data_y.loc[row.name].iloc[0])
            shares = 0
            if self.holding == 0 and y == 1:
                shares = 1000
            elif self.holding == -1000 and y == 1:
                shares = 2000
            elif self.holding == 0 and y == -1:
                shares = -1000
            elif self.holding == 1000 and y == -1:
                shares = -2000
            self.holding += shares
            return shares

        orders["Shares"] = orders.apply(strat, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return the trades the trained learner makes in the given period.

        Returns the full orders frame when ``self.testing`` is set, otherwise
        only its ``Shares`` column as a one-column DataFrame.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)
        data_x = df[self.indicators].to_numpy()
        data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x))

        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.strat(data_y, orders)

        if self.testing:
            return orders
        else:
            return orders[["Shares"]]

237
crypto_eval/experiment1.py Normal file
View File

@@ -0,0 +1,237 @@
import pandas as pd
import datetime as dt
import sys
import util
import indicators
import crypto_eval.marketsim as marketsim
import matplotlib.pyplot as plt
from matplotlib.widgets import MultiCursor
from BenchmarkStrategy import BenchmarkStrategy
from ManualStrategy import ManualStrategy
from StrategyLearner import StrategyLearner
from QLearner import QLearner
def plot_indicators(symbol, df):
    """Show price with Bollinger bands, price/SMA, MACD and RSI, then exit.

    The four panels share the x axis. The process is terminated once the
    plot window has been closed.
    """
    fig, axes = plt.subplots(4, sharex=True)

    sma_frame = indicators.price_sma(df, symbol, [8])
    band_frame = indicators.bollinger_band(df, symbol)
    rsi_frame = indicators.rsi(df, symbol)
    macd_frame = indicators.macd(df, symbol).copy()

    # (data, panel index) in drawing order; price and bands share panel 0.
    panels = (
        (df[[symbol]], 0),
        (band_frame, 0),
        (sma_frame, 1),
        (macd_frame, 2),
        (rsi_frame, 3),
    )
    for frame, panel in panels:
        frame.plot(ax=axes[panel])

    for axis in axes.flat:
        axis.grid()

    # Keep a reference so the cursor stays alive while the window is open.
    cursor = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    plt.show()
    sys.exit(0)
def visualize_correlations(symbol, df):
    """Scatter the 5, 3 and 1 day returns over price/SMA and MACD, then exit.

    The left column plots against ``price_sma_8``, the right column against
    ``macd_diff``. The process is terminated once the window has been closed.
    """
    indicators.price_sma(df, symbol, [8, 21])
    for days in (5, 3, 1):
        indicators.price_delta(df, symbol, days)
    indicators.macd(df, symbol)
    indicators.rsi(df, symbol)

    fig, axes = plt.subplots(3, 2)
    returns = ("pct_5", "pct_3", "pct_1")
    for column, feature in enumerate(("price_sma_8", "macd_diff")):
        for row, target in enumerate(returns):
            df.plot.scatter(x=feature, y=target, ax=axes[row, column])

    for axis in axes.flat:
        axis.grid()
    plt.show()
    sys.exit(0)
def compare_manual_strategies(symbol, sv, sd, ed):
    """Plot benchmark, MACD and three-indicator strategy into figure_1.png."""
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)

    benchmark_orders = BenchmarkStrategy().testPolicy(symbol, sd, ed, sv)
    df["Benchmark"] = marketsim.compute_portvals(benchmark_orders, sv)
    df["Orders Benchmark"] = benchmark_orders["Shares"]

    manual = ManualStrategy()
    macd_orders = manual.testPolicy(symbol, sd, ed, sv, macd_strat=True)
    df["MACD Strat"] = marketsim.compute_portvals(macd_orders, sv)
    df["Orders MACD"] = macd_orders["Shares"]

    three_orders = manual.testPolicy(symbol, sd, ed, sv)
    df["Three Strat"] = marketsim.compute_portvals(three_orders, sv)
    df["Orders Three"] = three_orders["Shares"]

    fig, axes = plt.subplots(3, sharex=True)
    # One panel each for price, portfolio values and orders.
    panel_columns = (
        [symbol],
        ["Benchmark", "MACD Strat", "Three Strat"],
        ["Orders Benchmark", "Orders MACD", "Orders Three"],
    )
    for panel, columns in enumerate(panel_columns):
        df[columns].plot(ax=axes[panel])

    for axis in axes:
        axis.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)

    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_1.png', dpi=fig.dpi)
def compare_all_strategies(symbol, sv, sd, ed):
    """Plot normalized benchmark, manual and learned strategy into figure_2.png."""
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    to_normalized = indicators.normalize

    benchmark_orders = BenchmarkStrategy().testPolicy(symbol, sd, ed, sv)
    df["Benchmark"] = to_normalized(
        marketsim.compute_portvals(benchmark_orders, sv))
    df["Orders Benchmark"] = benchmark_orders["Shares"]

    manual_orders = ManualStrategy().testPolicy(symbol, sd, ed, sv)
    df["Manual"] = to_normalized(
        marketsim.compute_portvals(manual_orders, sv))
    df["Orders Manual"] = manual_orders["Shares"]

    # testing=True makes the learner return the full orders frame.
    learner = StrategyLearner(testing=True)
    learner.addEvidence(symbol, sd, ed, sv)
    learner_orders = learner.testPolicy(symbol, sd, ed, sv)
    df["Strategy"] = to_normalized(
        marketsim.compute_portvals(learner_orders, sv))
    df["Orders Strategy"] = learner_orders["Shares"]

    fig, axes = plt.subplots(3, sharex=True)
    panel_columns = (
        [symbol],
        ["Benchmark", "Manual", "Strategy"],
        ["Orders Benchmark", "Orders Manual", "Orders Strategy"],
    )
    for panel, columns in enumerate(panel_columns):
        df[columns].plot(ax=axes[panel])

    for axis in axes:
        axis.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)

    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_2.png', dpi=fig.dpi)
def compare_number_trades():
    """Print how many orders the Q-learner places for several commissions.

    For every commission a fresh QLearner is trained and evaluated in-sample
    on JPM (2008-2009) with an impact of 0.005. The result is printed as a
    small table.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample

    print("| commission | n_orders |")
    print("-------------------------")
    for commission in [0, 9.95, 20, 50, 100]:
        ql = QLearner(testing=True, commission=commission, impact=0.005)
        ql.addEvidence(symbol, sd, ed, sv)
        orders = ql.testPolicy(symbol, sd, ed, sv)
        # Only rows that actually trade count as an order.
        n_orders = int((orders["Shares"] != 0).sum())
        print(f"| {commission} | {n_orders} |")
def compare_q_learners():
    """Compare Q-learners with 5 and 4 bins out-of-sample, save figure_4.png.

    Both learners are trained on JPM 2008-2009 and evaluated on 2010-2011
    next to the buy-and-hold benchmark. The process exits afterwards.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample

    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    df.drop(columns=["SPY"], inplace=True)

    benchmark_orders = BenchmarkStrategy().testPolicy(symbol, sd_out, ed_out, sv)
    df["Benchmark"] = indicators.normalize(
        marketsim.compute_portvals(benchmark_orders, sv))
    df["Orders Benchmark"] = benchmark_orders["Shares"]

    five_bins = QLearner(testing=True, verbose=False)
    five_bins.addEvidence(symbol, sd, ed, sv)
    five_orders = five_bins.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL 5"] = indicators.normalize(
        marketsim.compute_portvals(five_orders, sv))
    df["Orders QL 5"] = five_orders["Shares"]

    four_bins = QLearner(testing=True, verbose=False, n_bins=4)
    four_bins.addEvidence(symbol, sd, ed, sv)
    four_orders = four_bins.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL 4"] = indicators.normalize(
        marketsim.compute_portvals(four_orders, sv))
    df["Orders QL 4"] = four_orders["Shares"]

    fig, axes = plt.subplots(3, sharex=True)
    panel_columns = (
        [symbol],
        ["Benchmark", "QL 5", "QL 4"],
        ["Orders Benchmark", "Orders QL 5", "Orders QL 4"],
    )
    for panel, columns in enumerate(panel_columns):
        df[columns].plot(ax=axes[panel])

    for axis in axes:
        axis.grid()

    cursor = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_4.png', dpi=fig.dpi)
    sys.exit(0)
def experiment1(create_report=False):
    """Show the Q-learner against buy-and-hold on daily BTC/USD data of 2020.

    Training and evaluation use the same period and both strategies trade a
    single unit. ``create_report`` is accepted but currently not used. The
    other helpers in this module (compare_manual_strategies,
    compare_all_strategies, visualize_correlations, plot_indicators,
    compare_number_trades, compare_q_learners) can be called from here for
    further experiments.
    """
    symbol = "COINBASE_BTCUSD_1D"
    sv = 10000
    sd = dt.datetime(2020, 1, 1)  # in-sample
    ed = dt.datetime(2020, 12, 31)  # in-sample
    sd_out = dt.datetime(2020, 1, 1)  # out-sample
    ed_out = dt.datetime(2020, 12, 31)  # out-sample

    df = util.get_data([symbol], pd.date_range(sd_out, ed_out), addSPY=True)

    benchmark_orders = BenchmarkStrategy(units=1).testPolicy(
        symbol, sd_out, ed_out, sv)
    benchmark_values = marketsim.compute_portvals(benchmark_orders, start_val=sv)
    df["Benchmark"] = indicators.normalize(benchmark_values)
    df["Orders Benchmark"] = benchmark_orders["Shares"]

    q_trader = QLearner(testing=True, verbose=False, units=1)
    q_trader.addEvidence(symbol, sd, ed, sv)
    q_orders = q_trader.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL"] = indicators.normalize(marketsim.compute_portvals(q_orders, sv))
    df["Orders QL"] = q_orders["Shares"]

    fig, axes = plt.subplots(3, sharex=True)
    panel_columns = (
        [symbol],
        ["Benchmark", "QL"],
        ["Orders Benchmark", "Orders QL"],
    )
    for panel, columns in enumerate(panel_columns):
        df[columns].plot(ax=axes[panel])

    for axis in axes:
        axis.grid()

    # Keep a reference so the cursor stays alive while the window is open.
    cursor = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    plt.show()


if __name__ == "__main__":
    experiment1()

View File

@@ -0,0 +1,8 @@
import experiment1
def experiment2():
    """Run the second experiment: order count as a function of commission."""
    experiment1.compare_number_trades()


if __name__ == "__main__":
    experiment2()

View File

@@ -0,0 +1,339 @@
"""MC3-P3: Strategy Learner - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os
import sys
import traceback as tb
import datetime as dt
import numpy as np
import pandas as pd
from collections import namedtuple
import time
import util
import random
# Test cases
StrategyTestCase = namedtuple(
    'Strategy',
    ['description', 'insample_args', 'outsample_args', 'benchmark_type',
     'benchmark', 'impact', 'train_time', 'test_time', 'max_time', 'seed'],
)

# All cases share the training/test periods, start value, time limits and
# seed; only symbol, description and benchmark differ.
_IN_SAMPLE_PERIOD = (dt.datetime(2008, 1, 1), dt.datetime(2009, 12, 31))
_OUT_SAMPLE_PERIOD = (dt.datetime(2010, 1, 1), dt.datetime(2011, 12, 31))

strategy_test_cases = [
    StrategyTestCase(
        description=description,
        insample_args=dict(symbol=symbol, sd=_IN_SAMPLE_PERIOD[0],
                           ed=_IN_SAMPLE_PERIOD[1], sv=100000),
        outsample_args=dict(symbol=symbol, sd=_OUT_SAMPLE_PERIOD[0],
                            ed=_OUT_SAMPLE_PERIOD[1], sv=100000),
        benchmark_type=benchmark_type,
        benchmark=benchmark,
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000,
    )
    for description, symbol, benchmark_type, benchmark in (
        ("ML4T-220", "ML4T-220", 'clean', 1.0),  # benchmark updated Apr 24 2017
        ("AAPL", "AAPL", 'stock', 0.1581999999999999),  # benchmark computed Nov 22 2017
        ("SINE_FAST_NOISE", "SINE_FAST_NOISE", 'noisy', 2.0),  # benchmark updated Apr 24 2017
        ("UNH - In sample", "UNH", 'stock', -0.25239999999999996),  # benchmark computed Nov 22 2017
    )
]

max_points = 60.0
html_pre_block = True  # surround comments with HTML <pre> tag (for T-Square comments field)
MAX_HOLDINGS = 1000
# Test function(s)
def _trades_are_legal(trades, label, msgs):
    """Check one trades DataFrame against the trade-size and holdings limits.

    Walks the rows in order while accumulating the running position.  On the
    first offending row a description is appended to *msgs* and False is
    returned; True is returned when every row is legal.  *label* names the
    DataFrame in the failure message (e.g. "first insample").
    """
    holdings = 0.0
    for date, trade in trades.iterrows():
        shares = trade.iloc[0]
        holdings += shares
        if (shares != 0) and (abs(shares) != MAX_HOLDINGS) and (abs(shares) != 2 * MAX_HOLDINGS):
            msgs.append(" illegal trade in {} DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(label, 0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
            return False
        if abs(holdings) > MAX_HOLDINGS:
            msgs.append(" holdings more than {} long or short in {} DF. Date {}, Trade {}".format(MAX_HOLDINGS, label, date, trade))
            return False
    return True


@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Trains a learner on the in-sample arguments, queries it twice in-sample and
    once out-of-sample, then awards points for: training time, query time,
    repeatable in-sample output, beating the in-sample benchmark, and either
    out-of-sample query time or (for 'clean' benchmarks) out-of-sample return.

    Args:
        description: human-readable name of the test case.
        insample_args / outsample_args: keyword arguments (symbol, sd, ed, sv)
            passed to addEvidence() / testPolicy().
        benchmark_type: 'clean' makes the out-of-sample return count as well.
        benchmark: cumulative return the learner has to beat.
        impact: market impact handed to the learner and the evaluator.
        train_time / test_time: per-call limits in seconds.
        max_time: overall timeout in seconds for the learner run.
        seed: seed applied to both numpy and the random module.
        grader: grading fixture that collects the result.

    Raises:
        IncorrectOutput: when any criterion fails.  Any exception is reported
        to the grader and then re-raised.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'StrategyLearner' not in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark

        def timeoutwrapper_strategylearner():
            # Set fixed seed for repeatability
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False, impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time() - tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time() - tmp
            # Second in-sample query is only used for the repeatability check.
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time() - tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t

        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner, max_time, (), {})
        incorrect = False
        if len(in_trades_1.shape) != 2 or in_trades_1.shape[1] != 1:
            incorrect = True
            msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
        elif len(in_trades_2.shape) != 2 or in_trades_2.shape[1] != 1:
            incorrect = True
            msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
        elif len(out_trades.shape) != 2 or out_trades.shape[1] != 1:
            incorrect = True
            msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
        else:
            # Every DataFrame is checked, even after an earlier one failed, so
            # the report lists the first problem found in each of them.
            for trades, label in ((in_trades_1, "first insample"),
                                  (in_trades_2, "second insample"),
                                  (out_trades, "out-of-sample")):
                if not _trades_are_legal(trades, label, msgs):
                    incorrect = True
        if not incorrect:
            if train_t > train_time:
                incorrect = True
                msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t, train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t, test_time))
            else:
                points_earned += 2.0
            # .iloc[0]: the result of .all() is indexed by column label, so a
            # plain [0] would be a (deprecated) positional fallback.
            if not (in_trades_1 == in_trades_2).all().iloc[0]:
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2, how='outer', lsuffix='1', rsuffix='2')
                mismatches = mismatches[mismatches.iloc[:, 0] != mismatches.iloc[:, 1]]
                msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append(" Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'], in_trades_1, insample_args['sv'], insample_args['sd'], insample_args['ed'], market_impact=impact, commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'], out_trades, outsample_args['sv'], outsample_args['sd'], outsample_args['ed'], market_impact=impact, commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr, benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                # No out-of-sample return target: grade the query time instead.
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t, test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr, outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = " insample_args: {}\n" \
                         " outsample_args: {}\n" \
                         " benchmark_type: {}\n" \
                         " benchmark: {}\n" \
                         " train_time: {}\n" \
                         " test_time: {}\n" \
                         " max_time: {}\n" \
                         " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time, seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a stacktrace that shows only the file name instead of the
        # long absolute path of each frame.
        tb_list = [
            (os.path.basename(row[0]), row[1], row[2], row[3])
            for row in tb.extract_tb(sys.exc_info()[2])
        ]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def compute_benchmark(sd, ed, sv, symbol, market_impact, commission_cost, max_holdings):
    """Return the cumulative return of a buy-and-hold benchmark.

    The benchmark buys *max_holdings* shares of *symbol* on the first date
    returned by util.get_data for [sd, ed] and sells them again on the last
    one; the resulting trades are scored with evalPolicy2.
    """
    date_idx = util.get_data([symbol], pd.date_range(sd, ed)).index
    orders = pd.DataFrame(index=date_idx)
    orders['orders'] = 0
    # Positional .iloc writes replace the chained `orders['orders'][0] = ...`
    # form, which is not a reliable way to assign into the frame.
    orders.iloc[0, 0] = max_holdings
    orders.iloc[-1, 0] = -max_holdings
    return evalPolicy2(symbol, orders, sv, sd, ed, market_impact, commission_cost)
def evalPolicy(student_trades, sym_prices, startval):
    """Return the cumulative return of a trade list, ignoring trading costs.

    Cash is reduced by shares * price for every trade and the net position is
    valued at the last price in *sym_prices*.
    """
    spent = student_trades.mul(sym_prices, axis=0).sum()
    closing_price = sym_prices.iloc[-1]
    position_value = student_trades.sum() * closing_price
    final_value = (startval - spent) + position_value
    return float(final_value / startval) - 1.0
def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact, commission_cost):
    """Return the cumulative return of *student_trades* via the market simulator.

    The first column of *student_trades* holds signed share counts indexed by
    date (positive buys, negative sells, zero rows are skipped).  The trades
    are converted into an orders DataFrame with columns Shares/Order/Symbol
    and passed to compute_portvals; the result is last value / first value - 1.
    """
    columns = ['Shares', 'Order', 'Symbol']
    rows = []
    dates = []
    for date, nshares in student_trades.iloc[:, 0].items():
        if nshares == 0:
            continue
        order = 'sell' if nshares < 0 else 'buy'
        rows.append([abs(nshares), order, symbol])
        dates.append(date)
    # Build the frame once; DataFrame.append no longer exists in pandas 2.x
    # and copied the whole frame on every call.
    orders_df = pd.DataFrame(rows, columns=columns, index=dates)
    portvals = compute_portvals(orders_df, sd, ed, startval, market_impact, commission_cost)
    # portvals is indexed by date, so first/last must be taken by position.
    return float(portvals.iloc[-1] / portvals.iloc[0]) - 1
def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
    """Simulate the market for the given date range and orders.

    Args:
        orders_df: DataFrame indexed by date with columns 'Shares', 'Order'
            ('buy'/'sell', case-insensitive) and 'Symbol'.
        start_date, end_date: inclusive simulation range.
        startval: starting cash.
        market_impact: fraction of the traded value charged on every order.
        commission_cost: fixed fee charged on every order.

    Returns:
        Series of portfolio values indexed like the price data.

    Orders dated outside the price data's date range are ignored.
    """
    orders_df = orders_df.sort_index()
    orders = []
    for date, order in orders_df.iterrows():
        shares = order['Shares']
        if order['Order'].lower() == 'sell':
            shares *= -1
        orders.append((date, order['Symbol'], shares))
    symbols = list({symbol for _, symbol, _ in orders})

    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbols, dates)
    # ffill()/bfill() return a new frame, so adding the cash column below
    # does not write into a slice of prices_all.
    prices = prices_all[symbols].ffill().bfill()
    prices['_CASH'] = 1.0

    trades = pd.DataFrame(0.0, index=prices.index, columns=symbols)
    cash = pd.Series(0.0, index=prices.index)
    cash.iloc[0] = startval

    first_day = prices.index.min()
    last_day = prices.index.max()
    for date, symbol, shares in orders:
        # Skip out-of-range orders *before* the price lookup, which would
        # otherwise raise KeyError for such dates.
        if (date < first_day) or (date > last_day):
            continue
        price = prices.loc[date, symbol]
        val = shares * price
        # transaction cost model
        val += commission_cost + (abs(shares) * price * market_impact)
        trades.loc[date, symbol] += shares
        cash.loc[date] -= val

    trades['_CASH'] = cash
    holdings = trades.cumsum()
    return (prices * holdings).sum(axis=1)
# When executed as a script, run this file through pytest; "-s" disables
# pytest's output capturing.
if __name__ == "__main__":
pytest.main(["-s", __file__])

140
crypto_eval/indicators.py Normal file
View File

@@ -0,0 +1,140 @@
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from util import get_data
def author():
    """Return the user name of this module's author."""
    username = "felixm"
    return username
def normalize(timeseries):
    """Scale *timeseries* so that its first row equals 1."""
    first_row = timeseries.iloc[0]
    return timeseries / first_row
def bollinger_band(df, symbol, period=20, m=2):
    """Add Bollinger band columns to *df* and return them.

    Writes 'boll_sma' (rolling mean over *period* rows), 'boll_up' and
    'boll_lo' (mean plus/minus *m* rolling standard deviations) into *df*
    and returns those three columns as a DataFrame.
    """
    boll_sma = df[symbol].rolling(period).mean()
    deviation = df[symbol].rolling(period).std()
    bands = (
        ("boll_sma", boll_sma),
        ("boll_up", boll_sma + m * deviation),
        ("boll_lo", boll_sma - m * deviation),
    )
    for column, values in bands:
        df[column] = values
    return df[[column for column, _ in bands]]
def sma(df, symbol, period):
    """Add simple moving averages of df[symbol] to *df* and return them.

    *period* is a single int or an iterable of window lengths; each window p
    produces a column named 'sma_<p>'.
    """
    windows = [period] if type(period) is int else period
    keys = []
    for window in windows:
        column = f"sma_{window}"
        df[column] = df[symbol].rolling(window).mean()
        keys += [column]
    return df[keys]
def ema(df, symbol, period):
    """Add exponential moving averages of df[symbol] to *df* and return them.

    *period* is a single int or an iterable of spans; each span p produces a
    column named 'ema_<p>' computed with ewm(span=p).
    """
    if type(period) is int:
        period = [period]
    added = []
    for span in period:
        name = f"ema_{span}"
        added.append(name)
        df[name] = df[symbol].ewm(span=span).mean()
    return df[added]
def price_sma(df, symbol, period):
    """Add price / SMA ratio columns to *df* and return them.

    *period* is a single int or an iterable of window lengths; each window p
    produces a column 'price_sma_<p>' holding df[symbol] divided by its
    rolling mean over p rows.
    """
    if type(period) is int:
        period = [period]
    created = []
    for window in period:
        name = f"price_sma_{window}"
        moving_average = df[symbol].rolling(window).mean()
        df[name] = df[symbol] / moving_average
        created.append(name)
    return df[created]
def rsi(df, symbol, period=14):
    """Add an 'rsi' column to *df* and return it as a one-column DataFrame.

    For every rolling window the mean positive percentage change is compared
    with the mean absolute non-positive percentage change and mapped to
    100 - 100 / (1 + ratio).
    """
    # One extra row is needed to obtain 'period' price changes, because the
    # first change of every window is NaN.
    window = period + 1

    def window_rsi(prices):
        changes = prices.pct_change()
        avg_gain = changes[changes > 0].mean()
        avg_loss = changes[changes <= 0].abs().mean()
        relative_strength = (avg_gain / window) / (avg_loss / window)
        return 100 - (100 / (1 + relative_strength))

    column = "rsi"
    df[column] = df[symbol].rolling(window).apply(window_rsi)
    return df[[column]]
def macd(df, symbol):
    """Add MACD columns to *df* and return them.

    'macd' is EMA(span=12) minus EMA(span=26) of df[symbol], 'macd_signal'
    is the 9-row rolling mean of 'macd', and 'macd_diff' is their difference.
    """
    price = df[symbol]
    fast = price.ewm(span=12).mean()
    slow = price.ewm(span=26).mean()
    line = fast - slow
    df["macd"] = line
    df["macd_signal"] = line.rolling(9).mean()
    df["macd_diff"] = df["macd"] - df["macd_signal"]
    return df[["macd", "macd_signal", "macd_diff"]]
def price_delta(df, symbol, period=1):
    """Add the percentage change over the next *period* rows and return it.

    The change is shifted back by *period* rows, so row t holds the change
    from t to t + period; the last *period* rows are NaN.  The column is
    named 'pct_<period>' and is returned as a Series.
    """
    column = f"pct_{period}"
    change = df[symbol].pct_change(periods=period)
    df[column] = change
    df[column] = df[column].shift(-period)
    return df[column]
def test_indicators():
    """Plot every indicator for JPM and save figure_1.png ... figure_5.png.

    Prices for 2008-01-01 through 2009-12-31 are loaded with get_data; each
    figure starts from a fresh copy of the price frame so that indicator
    columns from one plot do not leak into the next.
    """
    symbol = "JPM"
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    df = get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    df_orig = df.copy()

    sma(df, symbol, 21)
    ema(df, symbol, 21)
    df.plot(title="21 SMA and EMA")
    plt.savefig('figure_1.png')

    df = df_orig.copy()
    sma(df, symbol, 8)
    price_sma(df, symbol, 8)
    df.plot(title="SMA and price / SMA", subplots=True)
    plt.savefig('figure_2.png')

    df = df_orig.copy()
    bollinger_band(df, symbol)
    df.plot(title="Bollinger Band")
    plt.savefig('figure_3.png')

    df = df_orig.copy()
    rsi(df, symbol)
    _, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # rsi() writes its result to the column "rsi".
    df["rsi"].plot(ax=axes[1], title="RSI")
    plt.savefig('figure_4.png')

    df = df_orig.copy()
    macd(df, symbol)
    _, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # macd() writes "macd", "macd_signal" and "macd_diff".
    df[["macd", "macd_signal"]].plot(ax=axes[1])
    plt.savefig('figure_5.png')

179
crypto_eval/marketsim.py Normal file
View File

@@ -0,0 +1,179 @@
"""MC2-P1: Market simulator.
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: felixm (replace with your User ID)
GT ID: 1337 (replace with your GT ID)
"""
import pandas as pd
from util import get_data, plot_data
from optimize_something.optimization import calculate_stats
def read_orders(orders_file):
"""
Parse orders into the form:
Date datetime64[ns]
Symbol object
Order object
Shares int32
This is what the order book looks like:
Date,Symbol,Order,Shares
2011-01-10,AAPL,BUY,1500
2011-01-10,AAPL,SELL,1500
Returns the orders as a DataFrame indexed by the parsed Date column and
sorted by Date.
"""
# NOTE(review): the dtype string is handed to pandas verbatim; confirm that the
# pandas version in use interprets it as the per-column types listed above.
orders = pd.read_csv(orders_file,
index_col=['Date'],
dtype='|str, str, str, i4',
parse_dates=['Date'])
orders.sort_values(by="Date", inplace=True)
return orders
def get_order_book_info(orders):
    """Return (first index entry, last index entry, sorted unique symbols).

    The first and last index entries are taken by position, so they are the
    start and end dates only when *orders* is sorted by date.
    """
    order_dates = orders.index
    unique_symbols = set(orders.Symbol.tolist())
    return order_dates[0], order_dates[-1], sorted(unique_symbols)
def get_portfolio_value(holding, prices):
    """Calculate the current portfolio value.

    The 'cash' entry of *holding* counts at face value; every other entry is
    a share count that is multiplied by prices[ticker].
    """
    return sum(
        amount if ticker == 'cash' else amount * prices[ticker]
        for ticker, amount in holding.items()
    )
def handle_order(date, order, holding, prices, commission, impact):
    """Apply one order to *holding* in place.

    *order* unpacks into (symbol, order type, shares).  An empty order type
    lets the sign of *shares* decide between buying and selling.  Commission
    and the impact penalty are deducted from cash before the order type is
    checked.

    Raises:
        Exception: if the order type is neither BUY nor SELL.
    """
    symbol, side, shares = order
    if shares == 0 and side == "":
        return  # empty order
    if pd.isnull(shares):
        return  # shares is nan

    # With no explicit order type, a positive share count buys and a
    # negative one sells.
    if side == "":
        if shares > 0:
            side = "BUY"
        elif shares < 0:
            side = "SELL"
    shares = abs(shares)

    price = prices[symbol]
    cost = shares * price
    # Charge commission and deduct impact penalty
    holding['cash'] -= (commission + impact * price * shares)

    action = side.upper()
    if action == "BUY":
        holding['cash'] -= cost
        holding[symbol] += shares
        return
    if action == "SELL":
        holding['cash'] += cost
        holding[symbol] -= shares
        return
    raise Exception("Unexpected order type.")
def compute_portvals(orders_file, start_val=1000000, commission=9.95, impact=0.005):
    """Simulate an order book and return the daily portfolio value.

    Args:
        orders_file: path/handle of an orders CSV (see read_orders) or an
            already-built orders DataFrame indexed by date.
        start_val: starting cash.
        commission: fixed fee charged per order.
        impact: fraction of the traded value charged per order.

    Returns:
        One-column DataFrame 'Portval', indexed by the dates returned by
        get_data for the order book's date range.
    """
    if isinstance(orders_file, pd.DataFrame):
        orders = orders_file
    else:
        orders = read_orders(orders_file)

    start_date, end_date, symbols = get_order_book_info(orders)

    # Tickers in the orderbook over the date_range in the order book.
    prices = get_data(symbols, pd.date_range(start_date, end_date))

    # A dictionary to keep track of the assets we are holding.
    holding = {s: 0 for s in symbols}
    holding['cash'] = start_val

    # Iterate over all trading days that are in the (inclusive) range of the
    # order book dates. This implicitly ignores orders placed on non-trading
    # days.
    portvals = []
    for date, values in prices.iterrows():
        # Process orders for that day.
        for order_date, order in orders.loc[date:date].iterrows():
            handle_order(order_date, order, holding, values, commission, impact)
        # Collect the end-of-day value; writing it into `values` is not
        # reliable because iterrows() may yield a copy of the row.
        portvals.append(get_portfolio_value(holding, values))

    prices['Portval'] = pd.Series(portvals, index=prices.index, dtype=float)
    return prices[['Portval']]
def test_code():
    """Run the simulator on ./orders/orders-02.csv and print stats versus SPY."""
    of = "./orders/orders-02.csv"
    sv = 1000000

    portvals = compute_portvals(orders_file=of, start_val=sv)
    if isinstance(portvals, pd.DataFrame):
        portvals = portvals[portvals.columns[0]]  # just get the first column
    else:
        raise Exception("warning, code did not return a DataFrame")

    start_date = portvals.index[0]
    end_date = portvals.index[-1]
    cum_ret, avg_daily_ret, \
        std_daily_ret, sharpe_ratio = calculate_stats(portvals.to_frame(), [1])

    spy = get_data(['SPY'], pd.date_range(start_date, end_date))
    cum_ret_SPY, avg_daily_ret_SPY, \
        std_daily_ret_SPY, sharpe_ratio_SPY = calculate_stats(spy, [1])

    # Compare portfolio against $SPY
    print(f"Date Range: {start_date} to {end_date}")
    print()
    print(f"Sharpe Ratio of Fund: {sharpe_ratio}")
    print(f"Sharpe Ratio of SPY : {sharpe_ratio_SPY}")
    print()
    print(f"Cumulative Return of Fund: {cum_ret}")
    print(f"Cumulative Return of SPY : {cum_ret_SPY}")
    print()
    print(f"Standard Deviation of Fund: {std_daily_ret}")
    print(f"Standard Deviation of SPY : {std_daily_ret_SPY}")
    print()
    print(f"Average Daily Return of Fund: {avg_daily_ret}")
    print(f"Average Daily Return of SPY : {avg_daily_ret_SPY}")
    print()
    # portvals is indexed by date, so the last value is taken by position.
    print(f"Final Portfolio Value: {portvals.iloc[-1]}")
def author():
    """Return the user name of this module's author."""
    username = 'felixm'
    return username
# When executed as a script, run the demo in test_code().
if __name__ == "__main__":
test_code()

View File

@@ -0,0 +1,8 @@
from experiment1 import experiment1
from experiment2 import experiment2
# When executed as a script, run both experiments; experiment 1 is called with
# create_report=True, experiment 2 with its defaults.
if __name__ == "__main__":
experiment1(create_report=True)
experiment2()

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

View File

@@ -2,7 +2,7 @@ import pandas as pd
import datetime as dt import datetime as dt
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import TheoreticallyOptimalStrategy as tos import TheoreticallyOptimalStrategy as tos
from util import plot_data, get_data from util import get_data
from marketsim.marketsim import compute_portvals from marketsim.marketsim import compute_portvals
from optimize_something.optimization import calculate_stats from optimize_something.optimization import calculate_stats
@@ -31,7 +31,7 @@ def test_optimal_strategy():
orders = pd.DataFrame(data=d, index=[start_date, end_date]) orders = pd.DataFrame(data=d, index=[start_date, end_date])
bench = compute_portvals(orders, start_value, 0, 0) bench = compute_portvals(orders, start_value, 0, 0)
cum_ret_bench, avg_daily_ret_bench, \ cum_ret_bench, avg_daily_ret_bench, \
std_daily_ret_bench, sharpe_ratio_bench = calculate_stats(bench, [1]) std_daily_ret_bench, sharpe_ratio_bench = calculate_stats(bench, [1])
# Compare portfolio against benchmark # Compare portfolio against benchmark
print(f"Date Range: {start_date} to {end_date}") print(f"Date Range: {start_date} to {end_date}")
@@ -56,7 +56,7 @@ def test_optimal_strategy():
portvals.drop(columns=["Portval"], inplace=True) portvals.drop(columns=["Portval"], inplace=True)
portvals.plot(title="Optimal strategy versus 1000 shares of JPM") portvals.plot(title="Optimal strategy versus 1000 shares of JPM")
plt.savefig('figure_5.png') plt.savefig('figure_6.png')
def normalize(timeseries): def normalize(timeseries):
@@ -68,9 +68,9 @@ def bollinger_band(df, symbol, period=20, m=2):
std = df[symbol].rolling(period).std() std = df[symbol].rolling(period).std()
boll_up = boll_sma + m * std boll_up = boll_sma + m * std
boll_lo = boll_sma - m * std boll_lo = boll_sma - m * std
df[f"Boll({symbol}, {period})-sma"] = boll_sma df[f"{symbol}-Boll({period})-sma"] = boll_sma
df[f"Boll({symbol}, {period})-up"] = boll_up df[f"{symbol}-Boll({period})-up"] = boll_up
df[f"Boll({symbol}, {period})-lo"] = boll_lo df[f"{symbol}-Boll({period})-lo"] = boll_lo
def sma(df, symbol, period): def sma(df, symbol, period):
@@ -78,31 +78,90 @@ def sma(df, symbol, period):
df[f"{symbol}-sma({period})"] = df[symbol].rolling(period).mean() df[f"{symbol}-sma({period})"] = df[symbol].rolling(period).mean()
def ema(df, symbol, period):
"""Adds a new column to the dataframe EMA(period)"""
df[f"{symbol}-ema({period})"] = df[symbol].ewm(span=period).mean()
def price_sma(df, symbol, period): def price_sma(df, symbol, period):
"""Calculates SMA and adds new column price divided by SMA to the df.""" """Calculates SMA and adds new column price divided by SMA to the df."""
sma = df[symbol].rolling(period).mean() sma = df[symbol].rolling(period).mean()
df[f"{symbol}-price/sma({period})"] = df[symbol] / sma df[f"{symbol}-price/sma({period})"] = df[symbol] / sma
def main(): def rsi(df, symbol, period=14):
# test_optimal_strategy() """Calculates relative strength index over given period."""
def rsi(x):
pct = x.pct_change()
avg_gain = pct[pct > 0].mean()
avg_loss = pct[pct <= 0].abs().mean()
rsi = 100 - (100 /
(1 + ((avg_gain / period) /
(avg_loss / period))))
return rsi
key = f"{symbol}-rsi({period})"
# Add one to get 'period' price changes (first change is nan).
period += 1
df[key] = df[symbol].rolling(period).apply(rsi)
def macd(df, symbol):
macd = df[symbol].ewm(span=12).mean() - df[symbol].ewm(span=26).mean()
df[f"{symbol}-macd"] = macd
df[f"{symbol}-macd-signal"] = macd.rolling(9).mean()
def price_delta(df, symbol, period=1):
"""Calculate delta between previous day and today."""
df[f"{symbol}-diff({period})"] = df[symbol].diff(periods=period)
def test_indicators():
symbol = "JPM" symbol = "JPM"
sd = dt.datetime(2008, 1, 1) sd = dt.datetime(2008, 1, 1)
ed = dt.datetime(2009, 12, 31) ed = dt.datetime(2009, 12, 31)
df = get_data([symbol], pd.date_range(sd, ed)) df = get_data([symbol], pd.date_range(sd, ed))
df.drop(columns=["SPY"], inplace=True) df.drop(columns=["SPY"], inplace=True)
df = normalize(df) df_orig = df.copy()
# df = normalize(df)
sma(df, symbol, 21)
ema(df, symbol, 21)
df.plot(title="21 SMA and EMA")
plt.savefig('figure_1.png')
df = df_orig.copy()
sma(df, symbol, 8) sma(df, symbol, 8)
# price_sma(df, symbol, 8) price_sma(df, symbol, 8)
# bollinger_band(df, symbol) df.plot(title="SMA and price / SMA", subplots=True)
plt.savefig('figure_2.png')
# TODO df = df_orig.copy()
# rsi(df, symbol) bollinger_band(df, symbol)
# macd(df, symbol) df.plot(title="Bollinger Band")
plt.savefig('figure_3.png')
plot_data(df) df = df_orig.copy()
rsi(df, symbol)
fig, axes = plt.subplots(nrows=2, sharex=True)
df[symbol].plot(ax=axes[0], title="JPM price action")
df["JPM-rsi(14)"].plot(ax=axes[1], title="RSI")
plt.savefig('figure_4.png')
df = df_orig.copy()
macd(df, symbol)
fig, axes = plt.subplots(nrows=2, sharex=True)
df[symbol].plot(ax=axes[0], title="JPM price action")
df[["JPM-macd", "JPM-macd-signal"]].plot(ax=axes[1])
plt.savefig('figure_5.png')
def main():
test_optimal_strategy()
test_indicators()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,6 +1,24 @@
# Indicators # Indicators
## SMA and EMA
![SMA and EMA](figure_1.png)
## SMA/Price
![SMA/Price](figure_2.png)
## Bollinger Band
![Bollinger Band](figure_3.png)
## RSI
![RSI](figure_4.png)
## MACD
![MACD](figure_5.png)
# Optimal Strategy # Optimal Strategy
@@ -31,7 +49,7 @@ established that holding zero shares does not make sense, the only two
possibilities are buying or selling 2000 depending on the current amount of possibilities are buying or selling 2000 depending on the current amount of
shares. shares.
![Optimal strategy versus holding 1000 shares](figure_5.png) ![Optimal strategy versus holding 1000 shares](figure_6.png)
``` ```
Date Range: 2008-01-02 00:00:00 to 2009-12-31 00:00:00 Date Range: 2008-01-02 00:00:00 to 2009-12-31 00:00:00

View File

@@ -75,15 +75,28 @@ def get_portfolio_value(holding, prices):
def handle_order(date, order, holding, prices, commission, impact): def handle_order(date, order, holding, prices, commission, impact):
"""Process the order.""" """Process the order."""
symbol, order, shares = order symbol, order, shares = order
if shares == 0 and order == "":
return # empty order
if pd.isnull(shares):
return # shares is nan
# Allow indicating buying and selling via shares. If shares is positive we
# buy and if it is negative we sell.
if shares > 0 and order == "":
order = "BUY"
elif shares < 0 and order == "":
order = "SELL"
shares = abs(shares)
adj_closing_price = prices[symbol] adj_closing_price = prices[symbol]
cost = shares * adj_closing_price cost = shares * adj_closing_price
# Charge commission and deduct impact penalty # Charge commission and deduct impact penalty
holding['cash'] -= (commission + impact * adj_closing_price * shares) holding['cash'] -= (commission + impact * adj_closing_price * shares)
if order == "BUY": if order.upper() == "BUY":
# print(f"Buy {shares:6} of {symbol:4} on {date}") # print(f"Buy {shares:6} of {symbol:4} on {date}")
holding['cash'] -= cost holding['cash'] -= cost
holding[symbol] += shares holding[symbol] += shares
elif order == "SELL": elif order.upper() == "SELL":
# print(f"Sell {shares:6} of {symbol:4} on {date}") # print(f"Sell {shares:6} of {symbol:4} on {date}")
holding['cash'] += cost holding['cash'] += cost
holding[symbol] -= shares holding[symbol] -= shares

138
qlearning_robot/QLearner.py Normal file
View File

@@ -0,0 +1,138 @@
"""
Template for implementing QLearner (c) 2015 Tucker Balch
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
"""
import random
import numpy as np
class QLearner(object):
    """Tabular Q-learner with epsilon-greedy exploration and optional Dyna-Q.

    The Q table is a (num_states, num_actions) array of zeros.  A random
    action is taken with probability ``rar``, which is multiplied by ``radr``
    after every call to query().  With ``dyna > 0`` the first observed
    (reward, next state) of every (state, action) pair is remembered and
    replayed ``dyna`` times per query().
    """

    def __init__(self,
                 num_states=100,
                 num_actions=4,
                 alpha=0.2,
                 gamma=0.9,
                 rar=0.5,
                 radr=0.99,
                 dyna=0,
                 verbose=False):
        self.verbose = verbose
        self.num_states = num_states
        self.num_actions = num_actions
        # Hyper parameters: learning rate, discount, exploration and decay.
        self.alpha = alpha
        self.gamma = gamma
        self.rar = rar
        self.radr = radr
        self.dyna = dyna
        # Last state and action, used by the next query() update.
        self.s = 0
        self.a = 0
        if dyna > 0:
            self.model = {}
            self.state_action_list = []
        self.q = np.zeros((num_states, num_actions))

    def _get_a(self, s):
        """Pick an action for state s: random with probability rar, else greedy."""
        explore = random.random() < self.rar
        if explore:
            return random.randint(0, self.num_actions - 1)
        return np.argmax(self.q[s])

    def _update_q(self, s, a, r, s_prime):
        """Apply the Q-learning update for the experience (s, a, r, s_prime)."""
        q_old = self.q[s][a]
        # Value of the greedy action in the successor state.
        best_next = np.argmax(self.q[s_prime])
        q_future = self.q[s_prime][best_next]
        target = r + self.gamma * q_future
        q_new = (1 - self.alpha) * q_old + self.alpha * target
        self.q[s][a] = q_new
        if self.verbose:
            print(f"{q_old=} {q_future=} {q_new=}")

    def querysetstate(self, s):
        """Set the current state without touching the Q table.

        Returns the action selected for *s*, which is also remembered.
        """
        a = self._get_a(s)
        if self.verbose:
            print(f"s = {s}, a = {a}")
        self.s, self.a = s, a
        return self.a

    def query(self, s_prime, r):
        """Learn from reward *r* and new state *s_prime*; return the next action."""
        prev_s, prev_a = self.s, self.a
        self._update_q(prev_s, prev_a, r, s_prime)
        next_a = self._get_a(s_prime)
        # Decay the random action rate.
        self.rar *= self.radr
        if self.dyna > 0:
            self._update_model(prev_s, prev_a, r, s_prime)
            self._dyna_q()
        self.a = next_a
        self.s = s_prime
        return self.a

    def _update_model(self, s, a, r, s_prime):
        """Remember the outcome of (s, a) the first time the pair is seen."""
        key = (s, a)
        if key in self.model:
            return
        self.model[key] = (r, s_prime)
        self.state_action_list.append(key)

    def _dyna_q(self):
        """Replay `dyna` randomly chosen remembered experiences."""
        for _ in range(self.dyna):
            s, a = random.choice(self.state_action_list)
            r, s_prime = self.model[(s, a)]
            self._update_q(s, a, r, s_prime)

    def author(self):
        """Return the user name of this module's author."""
        return 'felixm'
# When executed as a script, drive a verbose learner with dyna=2 through two
# querysetstate()/query() rounds so the printed Q updates can be inspected.
if __name__ == "__main__":
q = QLearner(verbose=True, dyna=2)
q.querysetstate(2)
q.query(15, 1.00)
q.querysetstate(15)
q.query(17, 0.10)

View File

@@ -0,0 +1,387 @@
"""MC3-P2: Q-learning & Dyna - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os
import sys
import traceback as tb
import datetime as dt
import random
import numpy as np
import pandas as pd
from collections import namedtuple
import util
# Student modules to import
main_code = "QLearner" # module name to import
# Seed referenced only by the commented-out seeding calls in test_qlearning.
robot_qlearning_testing_seed=1490652871
# One grading scenario: a world file, the reference rewards, a time limit in
# seconds and the points attached to the case.  `group` selects the branch
# taken in test_qlearning ('nodyna', 'dyna' or 'author').
QLearningTestCase = namedtuple('QLearning', ['description', 'group','world_file','best_reward','median_reward','max_time','points'])
qlearning_test_cases = [
QLearningTestCase(
description="World 1",
group='nodyna',
world_file='world01.csv',
best_reward=-17,
median_reward=-29.5,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 2",
group='nodyna',
world_file='world02.csv',
best_reward=-14,
median_reward=-19,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 4",
group='nodyna',
world_file='world04.csv',
best_reward=-24,
median_reward=-33,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 6",
group='nodyna',
world_file='world06.csv',
best_reward=-16,
median_reward=-23.5,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 7",
group='nodyna',
world_file='world07.csv',
best_reward=-14,
median_reward=-26,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 8",
group='nodyna',
world_file='world08.csv',
best_reward=-14,
median_reward=-19,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 9",
group='nodyna',
world_file='world09.csv',
best_reward=-15,
median_reward=-20,
max_time=2,
points=9.5
),
QLearningTestCase(
description="World 10",
group='nodyna',
world_file='world10.csv',
best_reward=-28,
median_reward=-42,
max_time=2,
points=9.5
),
# Dyna test cases
QLearningTestCase(
description="World 1, dyna=200",
group='dyna',
world_file='world01.csv',
best_reward=-12,
median_reward=-29.5,
max_time=10,
points=2.5
),
QLearningTestCase(
description="World 2, dyna=200",
group='dyna',
world_file='world02.csv',
best_reward=-14,
median_reward=-19,
max_time=10,
points=2.5
),
# Author check: carries no points of its own.
QLearningTestCase(
description="Author check",
group='author',
world_file='world01.csv',
best_reward=0,
median_reward=0,
max_time=10,
points=0
),
]
# Point total associated with this grading script.
max_points = 100.0
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
# Test function(s)
@pytest.mark.parametrize("description,group,world_file,best_reward,median_reward,max_time,points", qlearning_test_cases)
def test_qlearning(description, group, world_file, best_reward, median_reward, max_time, points, grader):
    """Grade one Q-learning scenario and report the outcome to ``grader``.

    The student's ``QLearner`` module is imported on first use. The world
    file is parsed into a float array, and depending on ``group`` the test:

    * ``'nodyna'``: runs 500 iterations with ``dyna=0``;
    * ``'dyna'``: runs 50 iterations with ``dyna=200``;
    * ``'author'``: only calls ``learner.author()``.

    A reward run fails when the value returned by ``qltest`` is below
    ``1.5 * median_reward``. The author check fails when ``author()``
    returns ``None`` or ``'tb34'``. ``best_reward`` is only read in the
    author branch.

    On any exception a ``'failed'`` result is recorded with a traceback
    filtered down to student files, and the exception is re-raised so
    pytest also sees the failure. Otherwise a ``'passed'`` result is
    recorded.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        # Import the student's module lazily and cache it in module globals.
        if not 'QLearner' in globals():
            import importlib
            m = importlib.import_module('QLearner')
            globals()['QLearner'] = m
        # Unpack test case
        world = np.array([list(map(float,s.strip().split(','))) for s in util.get_robot_world_file(world_file).readlines()])
        student_reward = None
        student_author = None
        msgs = []
        if group=='nodyna':
            def timeoutwrapper_nodyna():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                    num_actions = 4, \
                    alpha = 0.2, \
                    gamma = 0.9, \
                    rar = 0.98, \
                    radr = 0.999, \
                    dyna = 0, \
                    verbose=False)
                return qltest(worldmap=world,iterations=500,max_steps=10000,learner=learner,verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_nodyna,max_time,(),{})
            incorrect = False
            # Rewards are negative, so 1.5 * median_reward is the lowest
            # acceptable value.
            if student_reward < 1.5*median_reward:
                incorrect = True
                msgs.append(" Reward too low, expected %s, found %s"%(median_reward,student_reward))
        elif group=='dyna':
            def timeoutwrapper_dyna():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                    num_actions = 4, \
                    alpha = 0.2, \
                    gamma = 0.9, \
                    rar = 0.5, \
                    radr = 0.99, \
                    dyna = 200, \
                    verbose=False)
                return qltest(worldmap=world,iterations=50,max_steps=10000,learner=learner,verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_dyna,max_time,(),{})
            incorrect = False
            if student_reward < 1.5*median_reward:
                incorrect = True
                msgs.append(" Reward too low, expected %s, found %s"%(median_reward,student_reward))
        elif group=='author':
            # Start from a penalty; it is replaced by ``points`` only when
            # author() returns an acceptable value.
            points_earned = -20
            def timeoutwrapper_author():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                    num_actions = 4, \
                    alpha = 0.2, \
                    gamma = 0.9, \
                    rar = 0.98, \
                    radr = 0.999, \
                    dyna = 0, \
                    verbose=False)
                return learner.author()
            student_author = run_with_timeout(timeoutwrapper_author,max_time,(),{})
            student_reward = best_reward+1
            incorrect = False
            if (student_author is None) or (student_author=='tb34'):
                incorrect = True
                msgs.append(" author() method not implemented correctly. Found {}".format(student_author))
            else:
                points_earned = points
        if (not incorrect):
            points_earned += points
        if incorrect:
            inputs_str = " group: {}\n" \
                " world_file: {}\n"\
                " median_reward: {}\n".format(group, world_file, median_reward)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            # Fall back to a traceback attached to the exception itself.
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def getrobotpos(data):
    """Return the (row, col) of the robot start cell (value 2) in ``data``.

    When several cells hold 2, the last one in row-major order wins. When
    none does, a warning is printed and (-999, -999) is returned.
    """
    hits = [
        (row, col)
        for row in range(data.shape[0])
        for col in range(data.shape[1])
        if data[row, col] == 2
    ]
    R, C = hits[-1] if hits else (-999, -999)
    if (R + C) < 0:
        print("warning: start location not defined")
    return R, C
# find where the goal is in the map
def getgoalpos(data):
    """Return the (row, col) of the goal cell (value 3) in ``data``.

    When several cells hold 3, the last one in row-major order wins. When
    none does, a warning is printed and (-999, -999) is returned.
    """
    hits = [
        (row, col)
        for row in range(data.shape[0])
        for col in range(data.shape[1])
        if data[row, col] == 3
    ]
    R, C = hits[-1] if hits else (-999, -999)
    if (R + C) < 0:
        print("warning: goal location not defined")
    return (R, C)
# move the robot and report reward
def movebot(data, oldpos, a):
    """Try to move the robot one cell and return ``((row, col), reward)``.

    With probability ``randomrate`` the requested action is replaced by a
    random one. Actions: 0 north, 1 east, 2 south, 3 west. A move off the
    map or into an obstacle (1) leaves the robot at ``oldpos``. Stepping on
    quicksand (5 or 6) costs ``quicksandreward`` and marks the cell as 6 in
    ``data``. Reaching the goal (3) yields reward 1; everything else -1.
    """
    randomrate = 0.20  # how often do we move randomly
    quicksandreward = -100  # penalty for stepping on quicksand

    # decide if we're going to ignore the action and choose a random one
    if random.uniform(0.0, 1.0) <= randomrate:  # going rogue
        a = random.randint(0, 3)  # choose the random direction

    # candidate location after applying the action
    testr, testc = oldpos
    if a == 0:  # north
        testr -= 1
    elif a == 1:  # east
        testc += 1
    elif a == 2:  # south
        testr += 1
    elif a == 3:  # west
        testc -= 1

    reward = -1  # default reward is negative one

    on_map = (0 <= testr < data.shape[0]) and (0 <= testc < data.shape[1])
    if not on_map:
        testr, testc = oldpos  # off the map: revert
    else:
        cell = data[testr, testc]
        if cell == 1:  # it is an obstacle: revert
            testr, testc = oldpos
        elif cell == 5 or cell == 6:  # fresh or already-marked quicksand
            reward = quicksandreward
            data[testr, testc] = 6  # mark the event
        elif cell == 3:  # it is the goal
            reward = 1  # for reaching the goal

    return (testr, testc), reward  # return the new, legal location
# convert the location to a single integer
def discretize(pos, num_cols=10):
    """Convert a (row, col) location to a single integer state.

    @param pos: indexable pair ``(row, col)``.
    @param num_cols: row stride used for the encoding; defaults to 10, the
        width this function previously hard-coded.
    @returns ``row * num_cols + col``.
    """
    return pos[0] * num_cols + pos[1]
def qltest(worldmap, iterations, max_steps, learner, verbose):
    """Run ``iterations`` trips through ``worldmap`` and return the median reward.

    Each iteration starts from a fresh copy of the map and ends when the
    robot reaches the goal or ``max_steps`` moves were made. The learner is
    queried once per move with the new state and that move's reward.
    """
    startpos = getrobotpos(worldmap)  # find where the robot starts
    goalpos = getgoalpos(worldmap)  # find where the goal is
    all_rewards = []
    for iteration in range(1, iterations + 1):
        total_reward = 0
        data = worldmap.copy()
        robopos = startpos
        # set the state and get first action
        action = learner.querysetstate(discretize(robopos))
        count = 0
        while robopos != goalpos and count < max_steps:
            # move to new location according to action and then get a new action
            newpos, stepreward = movebot(data, robopos, action)
            # reward 1 for reaching the goal, otherwise the step reward
            r = 1 if newpos == goalpos else stepreward
            action = learner.query(discretize(newpos), r)

            # cells marked as stepped-in quicksand (6) keep their marker
            if data[robopos] != 6:
                data[robopos] = 4  # mark where we've been for map printing
            if data[newpos] != 6:
                data[newpos] = 2  # move to new location
            robopos = newpos  # update the location
            total_reward += stepreward
            count += 1
        if verbose:
            if count == max_steps:
                print("timeout")
            printmap(data)
            print(f"{iteration} {total_reward}")
        all_rewards.append(total_reward)
    return np.median(all_rewards)
# When executed as a script, hand this file to pytest; "-s" is passed so
# that print output is not captured.
if __name__ == "__main__":
    pytest.main(["-s", __file__])

View File

@@ -0,0 +1,223 @@
"""
Test a Q Learner in a navigation problem. (c) 2015 Tucker Balch
2016-10-20 Added "quicksand" and uncertain actions.
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import numpy as np
import random as rand
import time
import math
import QLearner as ql
# print out the map
def printmap(data):
    """Print ``data`` as an ASCII map framed by two dashed lines.

    Each known cell value is printed as one glyph followed by a space;
    values outside 0..6 print nothing.
    """
    glyphs = {
        0: " ",  # Empty space
        1: "O",  # Obstacle
        2: "*",  # El roboto
        3: "X",  # Goal
        4: ".",  # Trail
        5: "~",  # Quick sand
        6: "@",  # Stepped in quicksand
    }
    border = "--------------------"
    print(border)
    for row in range(data.shape[0]):
        for col in range(data.shape[1]):
            glyph = glyphs.get(data[row, col])
            if glyph is not None:
                print(glyph, end=' ')
        print()
    print(border)
# find where the robot is in the map
def getrobotpos(data):
    """Locate the robot start cell (value 2) and return it as (row, col).

    The last matching cell in row-major order is returned. If there is no
    match a warning is printed and the result is (-999, -999).
    """
    R, C = -999, -999
    n_rows, n_cols = data.shape[0], data.shape[1]
    for flat in range(n_rows * n_cols):
        row, col = divmod(flat, n_cols)
        if data[row, col] == 2:
            R, C = row, col
    if (R + C) < 0:
        print("warning: start location not defined")
    return R, C
# find where the goal is in the map
def getgoalpos(data):
    """Locate the goal cell (value 3) and return it as (row, col).

    The last matching cell in row-major order is returned. If there is no
    match a warning is printed and the result is (-999, -999).
    """
    R, C = -999, -999
    n_rows, n_cols = data.shape[0], data.shape[1]
    for flat in range(n_rows * n_cols):
        row, col = divmod(flat, n_cols)
        if data[row, col] == 3:
            R, C = row, col
    if (R + C) < 0:
        print("warning: goal location not defined")
    return (R, C)
# move the robot and report reward
def movebot(data,oldpos,a):
testr, testc = oldpos
randomrate = 0.20 # how often do we move randomly
quicksandreward = -100 # penalty for stepping on quicksand
# decide if we're going to ignore the action and
# choose a random one instead
if rand.uniform(0.0, 1.0) <= randomrate: # going rogue
a = rand.randint(0,3) # choose the random direction
# update the test location
if a == 0: #north
testr = testr - 1
elif a == 1: #east
testc = testc + 1
elif a == 2: #south
testr = testr + 1
elif a == 3: #west
testc = testc - 1
reward = -1 # default reward is negative one
# see if it is legal. if not, revert
if testr < 0: # off the map
testr, testc = oldpos
elif testr >= data.shape[0]: # off the map
testr, testc = oldpos
elif testc < 0: # off the map
testr, testc = oldpos
elif testc >= data.shape[1]: # off the map
testr, testc = oldpos
elif data[testr, testc] == 1: # it is an obstacle
testr, testc = oldpos
elif data[testr, testc] == 5: # it is quicksand
reward = quicksandreward
data[testr, testc] = 6 # mark the event
elif data[testr, testc] == 6: # it is still quicksand
reward = quicksandreward
data[testr, testc] = 6 # mark the event
elif data[testr, testc] == 3: # it is the goal
reward = 1 # for reaching the goal
return (testr, testc), reward #return the new, legal location
# convert the location to a single integer
def discretize(pos, num_cols=10):
    """Convert a (row, col) location to a single integer state.

    @param pos: indexable pair ``(row, col)``.
    @param num_cols: row stride used for the encoding; defaults to 10, the
        width this function previously hard-coded.
    @returns ``row * num_cols + col``.
    """
    return pos[0] * num_cols + pos[1]
def test(map, epochs, learner, verbose, max_steps=10000):
    """Run ``epochs`` trips to the goal and return the median total reward.

    @param map: 2-D world array (the name shadows the builtin but is kept
        because callers may pass it by keyword).
    @param epochs: number of trips; each starts from a fresh copy of ``map``.
    @param learner: object providing ``querysetstate(state)`` and
        ``query(state, reward)``.
    @param verbose: when true, print the final map and reward of each epoch.
    @param max_steps: step cap per epoch. The loop bound and the timeout
        check now share this value; previously the loop stopped at 10000
        while the check compared against 100000, so "timeout" was never
        reported.
    @returns the median of the per-epoch total rewards.
    """
    # each epoch involves one trip to the goal
    startpos = getrobotpos(map)  # find where the robot starts
    goalpos = getgoalpos(map)  # find where the goal is
    scores = np.zeros((epochs, 1))
    for epoch in range(1, epochs + 1):
        total_reward = 0
        data = map.copy()
        robopos = startpos
        state = discretize(robopos)  # convert the location to a state
        action = learner.querysetstate(state)  # set the state and get first action
        count = 0
        while robopos != goalpos and count < max_steps:
            # move to new location according to action and then get a new action
            newpos, stepreward = movebot(data, robopos, action)
            if newpos == goalpos:
                r = 1  # reward for reaching the goal
            else:
                r = stepreward  # negative reward for not being at the goal
            state = discretize(newpos)
            action = learner.query(state, r)

            # cells marked as stepped-in quicksand (6) keep their marker
            if data[robopos] != 6:
                data[robopos] = 4  # mark where we've been for map printing
            if data[newpos] != 6:
                data[newpos] = 2  # move to new location
            robopos = newpos  # update the location
            total_reward += stepreward
            count += 1
        if count == max_steps:
            print("timeout")
        if verbose:
            printmap(data)
            print(f"{epoch}, {total_reward}")
        scores[epoch - 1, 0] = total_reward
    return np.median(scores)
# run the code to test a learner
def test_code():
    """Train and score a learner on one world, without and with Dyna.

    Reads ``testworlds/world01.csv``, runs 500 epochs with ``dyna=0`` and
    50 epochs with ``dyna=200``, and prints the median total reward of each
    run. The world file is now opened in a ``with`` block so the handle is
    closed once the map has been parsed.
    """
    verbose = False  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world01.csv'
    with open(filename) as inf:
        data = np.array([list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    if verbose:
        printmap(data)

    # Seeds only Python's ``random`` module (used by movebot).
    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.98,
                          radr=0.999,
                          dyna=0,
                          verbose=False)  # initialize the learner
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    print()
    non_dyna_score = total_reward

    ######## run dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.99,
                          dyna=200,
                          verbose=False)  # initialize the learner
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    dyna_score = total_reward

    print()
    print()
    print(f"results for {filename}")
    print(f"non_dyna_score: {non_dyna_score}")
    print(f"dyna_score : {dyna_score}")
# Run the non-dyna and dyna experiments when executed as a script.
if __name__=="__main__":
    test_code()

View File

@@ -0,0 +1,10 @@
3,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,1,1,1,1,1,0,0,0
0,5,1,0,0,0,1,0,0,0
0,5,1,0,0,0,1,0,0,0
0,0,1,0,0,0,1,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,2,0,0,0,0,0
1 3 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0
4 0 0 1 1 1 1 1 0 0 0
5 0 5 1 0 0 0 1 0 0 0
6 0 5 1 0 0 0 1 0 0 0
7 0 0 1 0 0 0 1 0 0 0
8 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 2 0 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,1,0,1,0,0,0,0,0,0
0,1,0,1,0,0,0,0,0,0
0,1,0,0,0,0,0,0,0,0
0,1,0,1,1,1,1,1,1,1
2,1,0,1,0,0,0,0,0,0
0,1,0,1,0,0,1,0,0,3
0,0,0,1,0,0,1,0,0,0
0,1,0,0,0,0,1,1,1,1
0,1,0,1,0,0,0,0,0,0
0,0,0,1,0,0,0,0,0,0
1 0 1 0 1 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 1 0 0 0 0 0 0 0 0
4 0 1 0 1 1 1 1 1 1 1
5 2 1 0 1 0 0 0 0 0 0
6 0 1 0 1 0 0 1 0 0 3
7 0 0 0 1 0 0 1 0 0 0
8 0 1 0 0 0 0 1 1 1 1
9 0 1 0 1 0 0 0 0 0 0
10 0 0 0 1 0 0 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,0,0,1,0,0,0,1,0,3
0,1,0,1,0,1,0,1,0,0
0,1,0,1,0,1,0,1,0,1
0,1,0,1,0,1,0,1,0,0
0,1,0,1,0,1,0,1,1,0
0,1,0,1,0,1,0,1,0,0
0,1,0,1,0,1,0,1,0,1
0,1,0,1,0,1,0,1,0,0
0,1,0,1,0,1,0,1,1,0
2,1,0,0,0,1,0,0,0,0
1 0 0 0 1 0 0 0 1 0 3
2 0 1 0 1 0 1 0 1 0 0
3 0 1 0 1 0 1 0 1 0 1
4 0 1 0 1 0 1 0 1 0 0
5 0 1 0 1 0 1 0 1 1 0
6 0 1 0 1 0 1 0 1 0 0
7 0 1 0 1 0 1 0 1 0 1
8 0 1 0 1 0 1 0 1 0 0
9 0 1 0 1 0 1 0 1 1 0
10 2 1 0 0 0 1 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,0,0,0,0,1,0,1,0,3
0,0,0,0,0,1,0,1,0,0
0,0,0,1,0,1,0,1,0,1
0,0,0,1,0,1,0,1,0,0
0,0,0,1,0,0,0,1,1,0
2,0,0,1,1,1,0,1,0,0
0,0,0,1,0,1,0,0,0,1
0,0,5,0,0,1,0,1,0,0
0,0,1,1,1,1,0,1,1,0
0,0,0,0,0,1,0,0,0,0
1 0 0 0 0 0 1 0 1 0 3
2 0 0 0 0 0 1 0 1 0 0
3 0 0 0 1 0 1 0 1 0 1
4 0 0 0 1 0 1 0 1 0 0
5 0 0 0 1 0 0 0 1 1 0
6 2 0 0 1 1 1 0 1 0 0
7 0 0 0 1 0 1 0 0 0 1
8 0 0 5 0 0 1 0 1 0 0
9 0 0 1 1 1 1 0 1 1 0
10 0 0 0 0 0 1 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,1,0,0,0,1,0,1,0,3
1,0,0,0,0,0,1,0,0,0
0,0,1,1,0,1,0,1,0,1
1,1,0,0,0,0,1,0,0,0
0,0,0,0,0,0,0,0,1,0
0,1,0,1,1,0,0,1,0,0
1,0,0,0,0,1,0,0,0,1
0,0,0,0,0,0,0,1,0,0
1,0,1,0,0,1,0,0,1,0
2,0,0,0,0,0,1,0,0,0
1 0 1 0 0 0 1 0 1 0 3
2 1 0 0 0 0 0 1 0 0 0
3 0 0 1 1 0 1 0 1 0 1
4 1 1 0 0 0 0 1 0 0 0
5 0 0 0 0 0 0 0 0 1 0
6 0 1 0 1 1 0 0 1 0 0
7 1 0 0 0 0 1 0 0 0 1
8 0 0 0 0 0 0 0 1 0 0
9 1 0 1 0 0 1 0 0 1 0
10 2 0 0 0 0 0 1 0 0 0

View File

@@ -0,0 +1,10 @@
0,1,0,0,0,1,0,1,0,2
1,0,0,0,0,0,1,0,0,0
0,0,1,1,0,1,0,1,0,1
1,1,0,0,0,0,1,0,0,0
0,0,0,0,0,0,0,0,1,0
0,1,0,1,1,0,0,1,0,0
1,0,0,0,0,1,0,0,0,1
0,0,0,0,0,0,0,1,0,0
1,0,1,0,0,1,0,0,1,0
3,0,0,0,0,0,1,0,0,0
1 0 1 0 0 0 1 0 1 0 2
2 1 0 0 0 0 0 1 0 0 0
3 0 0 1 1 0 1 0 1 0 1
4 1 1 0 0 0 0 1 0 0 0
5 0 0 0 0 0 0 0 0 1 0
6 0 1 0 1 1 0 0 1 0 0
7 1 0 0 0 0 1 0 0 0 1
8 0 0 0 0 0 0 0 1 0 0
9 1 0 1 0 0 1 0 0 1 0
10 3 0 0 0 0 0 1 0 0 0

View File

@@ -0,0 +1,10 @@
0,0,0,0,2,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,1,1,1,1,1,0,0,0
0,0,1,0,3,0,1,0,0,0
0,0,1,0,0,0,1,0,0,0
0,0,1,0,0,0,1,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0
1 0 0 0 0 2 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0
4 0 0 1 1 1 1 1 0 0 0
5 0 0 1 0 3 0 1 0 0 0
6 0 0 1 0 0 0 1 0 0 0
7 0 0 1 0 0 0 1 0 0 0
8 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,1,0,1,0,0,0,0,0,0
0,1,0,1,0,0,0,0,0,0
0,1,0,0,0,0,0,0,0,0
0,1,0,1,1,1,1,1,1,1
3,1,0,1,0,0,0,0,0,0
0,1,0,1,0,0,1,0,0,2
0,0,0,1,0,0,1,0,0,0
0,1,0,0,0,0,1,1,1,1
0,1,0,1,0,0,0,0,0,0
0,0,0,1,0,0,0,0,0,0
1 0 1 0 1 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 1 0 0 0 0 0 0 0 0
4 0 1 0 1 1 1 1 1 1 1
5 3 1 0 1 0 0 0 0 0 0
6 0 1 0 1 0 0 1 0 0 2
7 0 0 0 1 0 0 1 0 0 0
8 0 1 0 0 0 0 1 1 1 1
9 0 1 0 1 0 0 0 0 0 0
10 0 0 0 1 0 0 0 0 0 0

View File

@@ -0,0 +1,10 @@
0,0,0,0,0,2,0,0,0,0
0,0,0,1,0,0,1,0,0,0
0,1,0,1,0,0,1,0,1,0
0,1,0,1,1,1,1,0,1,0
0,1,0,0,1,0,0,0,1,0
0,1,1,1,1,1,1,1,1,0
0,0,0,0,1,0,0,0,0,0
0,0,0,0,1,0,0,1,0,0
0,0,0,0,1,0,0,1,0,0
0,0,0,0,1,3,0,1,0,0
1 0 0 0 0 0 2 0 0 0 0
2 0 0 0 1 0 0 1 0 0 0
3 0 1 0 1 0 0 1 0 1 0
4 0 1 0 1 1 1 1 0 1 0
5 0 1 0 0 1 0 0 0 1 0
6 0 1 1 1 1 1 1 1 1 0
7 0 0 0 0 1 0 0 0 0 0
8 0 0 0 0 1 0 0 1 0 0
9 0 0 0 0 1 0 0 1 0 0
10 0 0 0 0 1 3 0 1 0 0

View File

@@ -0,0 +1,10 @@
0,0,0,0,0,0,0,0,0,0
0,0,0,1,0,0,1,0,0,0
0,1,0,1,0,0,1,0,1,0
0,1,0,1,1,1,1,0,1,0
0,1,0,0,1,0,0,0,1,0
0,1,1,1,1,0,1,1,1,0
0,0,0,0,1,0,0,0,0,0
0,0,0,0,1,0,0,1,0,0
0,0,0,0,1,0,0,1,0,0
0,0,0,2,1,3,0,1,0,0
1 0 0 0 0 0 0 0 0 0 0
2 0 0 0 1 0 0 1 0 0 0
3 0 1 0 1 0 0 1 0 1 0
4 0 1 0 1 1 1 1 0 1 0
5 0 1 0 0 1 0 0 0 1 0
6 0 1 1 1 1 0 1 1 1 0
7 0 0 0 0 1 0 0 0 0 0
8 0 0 0 0 1 0 0 1 0 0
9 0 0 0 0 1 0 0 1 0 0
10 0 0 0 2 1 3 0 1 0 0

View File

@@ -0,0 +1,77 @@
import numpy as np
class AbstractTreeLearner:
    """Decision tree stored as a flat array of (factor, value, left, right) rows.

    ``left`` and ``right`` are offsets relative to the node's own index.
    Leaves use ``LEAF`` as factor and keep their prediction in the value
    slot. Subclasses must set ``self.leaf_size`` and implement
    ``get_i_and_split_value(xs, y)`` returning ``(feature_index, split_value)``.
    """

    LEAF = -1
    NA = -1

    def author(self):
        return 'felixm'  # replace tb34 with your Georgia Tech username

    def create_node(self, factor, split_value, left, right):
        """Return a one-row structured array describing a single node."""
        return np.array([(factor, split_value, left, right), ],
                        dtype='|i4, f4, i4, i4')

    def query_point(self, point):
        """Walk the tree for one sample and return the value of the leaf reached."""
        node_index = 0
        while self.rel_tree[node_index][0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = node[0]
            split_value = node[1]
            if point[split_factor] <= split_value:
                # Follow the relative offset to the left sub-tree.
                node_index += node[2]
            else:
                node_index += node[3]
        return self.rel_tree[node_index][1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)

    def _mean_leaf(self, y):
        """Build a leaf whose value is the mean of ``y`` snapped to -1, 0 or 1."""
        value = np.mean(y)
        if value < -0.2:
            value = -1
        elif value > 0.2:
            value = 1
        else:
            value = 0
        return self.create_node(self.LEAF, value, self.NA, self.NA)

    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: 2-D array of X values, one row per sample
        @param y: the Y training values, one per row of xs
        @returns the tree as a structured array with the root at index 0
        """
        assert(xs.shape[0] == y.shape[0])
        assert(xs.shape[0] > 0)  # If this is 0 something went wrong.

        if xs.shape[0] <= self.leaf_size:
            return self._mean_leaf(y)

        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        # Identical feature rows cannot be separated by any split, so stop
        # here instead of asking the subclass for a split that cannot exist.
        if np.all(xs == xs[0]):
            return self._mean_leaf(y)

        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value

        # A split that leaves one side empty would recurse on zero rows.
        if not select_l.any() or not select_r.any():
            return self._mean_leaf(y)

        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # The left child follows the root directly; the right child starts
        # after the whole left sub-tree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)

View File

@@ -0,0 +1,34 @@
import pandas as pd
import util as ut
import datetime as dt
class BenchmarkStrategy:
    """Buy-and-hold baseline exposing the same API as the learning strategies."""

    def __init__(self, verbose=False, impact=0.0, commission=0.0):
        self.verbose, self.impact, self.commission = verbose, impact, commission

    def addEvidence(self, symbol=0, sd=0, ed=0, sv=0):
        """Keep this so that API is valid."""
        return None

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Benchmark is to buy 1000 shares and hold."""
        # get_data automatically adds SPY; only its index is used here.
        price_frame = ut.get_data([symbol], pd.date_range(sd, ed))

        orders = pd.DataFrame(index=price_frame.index)
        for column, default in (("Symbol", symbol), ("Order", ""), ("Shares", 0)):
            orders[column] = default

        # Enter on the first row and close the position on the last one.
        orders.iloc[0] = [symbol, "BUY", 1000]
        orders.iloc[-1] = [symbol, "SELL", -1000]

        if self.verbose:
            print(type(orders))  # it better be a DataFrame!
            print(orders)
        return orders

View File

@@ -0,0 +1,114 @@
import datetime as dt
import pandas as pd
import util
import indicators
class ManualStrategy:
    """Rule-based trading strategy built on MACD, RSI and price/SMA.

    ``testPolicy`` returns an orders frame with the columns ``Symbol``,
    ``Order`` and ``Shares``. The position is tracked in ``self.holding``
    and is kept at -1000, 0 or 1000 shares by the rules below.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Load prices and volume for ``symbol``; print them when verbose.

        The rules are hand-written, so nothing is learned here. The method
        is kept so this class offers the same API as the learners.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = util.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        if self.verbose:
            print(prices)

        # automatically adds SPY
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)

    def macd_strat(self, macd, orders):
        """Strategy based on MACD cross.

        Writes the traded share count per row into ``orders['Shares']``.
        The rule looks at two consecutive rows, so the first row has no
        complete window and is left as NaN by the rolling apply.
        """

        def strat(ser):
            m = macd.loc[ser.index]
            prev_macd, prev_signal, _ = m.iloc[0]
            cur_macd, cur_signal, _ = m.iloc[1]
            shares = 0
            # MACD crosses above its signal while below -1: go long.
            if cur_macd < -1 and prev_macd < prev_signal \
                    and cur_macd > cur_signal:
                if self.holding == 0:
                    shares = 1000
                elif self.holding == -1000:
                    shares = 2000
            # MACD crosses below its signal while above 1: go short.
            elif cur_macd > 1 and prev_macd > prev_signal \
                    and cur_macd < cur_signal:
                if self.holding == 0:
                    shares = -1000
                elif self.holding == 1000:
                    shares = -2000
            self.holding += shares
            return shares

        orders['Shares'] = orders['Shares'].rolling(2).apply(strat)

    def three_indicator_strat(self, macd, rsi, price_sma, orders):
        """Strategy based on three indicators. Thresholds selected based on
        scatter plots.

        ``macd`` rows must unpack into three values, the last being the
        MACD difference. ``rsi`` and ``price_sma`` are read through their
        first column. Writes the traded share count per row into
        ``orders['Shares']``.
        """

        def strat(row):
            shares = 0
            _, _, macd_diff = macd.loc[row.name]
            # ``.iloc[0]`` replaces ``[0]``: positional access through
            # ``Series[int]`` on a label-indexed row is deprecated in pandas.
            cur_rsi = rsi.loc[row.name].iloc[0]
            cur_price_sma = price_sma.loc[row.name].iloc[0]
            if self.holding == -1000 and cur_price_sma < 0.9:
                shares = 2000
            elif self.holding == 0 and cur_price_sma < 0.9:
                shares = 1000
            elif self.holding == -1000 and cur_rsi > 80:
                shares = 2000
            elif self.holding == 0 and cur_rsi > 80:
                shares = 1000
            elif self.holding == -1000 and macd_diff < -0.5:
                shares = 2000
            elif self.holding == 0 and macd_diff < -0.5:
                shares = 1000
            elif self.holding == 1000 and cur_price_sma > 1.1:
                shares = -2000
            elif self.holding == 0 and cur_price_sma > 1.1:
                shares = -1000
            self.holding += shares
            return shares

        orders['Shares'] = orders.apply(strat, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000, macd_strat=False):
        """Return the orders frame produced by the selected rule set.

        @param macd_strat: use the MACD-cross rules when true, otherwise
            the three-indicator rules.
        """
        self.holding = 0
        df = util.get_data([symbol], pd.date_range(sd, ed))
        df.drop(columns=["SPY"], inplace=True)

        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0

        macd = indicators.macd(df, symbol)
        rsi = indicators.rsi(df, symbol)
        price_sma = indicators.price_sma(df, symbol, [8])

        if macd_strat:
            self.macd_strat(macd, orders)
        else:
            self.three_indicator_strat(macd, rsi, price_sma, orders)
        return orders

View File

@@ -0,0 +1,169 @@
import datetime as dt
import pandas as pd
import util
import indicators
from qlearning_robot.QLearner import QLearner as Learner
from dataclasses import dataclass
@dataclass
class Holding:
    """Simulated account state that handle_order updates in place."""
    # Cash balance; becomes a float once cost, commission or impact are deducted.
    cash: float
    # Shares currently held; handle_order moves this in steps of 1000.
    shares: int
    # cash + shares * last price, recomputed by handle_order.
    equity: float
class QLearner(object):
    """Trading strategy that maps binned indicators to a tabular Q-learner.

    A state combines the current position (-1000, 0 or 1000 shares) with
    one bin index per indicator. Actions are 0 = buy, 1 = sell, 2 = hold.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False, n_bins=5):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.n_bins = n_bins
        self.bins = {}
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
        self.learner = Learner(self.num_states, self.num_actions)

    def row_to_state(self, holding, df_row):
        """Transforms a row into a state value."""
        holding = (holding + 1000) // 1000
        assert(holding in [0, 1, 2])

        # For each indicator that goes into the state the interval becomes
        # smaller based on how many bins the indicator has. The first
        # 'indicator' is the information about how many shares we are
        # currently holding. So for example, if I have 450 states then the
        # interval (aka remaining_states) is 150 because there are three
        # values for holding:
        #   holding = 0 -> state = 0 * 150 = 0
        #   holding = 1 -> state = 1 * 150 = 150
        #   holding = 2 -> state = 2 * 150 = 300
        remaining_states = self.num_states // 3
        state = holding * remaining_states

        for indicator in self.indicators:
            value = df_row[indicator]
            bin_n = self.indicator_value_to_bin(indicator, value)
            remaining_states //= self.n_bins
            state += bin_n * remaining_states
        return state

    def indicator_value_to_bin(self, indicator, value):
        """Return the index of the first bin whose upper bound exceeds ``value``.

        Values at or above every bound fall into the last bin. With no
        bounds at all (``n_bins == 1``) the only bin, 0, is returned; the
        previous code raised UnboundLocalError in that case.
        """
        upper_bounds = self.bins[indicator]
        for i, upper_bound in enumerate(upper_bounds):
            if value < upper_bound:
                return i
        return len(upper_bounds)

    def add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        for indicator in self.indicators:
            if indicator == "macd_diff":
                indicators.macd(df, symbol)
                df.drop(columns=["macd", "macd_signal"], inplace=True)
            elif indicator == "rsi":
                indicators.rsi(df, symbol)
            elif indicator.startswith("price_sma_"):
                period = int(indicator.replace("price_sma_", ""))
                indicators.price_sma(df, symbol, [period])
        df.drop(columns=["SPY"], inplace=True)
        df.dropna(inplace=True)

    def bin_indicators(self, df):
        """Create bins for indicators."""
        for indicator in self.indicators:
            _, bins = pd.qcut(df[indicator], self.n_bins, retbins=True)
            # Keep only the interior edges; the outermost two are implied
            # by the first and last bin.
            self.bins[indicator] = bins[1:self.n_bins]

    def get_num_states(self):
        """Return the total num of states."""
        num_states = 3  # Three states holding (1000, 0, -1000)
        for _ in self.indicators:
            num_states *= self.n_bins
        return num_states

    def handle_order(self, action, holding, adj_closing_price):
        """Apply ``action`` to ``holding`` in place at ``adj_closing_price``.

        A buy or sell moves the position by 1000 shares and is ignored when
        it would leave the -1000..1000 range. Commission and impact are
        only charged when shares actually change hands.
        """
        shares = 0
        if action == 0:  # buy
            if holding.shares == 0 or holding.shares == -1000:
                shares = 1000
        elif action == 1:  # sell
            if holding.shares == 0 or holding.shares == 1000:
                shares = -1000
        elif action == 2:  # hold
            shares = 0

        cost = shares * adj_closing_price
        if shares != 0:
            # Charge commission and deduct impact penalty
            holding.cash -= self.commission
            holding.cash -= (self.impact * adj_closing_price * abs(shares))
        holding.cash -= cost
        holding.shares += shares
        holding.equity = holding.cash + holding.shares * adj_closing_price

    def get_reward(self, equity, new_equity):
        """Return 1 when equity grew, otherwise -1."""
        if new_equity > equity:
            return 1
        return -1

    def train(self, df, symbol, sv):
        """Run one pass over ``df``, updating the learner after every row."""
        holding = Holding(sv, 0, sv)

        row = df.iloc[0]
        state = self.row_to_state(holding.shares, row)
        action = self.learner.querysetstate(state)
        adj_closing_price = row[symbol]
        equity = holding.equity
        self.handle_order(action, holding, adj_closing_price)

        for index, row in df.iloc[1:].iterrows():
            adj_closing_price = row[symbol]
            # Value the position held since the previous row at today's
            # price; the change in equity is the reward for the last action.
            new_equity = holding.cash + holding.shares * adj_closing_price
            r = self.get_reward(equity, new_equity)
            s_prime = self.row_to_state(holding.shares, row)
            a = self.learner.query(s_prime, r)
            equity = new_equity
            self.handle_order(a, holding, adj_closing_price)
            if self.verbose:
                print(f"{holding=} {s_prime=} {r=} {a=}")

    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
        """Fit the indicator bins on the training range and train for 15 passes."""
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        self.bin_indicators(df)
        for _ in range(15):
            self.train(df, symbol, sv)

    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
        """Return the orders chosen by the trained learner for the given range.

        The bins computed in ``addEvidence`` are reused. Returns the full
        orders frame when ``self.testing`` is set, otherwise only the
        ``Shares`` column as a frame.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        # Built before add_indicators drops rows, so every date of the
        # loaded frame keeps a row (with 0 shares by default).
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.add_indicators(df, symbol)
        holding = 0

        for index, row in df.iterrows():
            state = self.row_to_state(holding, row)
            action = self.learner.querysetstate(state)

            if action == 0:  # buy
                if holding == 0 or holding == -1000:
                    holding += 1000
                    orders.loc[index, "Shares"] = 1000
            elif action == 1:  # sell
                if holding == 0 or holding == 1000:
                    holding -= 1000
                    orders.loc[index, "Shares"] = -1000
            elif action == 2:  # hold
                pass

        if self.testing:
            return orders
        else:
            return orders[["Shares"]]

View File

@@ -0,0 +1,30 @@
import numpy as np
from AbstractTreeLearner import AbstractTreeLearner
class RTLearner(AbstractTreeLearner):
    """Random tree learner: splits on a random feature at a random midpoint."""

    def __init__(self, leaf_size = 1, verbose = False):
        self.leaf_size = leaf_size
        self.verbose = verbose

    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.

        Make sure that not all X are the same for i and also pick
        different values to average the split_value from.

        @param xs: 2-D array of X values, one row per sample
        @param y: the Y values (unused; part of the learner interface)
        @returns (i, split_value)
        @raises ValueError: when every feature column is constant, in which
            case no split exists. The previous code retried forever here.
        """
        # varying[j] is True when column j holds at least two different
        # values, i.e. when it can be split at all.
        varying = np.any(xs != xs[0], axis=0)
        if not varying.any():
            raise ValueError("cannot split: every feature column is constant")

        i = np.random.randint(0, xs.shape[1])
        while not varying[i]:
            i = np.random.randint(0, xs.shape[1])

        # Draw two distinct values of the chosen feature; their midpoint
        # lies strictly between them, so both sides of the split get rows.
        a = np.array(list(set(xs[:, i])))
        r1, r2 = np.random.choice(a, size = 2, replace = False)
        assert(r1 != r2)
        split_value = (r1 + r2) / 2.0
        return i, split_value

View File

@@ -0,0 +1,106 @@
import datetime as dt
import pandas as pd
import util
import indicators
from RTLearner import RTLearner
class StrategyLearner(object):
    """Trading strategy that classifies 3-day returns with a random tree.

    ``addEvidence`` labels each training row as buy (1), sell (-1) or
    nothing (0) from the ``pct_3`` column and fits an ``RTLearner``.
    ``testPolicy`` turns the predicted labels into orders.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing

    def _get_volume(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1)):
        """For reference: load the volume column for ``symbol``.

        The previous body referenced the undefined names ``ut``, ``syms``
        and ``dates`` and could not run. They are now derived from the
        arguments and the module's ``util`` import.

        @returns the volume frame restricted to ``symbol``
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)
        return volume

    def _add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        df.drop(columns=["SPY"], inplace=True)
        indicators.macd(df, symbol)
        indicators.rsi(df, symbol)
        indicators.price_sma(df, symbol, [8])
        indicators.price_delta(df, symbol, 3)
        df.dropna(inplace=True)

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Train the tree on ``symbol`` and return the labels used as a Series."""
        self.y_threshold = 0.2
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']

        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)

        def classify_y(row):
            if row > self.y_threshold:
                return 1
            elif row < -self.y_threshold:
                return -1
            return 0

        def set_y_threshold(pct):
            if max(pct) < 0.2:
                self.y_threshold = 0.02

        self.learner = RTLearner(leaf_size = 5)
        # self.learner = BagLearner(RTLearner, 3, {'leaf_size': 5})
        data_x = df[self.indicators].to_numpy()
        pct = df['pct_3']

        # This is a hack to get a low enough buy/sell threshold for the
        # cyclic test 'ML4T-220' where the max pct_3 is 0.0268.
        set_y_threshold(pct)
        y = pct.apply(classify_y)

        self.learner.addEvidence(data_x, y.to_numpy())
        return y

    def strat(self, data_y, orders):
        """Fill ``orders['Shares']`` from the predicted labels in ``data_y``.

        The label is read from the first column of ``data_y``. The position
        starts flat and is flipped fully (2000 shares) when the label
        opposes it.
        """
        self.holding = 0

        def decide(row):
            y = int(data_y.loc[row.name].iloc[0])
            shares = 0
            if self.holding == 0 and y == 1:
                shares = 1000
            elif self.holding == -1000 and y == 1:
                shares = 2000
            elif self.holding == 0 and y == -1:
                shares = -1000
            elif self.holding == 1000 and y == -1:
                shares = -2000
            self.holding += shares
            return shares

        orders["Shares"] = orders.apply(decide, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return orders for the given range using the tree from ``addEvidence``.

        Returns the full orders frame when ``self.testing`` is set,
        otherwise only the ``Shares`` column as a frame.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)
        data_x = df[self.indicators].to_numpy()
        data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x))

        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.strat(data_y, orders)
        if self.testing:
            return orders
        else:
            return orders[["Shares"]]

View File

@@ -0,0 +1,236 @@
import pandas as pd
import datetime as dt
import sys
import util
import indicators
import marketsim.marketsim as marketsim
import matplotlib.pyplot as plt
from matplotlib.widgets import MultiCursor
from BenchmarkStrategy import BenchmarkStrategy
from ManualStrategy import ManualStrategy
from StrategyLearner import StrategyLearner
from QLearner import QLearner
def plot_indicators(symbol, df):
    """Show price, Bollinger bands, price/SMA, MACD and RSI, then exit.

    The indicator columns are added to ``df`` as a side effect.  The process
    terminates with exit code 0 once the plot window is closed.
    """
    fig, axes = plt.subplots(4, sharex=True)
    price_panel, sma_panel, macd_panel, rsi_panel = axes

    sma_ratio = indicators.price_sma(df, symbol, [8])
    bands = indicators.bollinger_band(df, symbol)
    rsi_line = indicators.rsi(df, symbol)
    macd_lines = indicators.macd(df, symbol).copy()

    panels = (
        (df[[symbol]], price_panel),
        (bands, price_panel),
        (sma_ratio, sma_panel),
        (macd_lines, macd_panel),
        (rsi_line, rsi_panel),
    )
    for frame, panel in panels:
        frame.plot(ax=panel)
    for panel in axes.flat:
        panel.grid()

    # Keep a reference so the cursor widget stays alive while the plot shows.
    cursor = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    plt.show()
    sys.exit(0)
def visualize_correlations(symbol, df):
    """Scatter-plot indicators against forward returns, then exit.

    The left column plots ``price_sma_8`` and the right column ``macd_diff``
    against the five-, three- and one-day forward returns (top to bottom).
    Indicator columns are added to ``df`` as a side effect, and the process
    terminates with exit code 0 once the plot window is closed.
    """
    indicators.price_sma(df, symbol, [8, 21])
    for horizon in (5, 3, 1):
        indicators.price_delta(df, symbol, horizon)
    indicators.macd(df, symbol)
    indicators.rsi(df, symbol)

    fig, ax = plt.subplots(3, 2)
    for col, x_key in enumerate(("price_sma_8", "macd_diff")):
        for row, y_key in enumerate(("pct_5", "pct_3", "pct_1")):
            df.plot.scatter(x=x_key, y=y_key, ax=ax[row, col])
    for panel in ax.flat:
        panel.grid()
    plt.show()
    sys.exit(0)
def compare_manual_strategies(symbol, sv, sd, ed):
    """Save ``figure_1.png`` comparing the benchmark with both manual strategies.

    The figure has three panels: the price, the portfolio values and the
    orders of the benchmark, the MACD strategy and the three-indicator
    strategy.
    """
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)

    def record(orders, value_column, orders_column):
        """Store portfolio values and share orders of one strategy in df."""
        df[value_column] = marketsim.compute_portvals(orders, sv)
        df[orders_column] = orders["Shares"]

    benchmark = BenchmarkStrategy()
    record(benchmark.testPolicy(symbol, sd, ed, sv),
           "Benchmark", "Orders Benchmark")
    manual = ManualStrategy()
    record(manual.testPolicy(symbol, sd, ed, sv, macd_strat=True),
           "MACD Strat", "Orders MACD")
    record(manual.testPolicy(symbol, sd, ed, sv),
           "Three Strat", "Orders Three")

    fig, axes = plt.subplots(3, sharex=True)
    panels = (
        [symbol],
        ["Benchmark", "MACD Strat", "Three Strat"],
        ["Orders Benchmark", "Orders MACD", "Orders Three"],
    )
    for columns, panel in zip(panels, axes):
        df[columns].plot(ax=panel)
    for panel in axes:
        panel.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_1.png', dpi=fig.dpi)
def compare_all_strategies(symbol, sv, sd, ed):
    """Save ``figure_2.png`` comparing benchmark, manual and RT strategies.

    Portfolio values are normalized to their first value.  The strategy
    learner is trained and evaluated on the same ``sd``..``ed`` period.
    """
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)

    def record(orders, label):
        """Store normalized portfolio values and orders of one strategy."""
        portvals = marketsim.compute_portvals(orders, sv)
        df[label] = indicators.normalize(portvals)
        df[f"Orders {label}"] = orders["Shares"]

    record(BenchmarkStrategy().testPolicy(symbol, sd, ed, sv), "Benchmark")
    record(ManualStrategy().testPolicy(symbol, sd, ed, sv), "Manual")

    learner = StrategyLearner(testing=True)
    learner.addEvidence(symbol, sd, ed, sv)
    record(learner.testPolicy(symbol, sd, ed, sv), "Strategy")

    fig, axes = plt.subplots(3, sharex=True)
    labels = ["Benchmark", "Manual", "Strategy"]
    panels = ([symbol], labels, [f"Orders {label}" for label in labels])
    for columns, panel in zip(panels, axes):
        df[columns].plot(ax=panel)
    for panel in axes:
        panel.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_2.png', dpi=fig.dpi)
def compare_number_trades(symbol="JPM", sv=10000, sd=None, ed=None,
                          commissions=(0, 9.95, 20, 50, 100), impact=0.005):
    """Print how many orders the Q-learner places for each commission.

    For every commission a fresh ``QLearner`` is trained and then tested on
    the same period; the number of days with a non-zero order is printed as
    one table row.

    :param symbol: symbol to trade.
    :param sv: start value passed to the learner.
    :param sd: start of the period; defaults to 2008-01-01.
    :param ed: end of the period; defaults to 2009-12-31.
    :param commissions: commission values to compare.
    :param impact: market impact passed to every learner.
    """
    sd = dt.datetime(2008, 1, 1) if sd is None else sd
    ed = dt.datetime(2009, 12, 31) if ed is None else ed

    print("| commission | n_orders |")
    print("-------------------------")
    for commission in commissions:
        ql = QLearner(testing=True, commission=commission, impact=impact)
        ql.addEvidence(symbol, sd, ed, sv)
        orders = ql.testPolicy(symbol, sd, ed, sv)
        n_orders = int((orders["Shares"] != 0).sum())
        print(f"| {commission} | {n_orders} |")
def compare_q_learners():
    """Save ``figure_4.png`` comparing two Q-learner configurations, then exit.

    Both learners (default settings and ``n_bins=4``) are trained on JPM
    2008-2009 and evaluated together with the benchmark on 2010-2011.  The
    process terminates with exit code 0 after the figure is written.
    """
    symbol = "JPM"
    sv = 10000
    train_start, train_end = dt.datetime(2008, 1, 1), dt.datetime(2009, 12, 31)
    test_start, test_end = dt.datetime(2010, 1, 1), dt.datetime(2011, 12, 31)

    df = util.get_data([symbol], pd.date_range(test_start, test_end))
    df.drop(columns=["SPY"], inplace=True)

    def record(orders, label):
        """Store normalized portfolio values and orders of one strategy."""
        portvals = marketsim.compute_portvals(orders, sv)
        df[label] = indicators.normalize(portvals)
        df[f"Orders {label}"] = orders["Shares"]

    benchmark = BenchmarkStrategy()
    record(benchmark.testPolicy(symbol, test_start, test_end, sv), "Benchmark")

    for label, extra_args in (("QL 5", {}), ("QL 4", {"n_bins": 4})):
        learner = QLearner(testing=True, verbose=False, **extra_args)
        learner.addEvidence(symbol, train_start, train_end, sv)
        record(learner.testPolicy(symbol, test_start, test_end, sv), label)

    fig, axes = plt.subplots(3, sharex=True)
    labels = ["Benchmark", "QL 5", "QL 4"]
    panels = ([symbol], labels, [f"Orders {label}" for label in labels])
    for columns, panel in zip(panels, axes):
        df[columns].plot(ax=panel)
    for panel in axes:
        panel.grid()
    cursor = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_4.png', dpi=fig.dpi)
    sys.exit(0)
def experiment1(create_report=False):
    """Run experiment 1 for JPM.

    With ``create_report=True`` the report figures ``figure_1.png`` and
    ``figure_2.png`` are written for the in-sample period and the function
    returns, so that a caller can continue with further experiments
    (previously it terminated the whole process with ``sys.exit(0)``).

    Otherwise a Q-learner is trained in-sample (2008-2009) and compared with
    the benchmark out-of-sample (2010-2011) in an interactive plot.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample

    if create_report:
        compare_manual_strategies(symbol, sv, sd, ed)
        compare_all_strategies(symbol, sv, sd, ed)
        return

    # The out-of-sample prices are only needed for the interactive comparison.
    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    df.drop(columns=["SPY"], inplace=True)

    # Other explorations in this module that can be called here by hand:
    # visualize_correlations(symbol, df), plot_indicators(symbol, df),
    # compare_number_trades(), compare_q_learners().

    bs = BenchmarkStrategy()
    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Benchmark"] = orders["Shares"]

    ql = QLearner(testing=True, verbose=False)
    ql.addEvidence(symbol, sd, ed, sv)
    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL"] = orders["Shares"]

    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "QL"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders QL"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    # Keep a reference so the cursor widget stays alive while the plot shows.
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    plt.show()


if __name__ == "__main__":
    experiment1()

View File

@@ -0,0 +1,8 @@
import experiment1
def experiment2():
    """Run experiment 2: print the Q-learner's order count per commission."""
    experiment1.compare_number_trades()


if __name__ == "__main__":
    experiment2()

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

View File

@@ -0,0 +1,339 @@
"""MC3-P3: Strategy Learner - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os
import sys
import traceback as tb
import datetime as dt
import numpy as np
import pandas as pd
from collections import namedtuple
import time
import util
import random
# Test cases
StrategyTestCase = namedtuple(
    'Strategy',
    [
        'description',
        'insample_args',
        'outsample_args',
        'benchmark_type',
        'benchmark',
        'impact',
        'train_time',
        'test_time',
        'max_time',
        'seed',
    ],
)

# Every case trains on 2008-2009 and is evaluated out of sample on 2010-2011.
strategy_test_cases = [
    StrategyTestCase(
        description="ML4T-220",
        insample_args={
            "symbol": "ML4T-220",
            "sd": dt.datetime(2008, 1, 1),
            "ed": dt.datetime(2009, 12, 31),
            "sv": 100000,
        },
        outsample_args={
            "symbol": "ML4T-220",
            "sd": dt.datetime(2010, 1, 1),
            "ed": dt.datetime(2011, 12, 31),
            "sv": 100000,
        },
        benchmark_type='clean',
        benchmark=1.0,  # benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000,
    ),
    StrategyTestCase(
        description="AAPL",
        insample_args={
            "symbol": "AAPL",
            "sd": dt.datetime(2008, 1, 1),
            "ed": dt.datetime(2009, 12, 31),
            "sv": 100000,
        },
        outsample_args={
            "symbol": "AAPL",
            "sd": dt.datetime(2010, 1, 1),
            "ed": dt.datetime(2011, 12, 31),
            "sv": 100000,
        },
        benchmark_type='stock',
        benchmark=0.1581999999999999,  # benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000,
    ),
    StrategyTestCase(
        description="SINE_FAST_NOISE",
        insample_args={
            "symbol": "SINE_FAST_NOISE",
            "sd": dt.datetime(2008, 1, 1),
            "ed": dt.datetime(2009, 12, 31),
            "sv": 100000,
        },
        outsample_args={
            "symbol": "SINE_FAST_NOISE",
            "sd": dt.datetime(2010, 1, 1),
            "ed": dt.datetime(2011, 12, 31),
            "sv": 100000,
        },
        benchmark_type='noisy',
        benchmark=2.0,  # benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000,
    ),
    StrategyTestCase(
        description="UNH - In sample",
        insample_args={
            "symbol": "UNH",
            "sd": dt.datetime(2008, 1, 1),
            "ed": dt.datetime(2009, 12, 31),
            "sv": 100000,
        },
        outsample_args={
            "symbol": "UNH",
            "sd": dt.datetime(2010, 1, 1),
            "ed": dt.datetime(2011, 12, 31),
            "sv": 100000,
        },
        benchmark_type='stock',
        benchmark=-0.25239999999999996,  # benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000,
    ),
]

max_points = 60.0
# Surround comments with an HTML <pre> tag (for the T-Square comments field).
html_pre_block = True
# Largest long or short position, in shares, that a strategy may hold.
MAX_HOLDINGS = 1000
# Test function(s)
def _record_trade_violation(trades, label, msgs):
    """Append the first rule violation found in ``trades`` to ``msgs``.

    Every trade must be 0, +/-MAX_HOLDINGS or +/-2*MAX_HOLDINGS shares and
    the running position must stay within +/-MAX_HOLDINGS.  The position is
    only checked on days whose trade size is legal.

    :param trades: single-column DataFrame of share trades indexed by date.
    :param label: name of the frame used in the message, e.g. "first insample".
    :param msgs: list that receives the failure message.
    :return: True when a violation was found, otherwise False.
    """
    holdings = 0.0
    for date, trade in trades.iterrows():
        size = trade.iloc[0]
        holdings += size
        if size != 0 and abs(size) != MAX_HOLDINGS and abs(size) != 2 * MAX_HOLDINGS:
            msgs.append(" illegal trade in {} DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(label, 0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
            return True
        if abs(holdings) > MAX_HOLDINGS:
            msgs.append(" holdings more than {} long or short in {} DF. Date {}, Trade {}".format(MAX_HOLDINGS, label, date, trade))
            return True
    return False


@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Requires test description, insample args (dict), outsample args (dict),
    benchmark_type (str), benchmark (float), impact (float), the time limits
    for training, testing and the whole run (seconds), random seed (int),
    and a grader fixture.

    Points are awarded for: training time (1), query time (2), repeatable
    in-sample trades (2), beating the benchmark in sample (5) and, depending
    on the benchmark type, either beating the benchmark out of sample or
    finishing the out-of-sample query in time (5).  The result is reported
    to ``grader``; any failure is re-raised after being recorded.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'StrategyLearner' not in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        # Only the 'clean' benchmark has to be beaten out of sample.
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark

        def timeoutwrapper_strategylearner():
            # Set fixed seeds for repeatability.
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False, impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time() - tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time() - tmp
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time() - tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t

        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner, max_time, (), {})
        incorrect = False

        # (name used in shape messages, name used in trade messages, frame)
        labelled_trades = (
            ("First insample", "first insample", in_trades_1),
            ("Second insample", "second insample", in_trades_2),
            ("Out-of-sample", "out-of-sample", out_trades),
        )
        # Report only the first frame with a bad shape; later frames are not
        # inspected in that case.
        bad_shape = None
        for title, _, trades in labelled_trades:
            if len(trades.shape) != 2 or trades.shape[1] != 1:
                bad_shape = (title, trades.shape)
                break
        if bad_shape is not None:
            incorrect = True
            msgs.append(" {} trades DF has invalid shape: {}".format(*bad_shape))
        else:
            # Each frame is checked, so up to three violations are reported.
            for _, label, trades in labelled_trades:
                if _record_trade_violation(trades, label, msgs):
                    incorrect = True

        if not incorrect:
            if train_t > train_time:
                incorrect = True
                msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t, train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t, test_time))
            else:
                points_earned += 2.0
            # Both frames have exactly one column here; .all().all() avoids a
            # positional [0] lookup on the label-indexed result of .all().
            if not (in_trades_1 == in_trades_2).all().all():
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2, how='outer', lsuffix='1', rsuffix='2')
                mismatches = mismatches[mismatches.iloc[:, 0] != mismatches.iloc[:, 1]]
                msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append(" Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'], in_trades_1, insample_args['sv'], insample_args['sd'], insample_args['ed'], market_impact=impact, commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'], out_trades, outsample_args['sv'], outsample_args['sd'], outsample_args['ed'], market_impact=impact, commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr, benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t, test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr, outsample_cr_to_beat))
                else:
                    points_earned += 5.0

        if incorrect:
            inputs_str = " insample_args: {}\n" \
                         " outsample_args: {}\n" \
                         " benchmark_type: {}\n" \
                         " benchmark: {}\n" \
                         " train_time: {}\n" \
                         " test_time: {}\n" \
                         " max_time: {}\n" \
                         " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time, seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Build a stack trace that shows only file names instead of long
        # absolute paths.
        tb_list = [
            (os.path.basename(row[0]), row[1], row[2], row[3])
            for row in tb.extract_tb(sys.exc_info()[2])
        ]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def compute_benchmark(sd, ed, sv, symbol, market_impact, commission_cost, max_holdings):
    """Return the cumulative return of buying and holding ``max_holdings`` shares.

    The benchmark buys ``max_holdings`` shares on the first date of the
    range and sells them on the last one.  When the range has a single date
    the sell order overwrites the buy order.

    :return: cumulative return as computed by ``evalPolicy2``.
    """
    date_idx = util.get_data([symbol, ], pd.date_range(sd, ed)).index
    orders = pd.DataFrame(index=date_idx)
    orders['orders'] = 0
    # Positional .iloc writes replace the chained orders['orders'][0] = ...
    # assignment, which does not reliably modify the frame.
    orders.iloc[0, 0] = max_holdings
    orders.iloc[-1, 0] = -max_holdings
    return evalPolicy2(symbol, orders, sv, sd, ed, market_impact, commission_cost)
def evalPolicy(student_trades, sym_prices, startval):
    """Return the cumulative return of ``student_trades`` without trading costs.

    Cash is reduced by the price-weighted trades, and the shares left at the
    end are valued at the last price in ``sym_prices``.
    """
    cash_spent = student_trades.mul(sym_prices, axis=0).sum()
    shares_left = student_trades.sum()
    final_value = (startval - cash_spent) + shares_left * sym_prices.iloc[-1]
    return float(final_value / startval) - 1.0
def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact, commission_cost):
    """Return the cumulative return of ``student_trades`` using the market simulator.

    :param symbol: symbol that all trades refer to.
    :param student_trades: DataFrame indexed by date whose first column holds
        signed share counts (positive buys, negative sells, 0 no trade).
    :param startval: starting cash.
    :param sd: first date of the simulation.
    :param ed: last date of the simulation.
    :param market_impact: market impact passed to ``compute_portvals``.
    :param commission_cost: commission passed to ``compute_portvals``.
    :return: last portfolio value divided by the first one, minus 1.
    """
    columns = ['Shares', 'Order', 'Symbol']
    order_dates = []
    order_rows = []
    for row_idx, nshares in student_trades.iloc[:, 0].items():
        if nshares == 0:
            continue
        order_dates.append(row_idx)
        order_rows.append([abs(nshares), 'sell' if nshares < 0 else 'buy', symbol])
    # Build the frame in one step; DataFrame.append no longer exists in
    # pandas 2 and was quadratic when called row by row.
    orders_df = pd.DataFrame(order_rows, columns=columns, index=order_dates)
    portvals = compute_portvals(orders_df, sd, ed, startval, market_impact, commission_cost)
    return float(portvals.iloc[-1] / portvals.iloc[0]) - 1
def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
    """Simulate the market for the given date range and orders.

    :param orders_df: DataFrame indexed by date with the columns ``Shares``
        (unsigned), ``Order`` ('buy' or 'sell', case-insensitive) and
        ``Symbol``.
    :param start_date: first date of the simulation.
    :param end_date: last date of the simulation.
    :param startval: starting cash.
    :param market_impact: fraction of the traded value charged on every order.
    :param commission_cost: fixed cost charged on every order.
    :return: Series with the portfolio value for every simulated date.
    """
    orders = []
    for date, order in orders_df.sort_index().iterrows():
        shares = order['Shares']
        if order['Order'].lower() == 'sell':
            shares = -shares
        orders.append((date, order['Symbol'], shares))
    symbols = list({symbol for _, symbol, _ in orders})

    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbols, dates)
    prices = prices_all[symbols].ffill().bfill()
    prices['_CASH'] = 1.0

    trades = pd.DataFrame(0.0, index=prices.index, columns=symbols)
    cash = pd.Series(0.0, index=prices.index)
    cash.iloc[0] = startval

    first_day, last_day = prices.index.min(), prices.index.max()
    for date, symbol, shares in orders:
        # Skip orders outside the simulated range before touching the price
        # table; looking the price up first would raise a KeyError.
        if date < first_day or date > last_day:
            continue
        price = prices.at[date, symbol]
        # Transaction cost model: fixed commission plus proportional impact.
        cost = shares * price + commission_cost + abs(shares) * price * market_impact
        trades.at[date, symbol] += shares
        cash.at[date] -= cost

    trades['_CASH'] = cash
    holdings = trades.cumsum()
    return (prices * holdings).sum(axis=1)


if __name__ == "__main__":
    pytest.main(["-s", __file__])

View File

@@ -0,0 +1,140 @@
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from util import get_data
def author():
    """Return the user name of the author of this module."""
    name = "felixm"
    return name
def normalize(timeseries):
    """Scale ``timeseries`` so that its first row equals one."""
    first_row = timeseries.iloc[0]
    return timeseries / first_row
def bollinger_band(df, symbol, period=20, m=2):
    """Add Bollinger bands for ``symbol`` to ``df`` and return them.

    The columns ``boll_sma``, ``boll_up`` and ``boll_lo`` hold the rolling
    mean over ``period`` rows and that mean plus/minus ``m`` rolling
    standard deviations.
    """
    window = df[symbol].rolling(period)
    center = window.mean()
    deviation = window.std()
    bands = {
        "boll_sma": center,
        "boll_up": center + m * deviation,
        "boll_lo": center - m * deviation,
    }
    for column, values in bands.items():
        df[column] = values
    return df[list(bands)]
def sma(df, symbol, period):
    """Add simple moving averages of ``symbol`` to ``df`` and return them.

    ``period`` is one int or an iterable of ints; every period ``p``
    produces a column named ``sma_<p>``.
    """
    periods = [period] if type(period) is int else list(period)
    columns = [f"sma_{p}" for p in periods]
    for p, column in zip(periods, columns):
        df[column] = df[symbol].rolling(p).mean()
    return df[columns]
def ema(df, symbol, period):
    """Add exponential moving averages of ``symbol`` to ``df`` and return them.

    ``period`` is one int or an iterable of ints used as the ``span`` of the
    exponential window; every period ``p`` produces a column ``ema_<p>``.
    """
    spans = [period] if type(period) is int else list(period)
    columns = [f"ema_{span}" for span in spans]
    for span, column in zip(spans, columns):
        df[column] = df[symbol].ewm(span=span).mean()
    return df[columns]
def price_sma(df, symbol, period):
    """Add the price divided by its simple moving average to ``df``.

    ``period`` is one int or an iterable of ints; every period ``p``
    produces a column named ``price_sma_<p>``.  The new columns are returned.
    """
    periods = [period] if type(period) is int else list(period)
    columns = [f"price_sma_{p}" for p in periods]
    for p, column in zip(periods, columns):
        moving_average = df[symbol].rolling(p).mean()
        df[column] = df[symbol] / moving_average
    return df[columns]
def rsi(df, symbol, period=14):
    """Add a relative strength index column ``rsi`` to ``df`` and return it.

    The index is computed over a rolling window of ``period`` price changes
    as ``100 - 100 / (1 + mean gain / mean loss)``.
    """
    # One extra price is needed to get 'period' changes (the first is NaN).
    window = period + 1

    def window_rsi(prices):
        changes = prices.pct_change()
        mean_gain = changes[changes > 0].mean()
        mean_loss = changes[changes <= 0].abs().mean()
        # NOTE(review): the means are taken over the gaining/losing days only
        # rather than over the whole window, and the result is NaN when a
        # window has no gain or no loss - confirm this is intended.
        relative_strength = (mean_gain / window) / (mean_loss / window)
        return 100 - (100 / (1 + relative_strength))

    df["rsi"] = df[symbol].rolling(window).apply(window_rsi)
    return df[["rsi"]]
def macd(df, symbol):
    """Add MACD columns for ``symbol`` to ``df`` and return them.

    ``macd`` is the 12-span EMA minus the 26-span EMA, ``macd_signal`` the
    9-row rolling mean of ``macd`` and ``macd_diff`` their difference.
    """
    prices = df[symbol]
    fast = prices.ewm(span=12).mean()
    slow = prices.ewm(span=26).mean()
    line = fast - slow
    df["macd"] = line
    df["macd_signal"] = line.rolling(9).mean()
    df["macd_diff"] = df["macd"] - df["macd_signal"]
    return df[["macd", "macd_signal", "macd_diff"]]
def price_delta(df, symbol, period=1):
    """Add the forward percentage change over ``period`` rows to ``df``.

    The column ``pct_<period>`` holds, for each row, the relative price
    change between that row and the row ``period`` rows later; the last
    ``period`` rows are NaN.  The new column is returned.
    """
    column = f"pct_{period}"
    backward_change = df[symbol].pct_change(periods=period)
    # Shift the change back so it describes the future of each row.
    df[column] = backward_change.shift(-period)
    return df[column]
def test_indicators():
    """Plot every indicator for JPM 2008-2009 and save figure_1 to figure_5.

    Each figure starts from a fresh copy of the price data so that the
    columns added for one indicator do not show up in the next plot.
    """
    symbol = "JPM"
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    df = get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    df_orig = df.copy()

    sma(df, symbol, 21)
    ema(df, symbol, 21)
    df.plot(title="21 SMA and EMA")
    plt.savefig('figure_1.png')

    df = df_orig.copy()
    sma(df, symbol, 8)
    price_sma(df, symbol, 8)
    df.plot(title="SMA and price / SMA", subplots=True)
    plt.savefig('figure_2.png')

    df = df_orig.copy()
    bollinger_band(df, symbol)
    df.plot(title="Bollinger Band")
    plt.savefig('figure_3.png')

    df = df_orig.copy()
    rsi(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # rsi() stores its result in the column "rsi".
    df["rsi"].plot(ax=axes[1], title="RSI")
    plt.savefig('figure_4.png')

    df = df_orig.copy()
    macd(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # macd() stores its results in "macd", "macd_signal" and "macd_diff".
    df[["macd", "macd_signal"]].plot(ax=axes[1])
    plt.savefig('figure_5.png')

View File

@@ -0,0 +1,75 @@
This document is the final report for the machine learning for trading
course. I have implemented two manual strategies, a random tree
learner-based strategy and one based on Q-learning.
# Experiment 1
I have implemented two manual strategies. The first strategy buys on a
bullish MACD cross with a MACD smaller than zero and sells on a bearish
MACD cross with a MACD greater than one.
The second strategy uses MACD diff (the difference between the MACD and
the MACD signal), RSI, and price SMA with a period of eight. I have
plotted each indicator against the one-, three-, and five-day returns to
find reasonable thresholds for the strategy.
![Scatter plot to find reasonable thresholds.](figure_3.png)
Based on the scatter plots, I have created a list of buy and sell
signals. Each signal uses the current number of shares owned and one of
the three indicators. The following figure shows the result for both
manual strategies compared to the benchmark. Both approaches do well in
the in-sample period but worse afterward, which I expected because I
cherry-picked the thresholds based on the in-sample period's scatter
plots.
![First strategy based on MACD. Better than just holding.](figure_1.png)
Next, I have implemented a random tree-based strategy learner. The
learner uses a leaf size of five and no bagging. A smaller leaf size
would result in overfitting to the in-sample data. But as the following
screenshot shows, five works well, and the RT learner does well for the
out of sample data.
![Manual strategy compared to RT learner.](figure_2.png)
I have also implemented a strategy learner based on Q-learning. The
Q-learner uses fifteen training runs on the in-sample data. It mostly
does well for the out of sample data, but it looks like the RT-based
strategy learner is better.
I am using a bin-size of five for the three indicators mentioned before.
That results in 375 (3x5x5x5) states with only about 500 in-sample data
points. Probably the Q-learner is overfitting to the in-sample data.
Indeed, with bin sizes of four, the Q learner performs better for the
out-of-sample data.
![Out-of-sample performance of the Q-learning strategy learner using four
and five bins for discretization.](figure_4.png)
# Experiment 2
Experiment 2 aims to show that the strategy learner trades differently
when there is a commission, and the impact is not zero. The RT-based
trader does not consider the commission value, but the Q-learning based
trader does.
However, it seems that a commission smaller than $10 does not affect
the number of trades significantly. Only when the commission is around
$50, or with a slippage of 1%, do we see considerably fewer transactions.
| commission | n_orders |
|------------|----------|
| 9.95 | 79 |
| 20 | 83 |
| 50 | 63 |
| 100 | 37 |
# Closing Remarks
Machine Learning for Trading is a great course. It gives an excellent
introduction to finance, trading, and machine learning without getting lost in
technical or mathematical details. I have enjoyed building decision tree
learners and a Q learner from first principles. At the same time, the course
accurately teaches powerful libraries such as NumPy and Pandas.

View File

@@ -0,0 +1,8 @@
from experiment1 import experiment1
from experiment2 import experiment2
if __name__ == "__main__":
    try:
        experiment1(create_report=True)
    except SystemExit as exit_request:
        # experiment1 may end its report mode by calling sys.exit(0); a clean
        # exit must not keep experiment 2 from running.  Real failures
        # (non-zero exit codes) still terminate the script.
        if exit_request.code not in (0, None):
            raise
    experiment2()

68
util.py Normal file
View File

@@ -0,0 +1,68 @@
"""MLT: Utility code.
Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415
All Rights Reserved
"""
import os
import pandas as pd
def symbol_to_path(symbol, base_dir=None):
    """Return the path of the CSV file for a ticker symbol.

    Without ``base_dir`` the directory is taken from the environment
    variable ``MARKET_DATA_DIR`` and falls back to ``../data/``.
    """
    directory = base_dir
    if directory is None:
        directory = os.environ.get("MARKET_DATA_DIR", '../data/')
    filename = "{}.csv".format(str(symbol))
    return os.path.join(directory, filename)
def get_data(symbols, dates, addSPY=True, colname='Adj Close', datecol='Date'):
    """Read stock data for the given symbols from CSV files.

    :param symbols: symbols to load; one CSV file per symbol.
    :param dates: index of the returned frame.
    :param addSPY: add SPY as the first column when it is not requested.
    :param colname: CSV column to read for regular symbols.  Previously this
        argument was overwritten inside the loop and therefore ignored.
    :param datecol: CSV column holding the date for regular symbols.
    :return: DataFrame indexed by ``dates`` with one column per symbol.

    Symbols containing 'BTC' or 'ETH', and 'SPY', are read from the columns
    'close' and 'time' regardless of ``colname``/``datecol``; their 'time'
    values are interpreted as Unix timestamps in seconds and reduced to
    dates.  Dates on which SPY has no data are kept in the result.
    """
    df = pd.DataFrame(index=dates)
    if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
        # list() handles the case where symbols is a numpy array of 'object'
        symbols = ['SPY'] + list(symbols)

    for symbol in symbols:
        if 'BTC' in symbol or 'ETH' in symbol or symbol == 'SPY':
            symbol_col, symbol_datecol = 'close', 'time'
        else:
            symbol_col, symbol_datecol = colname, datecol

        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col=symbol_datecol,
                              parse_dates=True,
                              usecols=[symbol_datecol, symbol_col],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={symbol_col: symbol})
        if symbol_datecol == 'time':
            # Convert epoch seconds to midnight-normalized dates so the rows
            # line up with the daily index of the result.
            day_index = pd.to_datetime(df_temp.index, unit='s').normalize()
            df_temp.index = day_index.rename('date')
        df = df.join(df_temp)
    return df
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Plot stock prices with a custom title and meaningful axis labels."""
    # Imported here so that matplotlib is only required when plotting.
    import matplotlib.pyplot as plt

    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()
def get_orders_data_file(basefilename):
    """Open an orders file for reading; the caller must close it.

    The directory comes from the environment variable ``ORDERS_DATA_DIR``
    and falls back to ``orders/``.
    """
    orders_dir = os.environ.get("ORDERS_DATA_DIR", 'orders/')
    return open(os.path.join(orders_dir, basefilename))
def get_learner_data_file(basefilename):
    """Open a learner data file for reading; the caller must close it.

    The directory comes from the environment variable ``LEARNER_DATA_DIR``
    and falls back to ``Data/``.
    """
    learner_dir = os.environ.get("LEARNER_DATA_DIR", 'Data/')
    return open(os.path.join(learner_dir, basefilename), 'r')
def get_robot_world_file(basefilename):
    """Open a robot world file for reading; the caller must close it.

    The directory comes from the environment variable ``ROBOT_WORLDS_DIR``
    and falls back to ``testworlds/``.
    """
    worlds_dir = os.environ.get("ROBOT_WORLDS_DIR", 'testworlds/')
    return open(os.path.join(worlds_dir, basefilename))

Binary file not shown.

Binary file not shown.