1
0
Fork 0

Add support for trading view data

master
Felix Martin 2021-06-20 15:04:30 -04:00
parent 71f9b58803
commit e577ce34d2
13 changed files with 1470 additions and 6 deletions

View File

@ -0,0 +1,77 @@
import numpy as np
class AbstractTreeLearner:
    """Base class for tree learners that keep the tree in a flat node table.

    The trained tree lives in ``self.rel_tree``, a structured NumPy array with
    one record per node: ``(factor, split_value, left, right)``.  ``left`` and
    ``right`` are offsets relative to the node's own index.  A leaf has
    ``factor == LEAF`` and stores its prediction in ``split_value``.

    Subclasses must set ``self.leaf_size`` and implement
    ``get_i_and_split_value(xs, y)`` returning ``(factor_index, split_value)``.
    """

    LEAF = -1  # factor value that marks a leaf record
    NA = -1    # filler for the unused child offsets of a leaf

    def author(self):
        """Return the author's user name."""
        return 'felixm'

    def create_node(self, factor, split_value, left, right):
        """Return a one-record structured array describing a single node.

        The split value is stored as float64 so that ``query_point`` compares
        against exactly the value ``build_tree`` partitioned with.  A float32
        field rounds the threshold and can route a training point to the
        other sub-tree at query time.
        """
        return np.array([(factor, split_value, left, right), ],
                        dtype='|i4, f8, i4, i4')

    def _leaf_value(self, y):
        """Collapse the mean of ``y`` into one of the labels -1, 0 or 1."""
        value = np.mean(y)
        if value < -0.2:
            return -1
        if value > 0.2:
            return 1
        return 0

    def query_point(self, point):
        """Walk the tree for one sample and return the leaf's prediction."""
        node_index = 0
        while self.rel_tree[node_index][0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = node[0]
            split_value = node[1]
            if point[split_factor] <= split_value:
                # Descend into the left sub-tree.
                node_index += node[2]
            else:
                node_index += node[3]
        return self.rel_tree[node_index][1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)

    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: 2-D array of feature rows
        @param y: the training values, one per row of xs
        @returns structured array of nodes, root first
        """
        assert(xs.shape[0] == y.shape[0])
        assert(xs.shape[0] > 0)  # If this is 0 something went wrong.
        if xs.shape[0] <= self.leaf_size:
            return self.create_node(self.LEAF, self._leaf_value(y),
                                    self.NA, self.NA)
        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)
        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value
        if not select_l.any() or not select_r.any():
            # The proposed split does not separate the rows (e.g. every
            # feature row is identical).  Recursing would hand an empty
            # partition to build_tree, so close the branch with a leaf.
            return self.create_node(self.LEAF, self._leaf_value(y),
                                    self.NA, self.NA)
        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # The left child directly follows the root; the right child follows
        # the whole left sub-tree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)

View File

@ -0,0 +1,36 @@
import pandas as pd
import util as ut
import datetime as dt
class BenchmarkStrategy:
    """Buy-and-hold baseline: buy on the first row, sell on the last."""

    def __init__(self, verbose=False, impact=0.0, commission=0.0, units=1000):
        self.verbose, self.impact = verbose, impact
        self.commission, self.units = commission, units

    def addEvidence(self, symbol=0, sd=0, ed=0, sv=0):
        """No-op kept so the class offers the same API as the learners."""
        return None

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return an orders frame that buys ``units`` first and sells last.

        The frame is indexed like the price data returned by ``ut.get_data``
        and has the columns ``Symbol``, ``Order`` and ``Shares``.
        """
        price_frame = ut.get_data([symbol], pd.date_range(sd, ed),
                                  addSPY=False, colname='close',
                                  datecol='time')
        orders = pd.DataFrame({"Symbol": symbol, "Order": "", "Shares": 0},
                              index=price_frame.index)
        first_order = [symbol, "BUY", self.units]
        last_order = [symbol, "SELL", -self.units]
        orders.iloc[0] = first_order
        orders.iloc[-1] = last_order
        if self.verbose:
            print(type(orders))  # it better be a DataFrame!
            print(orders)
        return orders

View File

@ -0,0 +1,114 @@
import datetime as dt
import pandas as pd
import util
import indicators
class ManualStrategy:
    """Rule-based trading strategy built on MACD, RSI and price/SMA.

    The position is always one of ``-units``, ``0`` or ``+units``; it is
    tracked in ``self.holding`` while a strategy walks over the order rows.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0, units=1000):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.units = units  # size of one long or short position
        self.holding = 0

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Load prices and volume for ``symbol``; nothing is learned.

        The manual strategy has no trainable state.  The data is only fetched
        (and printed when ``verbose`` is set) so the API matches the learners.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = util.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        if self.verbose:
            print(prices)
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)

    def _long_order(self):
        """Shares to buy to end up long ``units``; 0 if already long."""
        if self.holding in (0, -self.units):
            return self.units - self.holding
        return 0

    def _short_order(self):
        """Shares to sell (negative) to end up short ``units``; 0 if short."""
        if self.holding in (0, self.units):
            return -self.units - self.holding
        return 0

    def macd_strat(self, macd, orders):
        """Strategy based on MACD cross.

        ``macd`` must have three columns (macd, signal, diff) and share the
        index of ``orders``.  ``orders['Shares']`` is overwritten in place.
        """
        def strat(ser):
            window = macd.loc[ser.index]
            prev_macd, prev_signal, _ = window.iloc[0]
            cur_macd, cur_signal, _ = window.iloc[1]
            shares = 0
            if cur_macd < -1 and prev_macd < prev_signal \
                    and cur_macd > cur_signal:
                shares = self._long_order()
            elif cur_macd > 1 and prev_macd > prev_signal \
                    and cur_macd < cur_signal:
                shares = self._short_order()
            self.holding += shares
            return shares

        # raw=False hands each window to strat as a Series, whose index is
        # needed to look up the matching MACD rows.
        signals = orders['Shares'].rolling(2).apply(strat, raw=False)
        # The first row has no predecessor, so rolling() yields NaN there.
        # Store "no trade" instead of leaving NaN in the share column.
        orders['Shares'] = signals.fillna(0).astype(int)

    def three_indicator_strat(self, macd, rsi, price_sma, orders):
        """Strategy based on three indicators. Thresholds selected based on
        scatter plots.

        ``orders['Shares']`` is overwritten in place.
        """
        def strat(row):
            _, _, macd_diff = macd.loc[row.name]
            # .iloc[0] picks the first column explicitly; plain [0] on a
            # label-indexed Series is a deprecated positional fallback.
            cur_rsi = rsi.loc[row.name].iloc[0]
            cur_price_sma = price_sma.loc[row.name].iloc[0]
            buy_signal = (cur_price_sma < 0.9 or cur_rsi > 80
                          or macd_diff < -0.5)
            sell_signal = cur_price_sma > 1.1
            shares = 0
            if buy_signal and self.holding in (0, -self.units):
                shares = self._long_order()
            elif sell_signal and self.holding in (0, self.units):
                shares = self._short_order()
            self.holding += shares
            return shares

        orders['Shares'] = orders.apply(strat, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000, macd_strat=False):
        """Return the orders frame produced by the selected strategy.

        @param macd_strat: use the MACD-cross strategy when True, otherwise
                           the three-indicator strategy.
        """
        self.holding = 0
        df = util.get_data([symbol], pd.date_range(sd, ed))
        df.drop(columns=["SPY"], inplace=True)
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        macd = indicators.macd(df, symbol)
        rsi = indicators.rsi(df, symbol)
        price_sma = indicators.price_sma(df, symbol, [8])
        if macd_strat:
            self.macd_strat(macd, orders)
        else:
            self.three_indicator_strat(macd, rsi, price_sma, orders)
        return orders

170
crypto_eval/QLearner.py Normal file
View File

@ -0,0 +1,170 @@
import datetime as dt
import pandas as pd
import util
import indicators
from qlearning_robot.QLearner import QLearner as Learner
from dataclasses import dataclass
@dataclass
class Holding:
    """Mutable portfolio state that QLearner.handle_order updates in place."""
    # Cash balance; reduced by trade cost, commission and impact penalty.
    cash: int
    # Current position in shares (negative when short).
    shares: int
    # cash + shares * price, recomputed after every handled order.
    equity: int
class QLearner(object):
    """Q-learning trading strategy over binned technical indicators.

    A state encodes the current position (short, flat, long) together with
    one bin number per indicator.  The three actions are 0 = buy, 1 = sell
    and 2 = hold.
    """

    def __init__(self, verbose=False, impact=0.0, units=1000, commission=0.0, testing=False, n_bins=5):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.n_bins = n_bins
        self.bins = {}  # indicator name -> inner bin edges, set by bin_indicators
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
        self.learner = Learner(self.num_states, self.num_actions)
        self.units = units

    def row_to_state(self, holding, df_row):
        """Transforms a row into a state value.

        @param holding: current position, one of -units, 0, +units
        @param df_row: mapping from indicator name to its value
        """
        holding = (holding + self.units) // self.units
        assert(holding in [0, 1, 2])
        # Mixed-radix encoding: the position is the most significant digit
        # (base 3), followed by one base-n_bins digit per indicator.  With
        # 450 states the position contributes 0, 150 or 300, and every
        # further indicator divides the remaining interval by n_bins.
        remaining_states = self.num_states // 3
        state = holding * remaining_states
        for indicator in self.indicators:
            value = df_row[indicator]
            bin_n = self.indicator_value_to_bin(indicator, value)
            remaining_states //= self.n_bins
            state += bin_n * remaining_states
        return state

    def indicator_value_to_bin(self, indicator, value):
        """Return the index of the bin that ``value`` falls into.

        ``self.bins[indicator]`` holds the ascending inner edges; the result
        is the position of the first edge greater than ``value``, or the
        number of edges when no edge is greater.
        """
        bounds = self.bins[indicator]
        for i, upper_bound in enumerate(bounds):
            if value < upper_bound:
                return i
        # Also correct when there are no inner edges at all (n_bins == 1),
        # where relying on the loop variable would raise UnboundLocalError.
        return len(bounds)

    def add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        for indicator in self.indicators:
            if indicator == "macd_diff":
                indicators.macd(df, symbol)
                df.drop(columns=["macd", "macd_signal"], inplace=True)
            elif indicator == "rsi":
                indicators.rsi(df, symbol)
            elif indicator.startswith("price_sma_"):
                period = int(indicator.replace("price_sma_", ""))
                indicators.price_sma(df, symbol, [period])
        df.drop(columns=["SPY"], inplace=True)
        df.dropna(inplace=True)

    def bin_indicators(self, df):
        """Create bins for indicators."""
        for indicator in self.indicators:
            _, edges = pd.qcut(df[indicator], self.n_bins, retbins=True)
            # Drop the outermost edges so values outside the training range
            # still land in the first or the last bin.
            self.bins[indicator] = edges[1:self.n_bins]

    def get_num_states(self):
        """Return the total num of states."""
        num_states = 3  # Three states holding (1000, 0, -1000)
        for _ in self.indicators:
            num_states *= self.n_bins
        return num_states

    def handle_order(self, action, holding, adj_closing_price):
        """Apply ``action`` to ``holding`` in place at the given price.

        A buy or sell that would move the position beyond +/- units is
        ignored.  Commission and impact are charged only when shares
        actually change hands.
        """
        shares = 0
        if action == 0:  # buy
            if holding.shares == 0 or holding.shares == -self.units:
                shares = self.units
        elif action == 1:  # sell
            if holding.shares == 0 or holding.shares == self.units:
                shares = -self.units
        cost = shares * adj_closing_price
        if shares != 0:
            # Charge commission and deduct impact penalty
            holding.cash -= self.commission
            holding.cash -= (self.impact * adj_closing_price * abs(shares))
            holding.cash -= cost
            holding.shares += shares
        holding.equity = holding.cash + holding.shares * adj_closing_price

    def get_reward(self, equity, new_equity):
        """Return 1 when equity grew, otherwise -1 (also when unchanged)."""
        if new_equity > equity:
            return 1
        return -1

    def train(self, df, symbol, sv):
        """Run one training pass over ``df`` starting with ``sv`` in cash."""
        holding = Holding(sv, 0, sv)
        row = df.iloc[0]
        state = self.row_to_state(holding.shares, row)
        action = self.learner.querysetstate(state)
        adj_closing_price = row[symbol]
        equity = holding.equity
        self.handle_order(action, holding, adj_closing_price)
        for index, row in df.iloc[1:].iterrows():
            adj_closing_price = row[symbol]
            # Reward the previous action with the equity change it caused.
            new_equity = holding.cash + holding.shares * adj_closing_price
            r = self.get_reward(equity, new_equity)
            s_prime = self.row_to_state(holding.shares, row)
            a = self.learner.query(s_prime, r)
            equity = new_equity
            self.handle_order(a, holding, adj_closing_price)
            if self.verbose:
                print(f"{holding=} {s_prime=} {r=} {a=}")

    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
        """Fit the indicator bins and train the learner for 15 passes."""
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        self.bin_indicators(df)
        for _ in range(15):
            self.train(df, symbol, sv)

    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
        """Return the trades the learned policy makes over the date range.

        Returns the full orders frame when ``self.testing`` is set, otherwise
        only its ``Shares`` column as a one-column DataFrame.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.add_indicators(df, symbol)
        holding = 0
        for index, row in df.iterrows():
            state = self.row_to_state(holding, row)
            action = self.learner.querysetstate(state)
            if action == 0:  # buy
                if holding == 0 or holding == -self.units:
                    holding += self.units
                    orders.loc[index, "Shares"] = self.units
            elif action == 1:  # sell
                if holding == 0 or holding == self.units:
                    holding -= self.units
                    orders.loc[index, "Shares"] = -self.units
        if self.testing:
            return orders
        return orders[["Shares"]]

30
crypto_eval/RTLearner.py Normal file
View File

@ -0,0 +1,30 @@
import numpy as np
from AbstractTreeLearner import AbstractTreeLearner
class RTLearner(AbstractTreeLearner):
    """Random-tree learner: splits on a random feature at a random value."""

    def __init__(self, leaf_size = 1, verbose = False):
        self.leaf_size = leaf_size
        self.verbose = verbose

    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.
                  Make sure that not all X are the same for i and also pick
                  different values to average the split_value from.
        @returns (i, split_value)
        """
        # Columns that hold at least two different values.
        varying = np.any(xs != xs[0], axis=0)
        if not varying.any():
            # Every feature row is identical, so no column can separate the
            # data.  Rejection sampling below would never terminate; return
            # a split that keeps all rows on the "<=" side and let the
            # caller decide how to handle the unsplittable node.
            return 0, xs[0, 0]
        # Rejection sampling keeps the sequence of random draws identical to
        # simply retrying until a usable column turns up.
        i = np.random.randint(0, xs.shape[1])
        while not varying[i]:
            i = np.random.randint(0, xs.shape[1])
        # Two distinct values of the chosen column; their mean lies between
        # them, so the split is expected to separate the rows.
        a = np.array(list(set(xs[:, i])))
        r1, r2 = np.random.choice(a, size = 2, replace = False)
        assert(r1 != r2)
        split_value = (r1 + r2) / 2.0
        return i, split_value

View File

@ -0,0 +1,106 @@
import datetime as dt
import pandas as pd
import util
import indicators
from RTLearner import RTLearner
class StrategyLearner(object):
    """Trading strategy that classifies future returns with a random tree.

    ``addEvidence`` labels every training day as buy (1), sell (-1) or
    hold (0) from its ``pct_3`` column and fits an ``RTLearner`` on the
    indicator columns.  ``testPolicy`` turns the predictions into trades
    that keep the position within +/- ``units`` shares.
    """

    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False, units=1000):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # return the full orders frame when set
        self.units = units  # size of one long or short position
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.y_threshold = 0.2
        self.learner = None  # set by addEvidence
        self.holding = 0

    def _get_volume(self, syms, dates):
        """For reference: load and return the volume of ``syms``."""
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)
        return volume

    def _add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame."""
        df.drop(columns=["SPY"], inplace=True)
        indicators.macd(df, symbol)
        indicators.rsi(df, symbol)
        indicators.price_sma(df, symbol, [8])
        indicators.price_delta(df, symbol, 3)
        df.dropna(inplace=True)

    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Train the learner on ``symbol`` and return the labels it used."""
        self.y_threshold = 0.2
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)

        def classify_y(change):
            if change > self.y_threshold:
                return 1
            if change < -self.y_threshold:
                return -1
            return 0

        self.learner = RTLearner(leaf_size = 5)
        data_x = df[self.indicators].to_numpy()
        pct = df['pct_3']
        # This is a hack to get a low enough buy/sell threshold for the
        # cyclic test 'ML4T-220' where the max pct_3 is 0.0268.
        if max(pct) < 0.2:
            self.y_threshold = 0.02
        y = pct.apply(classify_y)
        self.learner.addEvidence(data_x, y.to_numpy())
        return y

    def strat(self, data_y, orders):
        """Fill ``orders['Shares']`` from the predicted labels in ``data_y``.

        A label of 1 moves the position to +units, -1 moves it to -units and
        anything else leaves it unchanged.
        """
        self.holding = 0

        def decide(row):
            y = int(data_y.loc[row.name].iloc[0])
            shares = 0
            if y == 1 and self.holding in (0, -self.units):
                shares = self.units - self.holding
            elif y == -1 and self.holding in (0, self.units):
                shares = -self.units - self.holding
            self.holding += shares
            return shares

        orders["Shares"] = orders.apply(decide, axis=1)

    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return the trades the trained learner makes over the date range.

        Returns the full orders frame when ``self.testing`` is set, otherwise
        only its ``Shares`` column as a one-column DataFrame.

        @raises RuntimeError: if called before ``addEvidence``.
        """
        if getattr(self, "learner", None) is None:
            raise RuntimeError("addEvidence() must be called before testPolicy()")
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)
        data_x = df[self.indicators].to_numpy()
        data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x))
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.strat(data_y, orders)
        if self.testing:
            return orders
        return orders[["Shares"]]

237
crypto_eval/experiment1.py Normal file
View File

@ -0,0 +1,237 @@
import pandas as pd
import datetime as dt
import sys
import util
import indicators
import crypto_eval.marketsim as marketsim
import matplotlib.pyplot as plt
from matplotlib.widgets import MultiCursor
from BenchmarkStrategy import BenchmarkStrategy
from ManualStrategy import ManualStrategy
from StrategyLearner import StrategyLearner
from QLearner import QLearner
def plot_indicators(symbol, df):
    """Show price with Bollinger band, price/SMA, MACD and RSI, then exit.

    The four panels share the x axis.  The process terminates after the
    window is closed.
    """
    fig, axes = plt.subplots(4, sharex=True)
    sma_ratio = indicators.price_sma(df, symbol, [8])
    band = indicators.bollinger_band(df, symbol)
    strength = indicators.rsi(df, symbol)
    macd_frame = indicators.macd(df, symbol).copy()
    # (data, panel index) in drawing order.
    panels = [
        (df[[symbol]], 0),
        (band, 0),
        (sma_ratio, 1),
        (macd_frame, 2),
        (strength, 3),
    ]
    for data, slot in panels:
        data.plot(ax=axes[slot])
    for axis in axes.flat:
        axis.grid()
    # Keep a reference so the cursor widget stays alive while shown.
    m = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    plt.show()
    sys.exit(0)
def visualize_correlations(symbol, df):
    """Scatter-plot indicator values against future returns, then exit.

    Left column: price/SMA(8); right column: MACD difference.  Rows show
    the 5-, 3- and 1-day price change.
    """
    indicators.price_sma(df, symbol, [8, 21])
    for horizon in (5, 3, 1):
        indicators.price_delta(df, symbol, horizon)
    indicators.macd(df, symbol)
    indicators.rsi(df, symbol)
    fig, grid = plt.subplots(3, 2)
    # Swap "macd_diff" for "rsi" to inspect the RSI instead.
    for col, feature in enumerate(("price_sma_8", "macd_diff")):
        for row, target in enumerate(("pct_5", "pct_3", "pct_1")):
            df.plot.scatter(x=feature, y=target, ax=grid[row, col])
    for axis in grid.flat:
        axis.grid()
    plt.show()
    sys.exit(0)
def compare_manual_strategies(symbol, sv, sd, ed):
    """Plot benchmark vs. both manual strategies and save figure_1.png."""
    frame = util.get_data([symbol], pd.date_range(sd, ed))
    frame.drop(columns=["SPY"], inplace=True)

    def record(value_col, orders_col, orders):
        # Store portfolio values and raw share orders side by side.
        frame[value_col] = marketsim.compute_portvals(orders, sv)
        frame[orders_col] = orders["Shares"]

    benchmark = BenchmarkStrategy()
    record("Benchmark", "Orders Benchmark",
           benchmark.testPolicy(symbol, sd, ed, sv))

    manual = ManualStrategy()
    record("MACD Strat", "Orders MACD",
           manual.testPolicy(symbol, sd, ed, sv, macd_strat=True))
    record("Three Strat", "Orders Three",
           manual.testPolicy(symbol, sd, ed, sv))

    fig, axes = plt.subplots(3, sharex=True)
    frame[[symbol]].plot(ax=axes[0])
    frame[["Benchmark", "MACD Strat", "Three Strat"]].plot(ax=axes[1])
    frame[["Orders Benchmark", "Orders MACD", "Orders Three"]].plot(ax=axes[2])
    for axis in axes:
        axis.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_1.png', dpi=fig.dpi)
def compare_all_strategies(symbol, sv, sd, ed):
    """Plot benchmark, manual and learned strategy; save figure_2.png.

    Portfolio values are normalized before plotting.
    """
    frame = util.get_data([symbol], pd.date_range(sd, ed))
    frame.drop(columns=["SPY"], inplace=True)
    normalize = indicators.normalize

    def record(label, orders):
        # Store normalized portfolio values and the raw share orders.
        frame[label] = normalize(marketsim.compute_portvals(orders, sv))
        frame["Orders " + label] = orders["Shares"]

    benchmark = BenchmarkStrategy()
    record("Benchmark", benchmark.testPolicy(symbol, sd, ed, sv))

    manual = ManualStrategy()
    record("Manual", manual.testPolicy(symbol, sd, ed, sv))

    learner = StrategyLearner(testing=True)
    learner.addEvidence(symbol, sd, ed, sv)
    record("Strategy", learner.testPolicy(symbol, sd, ed, sv))

    fig, axes = plt.subplots(3, sharex=True)
    frame[[symbol]].plot(ax=axes[0])
    frame[["Benchmark", "Manual", "Strategy"]].plot(ax=axes[1])
    frame[["Orders Benchmark", "Orders Manual", "Orders Strategy"]].plot(ax=axes[2])
    for axis in axes:
        axis.grid()
    MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_2.png', dpi=fig.dpi)
def compare_number_trades():
    """Print how many orders the Q-learner places per commission level.

    Uses JPM from 2008-01-01 to 2009-12-31 with an impact of 0.005 and
    trains a fresh learner for every commission value.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)

    print("| commission | n_orders |")
    print("-------------------------")
    for commission in (0, 9.95, 20, 50, 100):
        learner = QLearner(testing=True, commission=commission, impact=0.005)
        learner.addEvidence(symbol, sd, ed, sv)
        orders = learner.testPolicy(symbol, sd, ed, sv)
        placed = orders[orders["Shares"] != 0]
        n_orders = placed.shape[0]
        print(f"| {commission} | {n_orders} |")
def compare_q_learners():
    """Compare Q-learners with 5 and 4 bins out of sample, then exit.

    Trains on JPM 2008-2009, evaluates on 2010-2011 against the benchmark
    and saves the plot as figure_4.png.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample
    frame = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    frame.drop(columns=["SPY"], inplace=True)

    def record(label, orders):
        # Store normalized portfolio values and the raw share orders.
        portvals = marketsim.compute_portvals(orders, sv)
        frame[label] = indicators.normalize(portvals)
        frame["Orders " + label] = orders["Shares"]

    benchmark = BenchmarkStrategy()
    record("Benchmark", benchmark.testPolicy(symbol, sd_out, ed_out, sv))

    five_bins = QLearner(testing=True, verbose=False)
    five_bins.addEvidence(symbol, sd, ed, sv)
    record("QL 5", five_bins.testPolicy(symbol, sd_out, ed_out, sv))

    four_bins = QLearner(testing=True, verbose=False, n_bins=4)
    four_bins.addEvidence(symbol, sd, ed, sv)
    record("QL 4", four_bins.testPolicy(symbol, sd_out, ed_out, sv))

    fig, axes = plt.subplots(3, sharex=True)
    frame[[symbol]].plot(ax=axes[0])
    frame[["Benchmark", "QL 5", "QL 4"]].plot(ax=axes[1])
    frame[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=axes[2])
    for axis in axes:
        axis.grid()
    m = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_4.png', dpi=fig.dpi)
    sys.exit(0)
def experiment1(create_report=False):
    """Plot the Q-learner against buy-and-hold on daily BTC/USD data.

    Both strategies trade a single unit.  Training and evaluation use the
    same 2020 date range.  ``create_report`` is currently unused.
    """
    symbol = "COINBASE_BTCUSD_1D"
    sv = 10000
    sd = dt.datetime(2020, 1, 1)  # in-sample
    ed = dt.datetime(2020, 12, 31)  # in-sample
    sd_out = dt.datetime(2020, 1, 1)  # out-sample
    ed_out = dt.datetime(2020, 12, 31)  # out-sample
    frame = util.get_data([symbol], pd.date_range(sd_out, ed_out), addSPY=True)

    # Other experiments that can be run from here instead:
    #   compare_manual_strategies(symbol, sv, sd, ed)
    #   compare_all_strategies(symbol, sv, sd, ed)
    #   visualize_correlations(symbol, frame)
    #   plot_indicators(symbol, frame)
    #   compare_number_trades()
    #   compare_q_learners()

    benchmark = BenchmarkStrategy(units=1)
    benchmark_orders = benchmark.testPolicy(symbol, sd_out, ed_out, sv)
    benchmark_values = marketsim.compute_portvals(benchmark_orders, start_val=sv)
    frame["Benchmark"] = indicators.normalize(benchmark_values)
    frame["Orders Benchmark"] = benchmark_orders["Shares"]

    learner = QLearner(testing=True, verbose=False, units=1)
    learner.addEvidence(symbol, sd, ed, sv)
    learner_orders = learner.testPolicy(symbol, sd_out, ed_out, sv)
    learner_values = marketsim.compute_portvals(learner_orders, sv)
    frame["QL"] = indicators.normalize(learner_values)
    frame["Orders QL"] = learner_orders["Shares"]

    fig, axes = plt.subplots(3, sharex=True)
    frame[[symbol]].plot(ax=axes[0])
    frame[["Benchmark", "QL"]].plot(ax=axes[1])
    frame[["Orders Benchmark", "Orders QL"]].plot(ax=axes[2])
    for axis in axes:
        axis.grid()
    # Keep a reference so the cursor widget stays alive while shown.
    m = MultiCursor(fig.canvas, axes, color='r', lw=0.5)
    plt.show()


if __name__ == "__main__":
    experiment1()

View File

@ -0,0 +1,8 @@
import experiment1
def experiment2():
    """Run the commission vs. number-of-trades comparison from experiment1."""
    experiment1.compare_number_trades()


# Run the experiment when this file is executed as a script.
if __name__ == "__main__":
    experiment2()

View File

@ -0,0 +1,339 @@
"""MC3-P3: Strategy Learner - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
Copyright 2017, Georgia Tech Research Corporation
Atlanta, Georgia 30332-0415
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pytest
from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
import os
import sys
import traceback as tb
import datetime as dt
import numpy as np
import pandas as pd
from collections import namedtuple
import time
import util
import random
# Test cases
# One record per graded scenario.  insample_args / outsample_args are passed
# as keyword arguments to the learner's addEvidence() / testPolicy().
# train_time, test_time and max_time are limits in seconds; seed fixes the
# random number generators before each run.
StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed'])
strategy_test_cases = [
    StrategyTestCase(
        description="ML4T-220",
        insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        # 'clean' is the only type whose out-of-sample return is also
        # compared against the benchmark by test_strategy.
        benchmark_type='clean',
        benchmark=1.0, #benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
    ),
    StrategyTestCase(
        description="AAPL",
        insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='stock',
        benchmark=0.1581999999999999, #benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
    ),
    StrategyTestCase(
        description="SINE_FAST_NOISE",
        insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='noisy',
        benchmark=2.0, #benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
    ),
    StrategyTestCase(
        description="UNH - In sample",
        insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='stock',
        benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
    ),
]
max_points = 60.0
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
# Largest long or short position, in shares, that test_strategy accepts.
MAX_HOLDINGS = 1000
# Test function(s)
@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Trains a learner on the in-sample arguments, queries it twice in sample
    and once out of sample, then checks trade shapes, trade sizes, holdings
    limits, run times, repeatability and returns against the benchmark.

    Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float),
    impact (float), train/test/max time (seconds), random seed (int), and a grader fixture.
    Points earned are reported to the grader; any failed criterion raises IncorrectOutput.
    """
    points_earned = 0.0 # initialize points for this test case
    try:
        incorrect = True
        # Import the student's module lazily so an import error is reported
        # as a failure of this test case.
        if not 'StrategyLearner' in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        # Only 'clean' benchmarks are also enforced out of sample.
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark
        def timeoutwrapper_strategylearner():
            # Set fixed seed for repeatability
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time()-tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time()-tmp
            # Second in-sample query: must reproduce the first one exactly.
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time()-tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
        incorrect = False
        # Every trades frame must be two-dimensional with exactly one column.
        if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
            incorrect=True
            msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
        elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
            incorrect=True
            msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
        elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
            incorrect=True
            msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
        else:
            # For each frame: every trade must be 0, +/-MAX_HOLDINGS or
            # +/-2*MAX_HOLDINGS and the running position must stay within
            # +/-MAX_HOLDINGS.  Checking stops at the first violation.
            tmp_csum=0.0
            for date,trade in in_trades_1.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                    incorrect=True
                    msgs.append(" illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                    break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append(" holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            tmp_csum=0.0
            for date,trade in in_trades_2.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                    incorrect=True
                    msgs.append(" illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                    break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append(" holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            tmp_csum=0.0
            for date,trade in out_trades.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                    incorrect=True
                    msgs.append(" illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                    break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append(" holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
            # ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
            # ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
            # incorrect = True
            # msgs.append(" illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
            # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
            # incorrect = True
            # msgs.append(" holdings more than {} long or short".format(MAX_HOLDINGS))
        # Points are only awarded when the trades themselves are legal.
        if not(incorrect):
            if train_t>train_time:
                incorrect=True
                msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
            else:
                points_earned += 2.0
            # Both in-sample queries must return identical trades.
            if not((in_trades_1 == in_trades_2).all()[0]):
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
                mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
                msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append(" Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
            else:
                points_earned += 5.0
            # Without an out-of-sample benchmark only the query time counts.
            if outsample_cr_to_beat is None:
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t,test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = " insample_args: {}\n" \
                         " outsample_args: {}\n" \
                         " benchmark_type: {}\n" \
                         " benchmark: {}\n" \
                         " train_time: {}\n" \
                         " test_time: {}\n" \
                         " max_time: {}\n" \
                         " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path
        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list)) # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def compute_benchmark(sd, ed, sv, symbol, market_impact, commission_cost, max_holdings):
    """Return the cumulative return of a buy-and-hold benchmark.

    The benchmark buys ``max_holdings`` shares of ``symbol`` on the first date
    that ``util.get_data`` returns for ``sd``..``ed`` and sells them again on
    the last one.  The resulting one-column order table is scored with
    ``evalPolicy2`` using starting value ``sv`` and the given cost model.
    """
    date_idx = util.get_data([symbol], pd.date_range(sd, ed)).index
    orders = pd.DataFrame(index=date_idx)
    orders['orders'] = 0
    # Write through .iloc on the frame itself: the chained form
    # ``orders['orders'][0] = ...`` assigns into an intermediate Series and
    # relies on positional integer lookup on a date-indexed Series.
    orders.iloc[0, 0] = max_holdings
    orders.iloc[-1, 0] = -max_holdings
    return evalPolicy2(symbol, orders, sv, sd, ed, market_impact, commission_cost)
def evalPolicy(student_trades, sym_prices, startval):
    """Return the cumulative return of a trade sequence, ignoring costs.

    Cash at the end is ``startval`` minus the summed value of all trades
    (shares times price on the trade row); the final position is valued at
    the last price in ``sym_prices``.
    """
    traded_value = student_trades.mul(sym_prices, axis=0).sum()
    cash_at_end = startval - traded_value
    shares_at_end = student_trades.sum()
    stock_value_at_end = shares_at_end * sym_prices.iloc[-1]
    final_ratio = (cash_at_end + stock_value_at_end) / startval
    return float(final_ratio) - 1.0
def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact, commission_cost):
    """Return the cumulative return of ``student_trades`` via the market simulator.

    ``student_trades`` is a DataFrame whose first column holds the signed
    number of shares traded on each index date (positive = buy, negative =
    sell, zero = no order).  The non-zero rows are converted to an order
    table with columns ``Shares``/``Order``/``Symbol`` and passed to
    ``compute_portvals``; the result is last portfolio value divided by the
    first, minus one.
    """
    columns = ['Shares', 'Order', 'Symbol']
    rows = []
    order_dates = []
    for row_idx, nshares in zip(student_trades.index, student_trades.iloc[:, 0]):
        if nshares == 0:
            continue
        side = 'sell' if nshares < 0 else 'buy'
        rows.append([abs(nshares), side, symbol])
        order_dates.append(row_idx)
    # Build the frame once: DataFrame.append copied the whole table on every
    # iteration and no longer exists in pandas 2.x.
    orders_df = pd.DataFrame(rows, columns=columns, index=order_dates)
    portvals = compute_portvals(orders_df, sd, ed, startval, market_impact, commission_cost)
    # .iloc makes the positional access explicit on the date-indexed Series.
    return float(portvals.iloc[-1] / portvals.iloc[0]) - 1
def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
    """Simulate the market for the given date range and orders.

    @param orders_df: DataFrame indexed by order date with columns
        ``Shares``, ``Order`` ('buy'/'sell', case-insensitive) and ``Symbol``.
    @param start_date, end_date: bounds passed to ``pd.date_range``.
    @param startval: cash credited on the first row of the price table.
    @param market_impact: fraction of the traded value charged per order.
    @param commission_cost: fixed amount charged per order.
    @returns a Series with the portfolio value for every row of the price
        table returned by ``util.get_data``.

    Orders dated before the first or after the last price row are ignored.
    An order inside that range whose date has no price row still raises
    ``KeyError``.
    """
    orders_df = orders_df.sort_index()
    orders = []
    for date, shares, action, symbol in zip(orders_df.index, orders_df['Shares'],
                                            orders_df['Order'], orders_df['Symbol']):
        if action.lower() == 'sell':
            shares = -shares
        orders.append((date, symbol, shares))
    # Sorted so the column order of the tables below is deterministic.
    symbols = sorted({symbol for _, symbol, _ in orders})

    dates = pd.date_range(start_date, end_date)
    prices = util.get_data(symbols, dates)[symbols]
    prices = prices.ffill().bfill()
    prices['_CASH'] = 1.0

    trades = pd.DataFrame(0.0, index=prices.index, columns=symbols)
    cash = pd.Series(0.0, index=prices.index)
    cash.iloc[0] = startval

    first_day = prices.index.min()
    last_day = prices.index.max()
    for date, symbol, shares in orders:
        # Check the range before touching the price table; otherwise the
        # lookup raises KeyError before the order can be skipped.
        if (date < first_day) or (date > last_day):
            continue
        price = prices.at[date, symbol]
        val = shares * price
        # transaction cost model
        val += commission_cost + (abs(shares) * price * market_impact)
        trades.at[date, symbol] += shares
        cash.at[date] -= val

    trades['_CASH'] = cash
    holdings = trades.cumsum()
    df_portvals = (prices * holdings).sum(axis=1)
    return df_portvals
# When executed directly, run pytest on this file; "-s" is forwarded to
# pytest as a command-line option.
if __name__ == "__main__":
    pytest.main(["-s", __file__])

140
crypto_eval/indicators.py Normal file
View File

@ -0,0 +1,140 @@
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from util import get_data
def author():
    """Return the author identifier string of this module."""
    author_id = "felixm"
    return author_id
def normalize(timeseries):
    """Divide ``timeseries`` by its first row so that the first row becomes 1."""
    first_row = timeseries.iloc[0]
    return timeseries / first_row
def bollinger_band(df, symbol, period=20, m=2):
    """Add Bollinger Bands of ``df[symbol]`` to ``df`` and return them.

    Writes three columns into ``df``: ``boll_sma`` (rolling mean over
    ``period`` rows), ``boll_up`` and ``boll_lo`` (the mean plus/minus ``m``
    rolling standard deviations).  Returns a frame with just these columns.
    """
    center = df[symbol].rolling(period).mean()
    deviation = df[symbol].rolling(period).std()
    bands = {
        "boll_sma": center,
        "boll_up": center + m * deviation,
        "boll_lo": center - m * deviation,
    }
    for column, values in bands.items():
        df[column] = values
    return df[list(bands)]
def sma(df, symbol, period):
    """Add SMA for one or multiple periods to df and return the SMAs.

    ``period`` is either a single window length or an iterable of window
    lengths.  For every length ``p`` a column ``sma_<p>`` holding the rolling
    mean of ``df[symbol]`` is written into ``df``; the returned frame
    contains exactly those columns, in the order given.
    """
    try:
        periods = list(period)
    except TypeError:
        # A single, non-iterable period.  Unlike ``type(period) is int`` this
        # also accepts numpy integers such as ``np.int64(20)``.
        periods = [period]
    keys = []
    for p in periods:
        key = f"sma_{p}"
        df[key] = df[symbol].rolling(p).mean()
        keys.append(key)
    return df[keys]
def ema(df, symbol, period):
    """Add EMA for one or multiple periods to df and return the EMAs.

    ``period`` is either a single span or an iterable of spans.  For every
    span ``p`` a column ``ema_<p>`` holding ``df[symbol].ewm(span=p).mean()``
    is written into ``df``; the returned frame contains exactly those
    columns, in the order given.
    """
    try:
        periods = list(period)
    except TypeError:
        # A single, non-iterable span.  Unlike ``type(period) is int`` this
        # also accepts numpy integers such as ``np.int64(20)``.
        periods = [period]
    keys = []
    for p in periods:
        key = f"ema_{p}"
        df[key] = df[symbol].ewm(span=p).mean()
        keys.append(key)
    return df[keys]
def price_sma(df, symbol, period):
    """Add the ratio of price to its SMA for one or multiple periods to df.

    ``period`` is either a single window length or an iterable of window
    lengths.  For every length ``p`` a column ``price_sma_<p>`` holding
    ``df[symbol]`` divided by its rolling mean is written into ``df``; the
    returned frame contains exactly those columns, in the order given.
    """
    try:
        periods = list(period)
    except TypeError:
        # A single, non-iterable period.  Unlike ``type(period) is int`` this
        # also accepts numpy integers such as ``np.int64(20)``.
        periods = [period]
    keys = []
    for p in periods:
        key = f"price_sma_{p}"
        # Named ``rolling_mean`` so the module-level ``sma`` is not shadowed.
        rolling_mean = df[symbol].rolling(p).mean()
        df[key] = df[symbol] / rolling_mean
        keys.append(key)
    return df[keys]
def rsi(df, symbol, period=14):
    """Calculate the relative strength index over the given period.

    For every rolling window of ``period + 1`` prices (i.e. ``period`` price
    changes) the average gain is the mean of the positive percentage changes
    and the average loss is the mean absolute value of the non-positive ones
    (unchanged prices count as a loss of zero).  The index is
    ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Windows without any gain yield 0 and windows without any loss yield 100
    (previously both produced NaN).  A window with neither a gain nor a
    non-zero loss stays NaN.  The result is written to ``df["rsi"]`` and
    returned as a one-column frame.
    """
    def _window_rsi(prices):
        pct = prices.pct_change()
        gains = pct[pct > 0]
        losses = pct[pct <= 0].abs()
        # The mean of an empty selection is NaN; treat "no such moves" as 0.
        avg_gain = gains.mean() if len(gains) else 0.0
        avg_loss = losses.mean() if len(losses) else 0.0
        if avg_loss == 0:
            # No downward move: saturate at 100 when there was a gain,
            # otherwise the index is undefined.
            return 100.0 if avg_gain > 0 else float("nan")
        return 100 - (100 / (1 + (avg_gain / avg_loss)))

    key = "rsi"
    # Add one to get 'period' price changes (first change is nan).
    window = period + 1
    # raw=False: the helper needs a Series for pct_change().
    df[key] = df[symbol].rolling(window).apply(_window_rsi, raw=False)
    return df[[key]]
def macd(df, symbol):
    """Add MACD columns for ``df[symbol]`` to ``df`` and return them.

    ``macd`` is the span-12 EMA minus the span-26 EMA, ``macd_signal`` is
    the 9-row rolling mean of ``macd`` and ``macd_diff`` is their difference.
    """
    fast_ema = df[symbol].ewm(span=12).mean()
    slow_ema = df[symbol].ewm(span=26).mean()
    macd_line = fast_ema - slow_ema
    df["macd"] = macd_line
    df["macd_signal"] = macd_line.rolling(9).mean()
    df["macd_diff"] = df["macd"] - df["macd_signal"]
    return df[["macd", "macd_signal", "macd_diff"]]
def price_delta(df, symbol, period=1):
    """Calculate the forward-looking percentage change for period.

    The change over ``period`` rows is shifted back by ``period`` rows, so
    the value on a row is the change from that row to the one ``period``
    rows later; the last ``period`` rows are NaN.  The result is stored in
    ``df["pct_<period>"]`` and returned as a Series.
    """
    column = f"pct_{period}"
    trailing_change = df[symbol].pct_change(periods=period)
    df[column] = trailing_change.shift(-period)
    return df[column]
def test_indicators():
    """Plot every indicator for JPM over 2008-2009 and save the figures.

    Writes ``figure_1.png`` .. ``figure_5.png`` into the working directory.
    Each figure starts from a fresh copy of the price data because the
    indicator functions add their columns to the frame they are given.
    """
    symbol = "JPM"
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    df = get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    df_orig = df.copy()

    sma(df, symbol, 21)
    ema(df, symbol, 21)
    df.plot(title="21 SMA and EMA")
    plt.savefig('figure_1.png')

    df = df_orig.copy()
    sma(df, symbol, 8)
    price_sma(df, symbol, 8)
    df.plot(title="SMA and price / SMA", subplots=True)
    plt.savefig('figure_2.png')

    df = df_orig.copy()
    bollinger_band(df, symbol)
    df.plot(title="Bollinger Band")
    plt.savefig('figure_3.png')

    df = df_orig.copy()
    rsi(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # rsi() stores its result in the "rsi" column.
    df["rsi"].plot(ax=axes[1], title="RSI")
    plt.savefig('figure_4.png')

    df = df_orig.copy()
    macd(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # macd() stores its results in "macd", "macd_signal" and "macd_diff".
    df[["macd", "macd_signal"]].plot(ax=axes[1])
    plt.savefig('figure_5.png')

179
crypto_eval/marketsim.py Normal file
View File

@ -0,0 +1,179 @@
"""MC2-P1: Market simulator.
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: felixm (replace with your User ID)
GT ID: 1337 (replace with your GT ID)
"""
import pandas as pd
from util import get_data, plot_data
from optimize_something.optimization import calculate_stats
def read_orders(orders_file):
    """
    Parse orders into the form:

        Date    datetime64[ns]  (index)
        Symbol  object
        Order   object
        Shares  int32

    This is how the order book looks like:

        Date,Symbol,Order,Shares
        2011-01-10,AAPL,BUY,1500
        2011-01-10,AAPL,SELL,1500

    ``orders_file`` is anything ``pd.read_csv`` accepts (path or file-like
    object).  The returned frame is sorted by date; orders on the same date
    keep their file order.
    """
    orders = pd.read_csv(orders_file,
                         index_col='Date',
                         parse_dates=['Date'],
                         # One dtype per column: read_csv documents a type
                         # name or a column -> type dict for this argument.
                         dtype={'Symbol': str, 'Order': str, 'Shares': 'int32'})
    # Stable sort so same-day orders are not reordered.
    return orders.sort_index(kind='mergesort')
def get_order_book_info(orders):
    """Return start_date, end_date, and symbols (as a sorted list).

    The dates are the first and last entries of ``orders.index``; the
    symbols are the distinct values of the ``Symbol`` column.
    """
    first_date = orders.index[0]
    last_date = orders.index[-1]
    unique_symbols = set(orders.Symbol.tolist())
    return first_date, last_date, sorted(unique_symbols)
def get_portfolio_value(holding, prices):
    """Calculate the current portfolio value.

    ``holding`` maps ``'cash'`` to a cash amount and every other key to a
    number of shares; shares are valued at ``prices[key]``.
    """
    total = 0
    for asset, amount in holding.items():
        total += amount if asset == 'cash' else amount * prices[asset]
    return total
def handle_order(date, order, holding, prices, commission, impact):
    """Process the order by updating ``holding`` in place.

    @param date: date of the order (not used in the calculation).
    @param order: a ``(symbol, order type, shares)`` triple.  The order type
        is "BUY" or "SELL" (case-insensitive) or the empty string, in which
        case the sign of ``shares`` selects buying or selling.
    @param holding: dict mapping 'cash' and each symbol to the amount held.
    @param prices: mapping from symbol to the price used for the trade.
    @param commission: fixed amount charged per executed order.
    @param impact: fraction of the traded value charged per executed order.
    @raises Exception: for an order type other than BUY/SELL; ``holding`` is
        left untouched in that case.
    """
    symbol, side, shares = order

    if shares == 0 and side == "":
        return  # empty order

    if pd.isnull(shares):
        return  # shares is nan

    # Allow indicating buying and selling via shares. If shares is positive we
    # buy and if it is negative we sell.
    if side == "":
        side = "BUY" if shares > 0 else "SELL"

    # Validate before touching ``holding`` so that a rejected order does not
    # leave the commission already deducted.
    side = side.upper()
    if side not in ("BUY", "SELL"):
        raise Exception("Unexpected order type.")

    shares = abs(shares)
    adj_closing_price = prices[symbol]
    cost = shares * adj_closing_price

    # Charge commission and deduct impact penalty
    holding['cash'] -= (commission + impact * adj_closing_price * shares)
    if side == "BUY":
        holding['cash'] -= cost
        holding[symbol] += shares
    else:
        holding['cash'] += cost
        holding[symbol] -= shares
def compute_portvals(orders_file, start_val=1000000, commission=9.95, impact=0.005):
    """Simulate an order book and return the daily portfolio value.

    @param orders_file: either a DataFrame of orders (used as given, so its
        index must already be sorted by date because the simulated range is
        taken from its first and last index entries) or anything
        ``read_orders`` accepts.
    @param start_val: starting cash.
    @param commission: fixed amount charged per order.
    @param impact: fraction of the traded value charged per order.
    @returns a one-column DataFrame ``Portval`` indexed like the price table
        returned by ``get_data``.
    """
    if isinstance(orders_file, pd.DataFrame):
        orders = orders_file
    else:
        orders = read_orders(orders_file)

    start_date, end_date, symbols = get_order_book_info(orders)

    # Tickers in the orderbook over the date_range in the order book.
    prices = get_data(symbols, pd.date_range(start_date, end_date))

    # A dictionary to keep track of the assets we are holding.
    holding = {s: 0 for s in symbols}
    holding['cash'] = start_val

    # Iterate over all trading days that are in the (inclusive) range of the
    # order book dates. This implicitly ignores orders placed on non-trading
    # days.
    portvals = []
    for date, day_prices in prices.iterrows():
        # Process orders for that day.
        for order_date, order in orders.loc[date:date].iterrows():
            handle_order(order_date, order, holding, day_prices, commission, impact)
        # Compute portfolio value at the end of day.  The value is collected
        # here rather than written into ``day_prices``: iterrows() yields
        # row copies, so such a write is not guaranteed to reach ``prices``.
        portvals.append(get_portfolio_value(holding, day_prices))

    prices['Portval'] = pd.Series(portvals, index=prices.index, dtype=float)
    return prices[['Portval']]
def test_code():
    """Simulate ``./orders/orders-02.csv`` and print its statistics next to SPY's."""
    of = "./orders/orders-02.csv"
    sv = 1000000

    portvals = compute_portvals(orders_file=of, start_val=sv)
    if not isinstance(portvals, pd.DataFrame):
        raise Exception("warning, code did not return a DataFrame")
    portvals = portvals[portvals.columns[0]]  # just get the first column

    start_date = portvals.index[0]
    end_date = portvals.index[-1]
    cum_ret, avg_daily_ret, \
        std_daily_ret, sharpe_ratio = calculate_stats(portvals.to_frame(), [1])

    spy = get_data(['SPY'], pd.date_range(start_date, end_date))
    cum_ret_SPY, avg_daily_ret_SPY, \
        std_daily_ret_SPY, sharpe_ratio_SPY = calculate_stats(spy, [1])

    # Compare portfolio against $SPY
    print(f"Date Range: {start_date} to {end_date}")
    print()
    print(f"Sharpe Ratio of Fund: {sharpe_ratio}")
    print(f"Sharpe Ratio of SPY : {sharpe_ratio_SPY}")
    print()
    print(f"Cumulative Return of Fund: {cum_ret}")
    print(f"Cumulative Return of SPY : {cum_ret_SPY}")
    print()
    print(f"Standard Deviation of Fund: {std_daily_ret}")
    print(f"Standard Deviation of SPY : {std_daily_ret_SPY}")
    print()
    print(f"Average Daily Return of Fund: {avg_daily_ret}")
    print(f"Average Daily Return of SPY : {avg_daily_ret_SPY}")
    print()
    # .iloc: the Series is indexed by date, so the last row must be
    # addressed by position explicitly.
    print(f"Final Portfolio Value: {portvals.iloc[-1]}")
def author():
    """Return the author identifier string of this module."""
    author_id = 'felixm'
    return author_id
# Run the sample simulation when this module is executed as a script.
if __name__ == "__main__":
    test_code()

View File

@ -0,0 +1,8 @@
from experiment1 import experiment1
from experiment2 import experiment2
# Run both experiments when this file is executed as a script.
# NOTE(review): create_report=True presumably makes experiment1 write its
# report output; experiment1 is defined in another module, so confirm there.
if __name__ == "__main__":
    experiment1(create_report=True)
    experiment2()

32
util.py
View File

@ -14,22 +14,42 @@ def symbol_to_path(symbol, base_dir=None):
base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates, addSPY=True, colname='Adj Close', datecol='Date'):
    """Read price data for given symbols from CSV files.

    @param symbols: symbols to load; each is read from ``symbol_to_path(symbol)``.
    @param dates: index of the returned frame.
    @param addSPY: prepend 'SPY' to the symbols when it is not already there.
    @param colname: price column to read for regular symbols.
    @param datecol: date column to read for regular symbols.
    @returns a DataFrame indexed by ``dates`` with one column per symbol.

    Symbols containing 'BTC' or 'ETH', and 'SPY' itself, are always read from
    the ``close`` column with a ``time`` column holding epoch seconds, which
    is converted to calendar days.  Rows on which SPY has no data are kept.
    """
    df = pd.DataFrame(index=dates)
    if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
        # handles the case where symbols is np array of 'object'
        symbols = ['SPY'] + list(symbols)

    for symbol in symbols:
        # Per-symbol names live in locals so the caller's ``colname`` and
        # ``datecol`` are honoured for every regular symbol instead of being
        # overwritten by the previous iteration.
        if 'BTC' in symbol or 'ETH' in symbol or symbol == 'SPY':
            sym_colname, sym_datecol = 'close', 'time'
        else:
            sym_colname, sym_datecol = colname, datecol

        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col=sym_datecol,
                              parse_dates=True,
                              usecols=[sym_datecol, sym_colname],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={sym_colname: symbol})
        if sym_datecol == 'time':
            # Epoch seconds -> timestamps, truncated to midnight so the rows
            # line up with the daily ``dates`` index.
            df_temp['date'] = pd.to_datetime(df_temp.index, unit='s')
            df_temp['date'] = pd.DatetimeIndex(df_temp['date']).normalize()
            df_temp.set_index('date', drop=True, inplace=True)
        df = df.join(df_temp)
    return df
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
import matplotlib.pyplot as plt
"""Plot stock prices with a custom title and meaningful axis labels."""