From e577ce34d2ffef903549ede7191f9f62ed42ce29 Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Sun, 20 Jun 2021 15:04:30 -0400 Subject: [PATCH] Add support for trading view data --- crypto_eval/AbstractTreeLearner.py | 77 ++++++ crypto_eval/BenchmarkStrategy.py | 36 +++ crypto_eval/ManualStrategy.py | 114 +++++++++ crypto_eval/QLearner.py | 170 +++++++++++++ crypto_eval/RTLearner.py | 30 +++ crypto_eval/StrategyLearner.py | 106 ++++++++ crypto_eval/experiment1.py | 237 ++++++++++++++++++ crypto_eval/experiment2.py | 8 + crypto_eval/grade_strategy_learner.py | 339 ++++++++++++++++++++++++++ crypto_eval/indicators.py | 140 +++++++++++ crypto_eval/marketsim.py | 179 ++++++++++++++ crypto_eval/testproject.py | 8 + util.py | 32 ++- 13 files changed, 1470 insertions(+), 6 deletions(-) create mode 100644 crypto_eval/AbstractTreeLearner.py create mode 100644 crypto_eval/BenchmarkStrategy.py create mode 100644 crypto_eval/ManualStrategy.py create mode 100644 crypto_eval/QLearner.py create mode 100644 crypto_eval/RTLearner.py create mode 100644 crypto_eval/StrategyLearner.py create mode 100644 crypto_eval/experiment1.py create mode 100644 crypto_eval/experiment2.py create mode 100644 crypto_eval/grade_strategy_learner.py create mode 100644 crypto_eval/indicators.py create mode 100644 crypto_eval/marketsim.py create mode 100644 crypto_eval/testproject.py diff --git a/crypto_eval/AbstractTreeLearner.py b/crypto_eval/AbstractTreeLearner.py new file mode 100644 index 0000000..408b32b --- /dev/null +++ b/crypto_eval/AbstractTreeLearner.py @@ -0,0 +1,77 @@ +import numpy as np + + +class AbstractTreeLearner: + LEAF = -1 + NA = -1 + + def author(self): + return 'felixm' # replace tb34 with your Georgia Tech username + + def create_node(self, factor, split_value, left, right): + return np.array([(factor, split_value, left, right), ], + dtype='|i4, f4, i4, i4') + + def query_point(self, point): + node_index = 0 + while self.rel_tree[node_index][0] != self.LEAF: + node = 
self.rel_tree[node_index] + split_factor = node[0] + split_value = node[1] + if point[split_factor] <= split_value: + # Recurse into left sub-tree. + node_index += node[2] + else: + node_index += node[3] + v = self.rel_tree[node_index][1] + return v + + def query(self, points): + """ + @summary: Estimate a set of test points given the model we built. + @param points: should be a numpy array with each row corresponding to a specific query. + @returns the estimated values according to the saved model. + """ + query_point = lambda p: self.query_point(p) + r = np.apply_along_axis(query_point, 1, points) + return r + + def build_tree(self, xs, y): + """ + @summary: Build a decision tree from the training data. + @param dataX: X values of data to add + @param dataY: the Y training values + """ + assert(xs.shape[0] == y.shape[0]) + assert(xs.shape[0] > 0) # If this is 0 something went wrong. + + if xs.shape[0] <= self.leaf_size: + value = np.mean(y) + if value < -0.2: + value = -1 + elif value > 0.2: + value = 1 + else: + value = 0 + return self.create_node(self.LEAF, value, self.NA, self.NA) + + if np.all(y[0] == y): + return self.create_node(self.LEAF, y[0], self.NA, self.NA) + + i, split_value = self.get_i_and_split_value(xs, y) + select_l = xs[:, i] <= split_value + select_r = xs[:, i] > split_value + lt = self.build_tree(xs[select_l], y[select_l]) + rt = self.build_tree(xs[select_r], y[select_r]) + root = self.create_node(i, split_value, 1, lt.shape[0] + 1) + root = np.concatenate([root, lt, rt]) + return root + + def addEvidence(self, data_x, data_y): + """ + @summary: Add training data to learner + @param dataX: X values of data to add + @param dataY: the Y training values + """ + self.rel_tree = self.build_tree(data_x, data_y) + diff --git a/crypto_eval/BenchmarkStrategy.py b/crypto_eval/BenchmarkStrategy.py new file mode 100644 index 0000000..15e6bfa --- /dev/null +++ b/crypto_eval/BenchmarkStrategy.py @@ -0,0 +1,36 @@ +import pandas as pd +import util as ut 
+import datetime as dt + + +class BenchmarkStrategy: + def __init__(self, verbose=False, impact=0.0, commission=0.0, units=1000): + self.verbose = verbose + self.impact = impact + self.commission = commission + self.units = units + + def addEvidence(self, symbol=0, sd=0, ed=0, sv=0): + """Keep this so that API is valid.""" + pass + + def testPolicy(self, symbol="IBM", + sd=dt.datetime(2009, 1, 1), + ed=dt.datetime(2010, 1, 1), + sv=10000): + """Benchmark is to buy 1000 shares and hold.""" + dates = pd.date_range(sd, ed) + prices = ut.get_data([symbol], dates, addSPY=False, + colname='close', datecol='time') + + orders = pd.DataFrame(index=prices.index) + orders["Symbol"] = symbol + orders["Order"] = "" + orders["Shares"] = 0 + orders.iloc[0] = [symbol, "BUY", self.units] + orders.iloc[-1] = [symbol, "SELL", -self.units] + + if self.verbose: + print(type(orders)) # it better be a DataFrame! + print(orders) + return orders diff --git a/crypto_eval/ManualStrategy.py b/crypto_eval/ManualStrategy.py new file mode 100644 index 0000000..19ba0d8 --- /dev/null +++ b/crypto_eval/ManualStrategy.py @@ -0,0 +1,114 @@ +import datetime as dt +import pandas as pd +import util +import indicators + + +class ManualStrategy: + def __init__(self, verbose=False, impact=0.0, commission=0.0): + self.verbose = verbose + self.impact = impact + self.commission = commission + + # this method should create a QLearner, and train it for trading + def addEvidence(self, symbol="IBM", + sd=dt.datetime(2008, 1, 1), + ed=dt.datetime(2009, 1, 1), + sv=10000): + + # add your code to do learning here + + # example usage of the old backward compatible util function + syms = [symbol] + dates = pd.date_range(sd, ed) + prices_all = util.get_data(syms, dates) # automatically adds SPY + prices = prices_all[syms] # only portfolio symbols + # prices_SPY = prices_all['SPY'] # only SPY, for comparison later + if self.verbose: + print(prices) + + # example use with new colname + # automatically adds SPY + 
volume_all = util.get_data(syms, dates, colname="Volume") + volume = volume_all[syms] # only portfolio symbols + # volume_SPY = volume_all['SPY'] # only SPY, for comparison later + if self.verbose: + print(volume) + + def macd_strat(self, macd, orders): + """Strategy based on MACD cross.""" + + def strat(ser): + m = macd.loc[ser.index] + prev_macd, prev_signal, _ = m.iloc[0] + cur_macd, cur_signal, _ = m.iloc[1] + shares = 0 + if cur_macd < -1 and prev_macd < prev_signal \ + and cur_macd > cur_signal: + if self.holding == 0: + shares = 1000 + elif self.holding == -1000: + shares = 2000 + elif cur_macd > 1 and prev_macd > prev_signal \ + and cur_macd < cur_signal: + if self.holding == 0: + shares = -1000 + elif self.holding == 1000: + shares = -2000 + self.holding += shares + return shares + + orders['Shares'] = orders['Shares'].rolling(2).apply(strat) + + def three_indicator_strat(self, macd, rsi, price_sma, orders): + """Strategy based on three indicators. Thresholds selected based on + scatter plots.""" + def strat(row): + shares = 0 + _, _, macd_diff = macd.loc[row.name] + cur_rsi = rsi.loc[row.name][0] + cur_price_sma = price_sma.loc[row.name][0] + if self.holding == -1000 and cur_price_sma < 0.9: + shares = 2000 + elif self.holding == 0 and cur_price_sma < 0.9: + shares = 1000 + elif self.holding == -1000 and cur_rsi > 80: + shares = 2000 + elif self.holding == 0 and cur_rsi > 80: + shares = 1000 + elif self.holding == -1000 and macd_diff < -0.5: + shares = 2000 + elif self.holding == 0 and macd_diff < -0.5: + shares = 1000 + elif self.holding == 1000 and cur_price_sma > 1.1: + shares = -2000 + elif self.holding == 0 and cur_price_sma > 1.1: + shares = -1000 + self.holding += shares + return shares + + orders['Shares'] = orders.apply(strat, axis=1) + + def testPolicy(self, symbol="IBM", + sd=dt.datetime(2009, 1, 1), + ed=dt.datetime(2010, 1, 1), + sv=10000, macd_strat=False): + + self.holding = 0 + df = util.get_data([symbol], pd.date_range(sd, ed)) + 
df.drop(columns=["SPY"], inplace=True) + + orders = pd.DataFrame(index=df.index) + orders["Symbol"] = symbol + orders["Order"] = "" + orders["Shares"] = 0 + + macd = indicators.macd(df, symbol) + rsi = indicators.rsi(df, symbol) + price_sma = indicators.price_sma(df, symbol, [8]) + + if macd_strat: + self.macd_strat(macd, orders) + else: + self.three_indicator_strat(macd, rsi, price_sma, orders) + return orders diff --git a/crypto_eval/QLearner.py b/crypto_eval/QLearner.py new file mode 100644 index 0000000..b86b143 --- /dev/null +++ b/crypto_eval/QLearner.py @@ -0,0 +1,170 @@ +import datetime as dt +import pandas as pd +import util +import indicators +from qlearning_robot.QLearner import QLearner as Learner +from dataclasses import dataclass + +@dataclass +class Holding: + cash: int + shares: int + equity: int + + +class QLearner(object): + + def __init__(self, verbose=False, impact=0.0, units=1000, commission=0.0, testing=False, n_bins=5): + self.verbose = verbose + self.impact = impact + self.commission = commission + self.testing = testing # Decides which type of order df to return. + self.indicators = ['macd_diff', 'rsi', 'price_sma_8'] + self.n_bins = n_bins + self.bins = {} + self.num_states = self.get_num_states() + self.num_actions = 3 # buy, sell, hold + self.learner = Learner(self.num_states, self.num_actions) + self.units = units + + def row_to_state(self, holding, df_row): + """Transforms a row into a state value.""" + holding = (holding + self.units) // self.units + assert(holding in [0, 1, 2]) + + # For each indicator that goes into the state the interval becomes + # smaller based on how many bins the indicator has. The first + # 'indicator' is the information about how many shares we are currently + # holding. 
So for example, if I have 450 states then the intervall (aka + # remaining_states) is 150 because there are three values for holding: + # holding = 0 -> state = 0 * 150 = 0 + # holding = 1 -> state = 1 * 150 = 150 + # holding = 2 -> state = 2 * 150 = 300 + remaining_states = self.num_states // 3 + state = holding * remaining_states + + for indicator in self.indicators: + value = df_row[indicator] + bin_n = self.indicator_value_to_bin(indicator, value) + remaining_states //= self.n_bins + state += bin_n * remaining_states + return state + + def indicator_value_to_bin(self, indicator, value): + for i, upper_bound in enumerate(self.bins[indicator]): + if value < upper_bound: + return i + return i + 1 + + def add_indicators(self, df, symbol): + """Add indicators for learning to DataFrame.""" + for indicator in self.indicators: + if indicator == "macd_diff": + indicators.macd(df, symbol) + df.drop(columns=["macd", "macd_signal"], inplace=True) + elif indicator == "rsi": + indicators.rsi(df, symbol) + elif indicator.startswith("price_sma_"): + period = int(indicator.replace("price_sma_", "")) + indicators.price_sma(df, symbol, [period]) + df.drop(columns=["SPY"], inplace=True) + df.dropna(inplace=True) + + def bin_indicators(self, df): + """Create bins for indicators.""" + for indicator in self.indicators: + ser, bins = pd.qcut(df[indicator], self.n_bins, retbins=True) + self.bins[indicator] = bins[1:self.n_bins] + + def get_num_states(self): + """Return the total num of states.""" + num_states = 3 # Three states holding (1000, 0, -1000) + for _ in self.indicators: + num_states *= self.n_bins + return num_states + + def handle_order(self, action, holding, adj_closing_price): + shares = 0 + if action == 0: # buy + if holding.shares == 0 or holding.shares == -self.units: + shares = self.units + elif action == 1: # sell + if holding.shares== 0 or holding.shares == self.units: + shares = -self.units + elif action == 2: # hold + shares = 0 + + cost = shares * 
adj_closing_price + if shares != 0: + # Charge commission and deduct impact penalty + holding.cash -= self.commission + holding.cash -= (self.impact * adj_closing_price * abs(shares)) + holding.cash -= cost + holding.shares += shares + + holding.equity = holding.cash + holding.shares * adj_closing_price + + def get_reward(self, equity, new_equity): + if new_equity > equity: + return 1 + return -1 + + def train(self, df, symbol, sv): + holding = Holding(sv, 0, sv) + + row = df.iloc[0] + state = self.row_to_state(holding.shares, row) + action = self.learner.querysetstate(state) + adj_closing_price = row[symbol] + equity = holding.equity + self.handle_order(action, holding, adj_closing_price) + + for index, row in df.iloc[1:].iterrows(): + adj_closing_price = row[symbol] + new_equity = holding.cash + holding.shares * adj_closing_price + r = self.get_reward(equity, new_equity) + s_prime = self.row_to_state(holding.shares, row) + a = self.learner.query(s_prime, r) + equity = new_equity + self.handle_order(a, holding, adj_closing_price) + if self.verbose: + print(f"{holding=} {s_prime=} {r=} {a=}") + + def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000): + df = util.get_data([symbol], pd.date_range(sd, ed)) + self.add_indicators(df, symbol) + self.bin_indicators(df) + + for _ in range(15): + self.train(df, symbol, sv) + + def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000): + df = util.get_data([symbol], pd.date_range(sd, ed)) + orders = pd.DataFrame(index=df.index) + orders["Symbol"] = symbol + orders["Order"] = "" + orders["Shares"] = 0 + shares = orders["Shares"] + self.add_indicators(df, symbol) + holding = 0 + + for index, row in df.iterrows(): + state = self.row_to_state(holding, row) + action = self.learner.querysetstate(state) + + if action == 0: # buy + if holding == 0 or holding == -self.units: + holding += self.units + orders.loc[index, "Shares"] = self.units + 
elif action == 1: # sell + if holding == 0 or holding == self.units: + holding -= self.units + orders.loc[index, "Shares"] = -self.units + elif action == 2: # hold + pass + + if self.testing: + return orders + else: + return orders[["Shares"]] + diff --git a/crypto_eval/RTLearner.py b/crypto_eval/RTLearner.py new file mode 100644 index 0000000..c1162bf --- /dev/null +++ b/crypto_eval/RTLearner.py @@ -0,0 +1,30 @@ +import numpy as np +from AbstractTreeLearner import AbstractTreeLearner + + +class RTLearner(AbstractTreeLearner): + + def __init__(self, leaf_size = 1, verbose = False): + self.leaf_size = leaf_size + self.verbose = verbose + + def get_i_and_split_value(self, xs, y): + """ + @summary: Pick a random i and split value. + + Make sure that not all X are the same for i and also pick + different values to average the split_value from. + """ + i = np.random.randint(0, xs.shape[1]) + while np.all(xs[0,i] == xs[:,i]): + i = np.random.randint(0, xs.shape[1]) + + # I don't know about the performance of this, but at least it + # terminates reliably. If the two elements are the same something is + # wrong. 
+ a = np.array(list(set(xs[:, i]))) + r1, r2 = np.random.choice(a, size = 2, replace = False) + assert(r1 != r2) + split_value = (r1 + r2) / 2.0 + return i, split_value + diff --git a/crypto_eval/StrategyLearner.py b/crypto_eval/StrategyLearner.py new file mode 100644 index 0000000..7b6b9a6 --- /dev/null +++ b/crypto_eval/StrategyLearner.py @@ -0,0 +1,106 @@ +import datetime as dt +import pandas as pd +import util +import indicators +from RTLearner import RTLearner + + +class StrategyLearner(object): + + def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False): + self.verbose = verbose + self.impact = impact + self.commission = commission + self.testing = testing + + def _get_volume(self): + """For reference.""" + volume_all = ut.get_data(syms, dates, colname="Volume") + volume = volume_all[syms] # only portfolio symbols + # volume_SPY = volume_all['SPY'] # only SPY, for comparison later + if self.verbose: + print(volume) + + def _add_indicators(self, df, symbol): + """Add indicators for learning to DataFrame.""" + df.drop(columns=["SPY"], inplace=True) + indicators.macd(df, symbol) + indicators.rsi(df, symbol) + indicators.price_sma(df, symbol, [8]) + indicators.price_delta(df, symbol, 3) + df.dropna(inplace=True) + + def addEvidence(self, symbol="IBM", + sd=dt.datetime(2008, 1, 1), + ed=dt.datetime(2009, 1, 1), + sv=10000): + + self.y_threshold = 0.2 + self.indicators = ['macd_diff', 'rsi', 'price_sma_8'] + df = util.get_data([symbol], pd.date_range(sd, ed)) + self._add_indicators(df, symbol) + + def classify_y(row): + if row > self.y_threshold: + return 1 + elif row < -self.y_threshold: + return -1 + else: + pass + return 0 + + def set_y_threshold(pct): + if max(pct) < 0.2: + self.y_threshold = 0.02 + + self.learner = RTLearner(leaf_size = 5) + # self.learner = BagLearner(RTLearner, 3, {'leaf_size': 5}) + data_x = df[self.indicators].to_numpy() + pct = df['pct_3'] + + # This is a hack to get a low enough buy/sell threshold for the + # cyclic 
the test 'ML4T-220' where the max pct_3 is 0.0268. + set_y_threshold(pct) + y = pct.apply(classify_y) + + self.learner.addEvidence(data_x, y.to_numpy()) + return y + + def strat(self, data_y, orders): + self.holding = 0 + + def strat(row): + y = int(data_y.loc[row.name][0]) + shares = 0 + if self.holding == 0 and y == 1: + shares = 1000 + elif self.holding == -1000 and y == 1: + shares = 2000 + elif self.holding == 0 and y == -1: + shares = -1000 + elif self.holding == 1000 and y == -1: + shares = -2000 + self.holding += shares + return shares + + orders["Shares"] = orders.apply(strat, axis=1) + + def testPolicy(self, symbol="IBM", + sd=dt.datetime(2009, 1, 1), + ed=dt.datetime(2010, 1, 1), + sv=10000): + df = util.get_data([symbol], pd.date_range(sd, ed)) + self._add_indicators(df, symbol) + data_x = df[self.indicators].to_numpy() + data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x)) + + orders = pd.DataFrame(index=df.index) + orders["Symbol"] = symbol + orders["Order"] = "" + orders["Shares"] = 0 + self.strat(data_y, orders) + if self.testing: + return orders + else: + return orders[["Shares"]] + diff --git a/crypto_eval/experiment1.py b/crypto_eval/experiment1.py new file mode 100644 index 0000000..0b8363e --- /dev/null +++ b/crypto_eval/experiment1.py @@ -0,0 +1,237 @@ +import pandas as pd +import datetime as dt +import sys + +import util +import indicators +import crypto_eval.marketsim as marketsim +import matplotlib.pyplot as plt +from matplotlib.widgets import MultiCursor +from BenchmarkStrategy import BenchmarkStrategy +from ManualStrategy import ManualStrategy +from StrategyLearner import StrategyLearner +from QLearner import QLearner + + +def plot_indicators(symbol, df): + fig, ax = plt.subplots(4, sharex=True) + + price_sma = indicators.price_sma(df, symbol, [8]) + bb = indicators.bollinger_band(df, symbol) + rsi = indicators.rsi(df, symbol) + macd = indicators.macd(df, symbol).copy() + + df[[symbol]].plot(ax=ax[0]) + bb.plot(ax=ax[0]) 
+ price_sma.plot(ax=ax[1]) + macd.plot(ax=ax[2]) + rsi.plot(ax=ax[3]) + for a in ax.flat: + a.grid() + m = MultiCursor(fig.canvas, ax, color='r', lw=0.5) + plt.show() + sys.exit(0) + + +def visualize_correlations(symbol, df): + indicators.price_sma(df, symbol, [8, 21]) + indicators.price_delta(df, symbol, 5) + indicators.price_delta(df, symbol, 3) + indicators.price_delta(df, symbol, 1) + indicators.macd(df, symbol) + indicators.rsi(df, symbol) + + # df = df[df['rsi'] > 80] + fig, ax = plt.subplots(3, 2) # sharex=True) + df.plot.scatter(x="price_sma_8", y="pct_5", ax=ax[0, 0]) + df.plot.scatter(x="price_sma_8", y="pct_3", ax=ax[1, 0]) + df.plot.scatter(x="price_sma_8", y="pct_1", ax=ax[2, 0]) + # df.plot.scatter(x="rsi", y="pct_5", ax=ax[0, 1]) + # df.plot.scatter(x="rsi", y="pct_3", ax=ax[1, 1]) + # df.plot.scatter(x="rsi", y="pct_1", ax=ax[2, 1]) + df.plot.scatter(x="macd_diff", y="pct_5", ax=ax[0, 1]) + df.plot.scatter(x="macd_diff", y="pct_3", ax=ax[1, 1]) + df.plot.scatter(x="macd_diff", y="pct_1", ax=ax[2, 1]) + + for a in ax.flat: + a.grid() + plt.show() + sys.exit(0) + + +def compare_manual_strategies(symbol, sv, sd, ed): + + df = util.get_data([symbol], pd.date_range(sd, ed)) + df.drop(columns=["SPY"], inplace=True) + + bs = BenchmarkStrategy() + orders = bs.testPolicy(symbol, sd, ed, sv) + df["Benchmark"] = marketsim.compute_portvals(orders, sv) + df["Orders Benchmark"] = orders["Shares"] + + ms = ManualStrategy() + orders = ms.testPolicy(symbol, sd, ed, sv, macd_strat=True) + df["MACD Strat"] = marketsim.compute_portvals(orders, sv) + df["Orders MACD"] = orders["Shares"] + # df["Holding Manual"] = orders["Shares"].cumsum() + + orders = ms.testPolicy(symbol, sd, ed, sv) + df["Three Strat"] = marketsim.compute_portvals(orders, sv) + df["Orders Three"] = orders["Shares"] + + fig, ax = plt.subplots(3, sharex=True) + df[[symbol]].plot(ax=ax[0]) + df[["Benchmark", "MACD Strat", "Three Strat"]].plot(ax=ax[1]) + df[["Orders Benchmark", "Orders MACD", "Orders 
Three"]].plot(ax=ax[2]) + + for a in ax: + a.grid() + MultiCursor(fig.canvas, ax, color='r', lw=0.5) + + # plt.show() + fig.set_size_inches(10, 8, forward=True) + plt.savefig('figure_1.png', dpi=fig.dpi) + + +def compare_all_strategies(symbol, sv, sd, ed): + df = util.get_data([symbol], pd.date_range(sd, ed)) + df.drop(columns=["SPY"], inplace=True) + normalize = indicators.normalize + + bs = BenchmarkStrategy() + orders = bs.testPolicy(symbol, sd, ed, sv) + df["Benchmark"] = normalize(marketsim.compute_portvals(orders, sv)) + df["Orders Benchmark"] = orders["Shares"] + + ms = ManualStrategy() + orders = ms.testPolicy(symbol, sd, ed, sv) + df["Manual"] = normalize(marketsim.compute_portvals(orders, sv)) + df["Orders Manual"] = orders["Shares"] + + sl = StrategyLearner(testing=True) + sl.addEvidence(symbol, sd, ed, sv) + orders = sl.testPolicy(symbol, sd, ed, sv) + df["Strategy"] = normalize(marketsim.compute_portvals(orders, sv)) + df["Orders Strategy"] = orders["Shares"] + + fig, ax = plt.subplots(3, sharex=True) + df[[symbol]].plot(ax=ax[0]) + df[["Benchmark", "Manual", "Strategy"]].plot(ax=ax[1]) + df[["Orders Benchmark", "Orders Manual", "Orders Strategy"]].plot(ax=ax[2]) + + for a in ax: + a.grid() + MultiCursor(fig.canvas, ax, color='r', lw=0.5) + + # plt.show() + fig.set_size_inches(10, 8, forward=True) + plt.savefig('figure_2.png', dpi=fig.dpi) + + +def compare_number_trades(): + symbol = "JPM" + sv = 10000 + sd = dt.datetime(2008, 1, 1) # in-sample + ed = dt.datetime(2009, 12, 31) # in-sample + + df = util.get_data([symbol], pd.date_range(sd, ed)) + df.drop(columns=["SPY"], inplace=True) + + print(f"| commission | n_orders |") + print(f"-------------------------") + for commission in [0, 9.95, 20, 50, 100]: + ql = QLearner(testing=True, commission=commission, impact=0.005) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd, ed, sv) + n_orders = orders[orders["Shares"] != 0].shape[0] + print(f"| {commission} | {n_orders} |") + +def 
compare_q_learners(): + symbol = "JPM" + sv = 10000 + sd = dt.datetime(2008, 1, 1) # in-sample + ed = dt.datetime(2009, 12, 31) # in-sample + sd_out = dt.datetime(2010, 1, 1) # out-sample + ed_out = dt.datetime(2011, 12, 31) # out-sample + + df = util.get_data([symbol], pd.date_range(sd_out, ed_out)) + df.drop(columns=["SPY"], inplace=True) + + bs = BenchmarkStrategy() + orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) + df["Orders Benchmark"] = orders["Shares"] + + ql = QLearner(testing=True, verbose=False) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd_out, ed_out, sv) + df["QL 5"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) + df["Orders QL 5"] = orders["Shares"] + + ql = QLearner(testing=True, verbose=False, n_bins=4) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd_out, ed_out, sv) + df["QL 4"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) + df["Orders QL 4"] = orders["Shares"] + + fig, ax = plt.subplots(3, sharex=True) + df[[symbol]].plot(ax=ax[0]) + df[["Benchmark", "QL 5", "QL 4"]].plot(ax=ax[1]) + df[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=ax[2]) + + for a in ax: + a.grid() + m = MultiCursor(fig.canvas, ax, color='r', lw=0.5) + fig.set_size_inches(10, 8, forward=True) + plt.savefig('figure_4.png', dpi=fig.dpi) + sys.exit(0) + + +def experiment1(create_report=False): + symbol = "COINBASE_BTCUSD_1D" + sv = 10000 + sd = dt.datetime(2020, 1, 1) # in-sample + ed = dt.datetime(2020, 12, 31) # in-sample + sd_out = dt.datetime(2020, 1, 1) # out-sample + ed_out = dt.datetime(2020, 12, 31) # out-sample + + df = util.get_data([symbol], pd.date_range(sd_out, ed_out), addSPY=True) + + # if create_report: + # compare_manual_strategies(symbol, sv, sd, ed) + # compare_all_strategies(symbol, sv, sd, ed) + # sys.exit(0) + + # visualize_correlations(symbol, df) + # plot_indicators(symbol, 
df) + # compare_number_trades(symbol, sv, sd, ed) + # compare_q_learners() + # return + + bs = BenchmarkStrategy(units=1) + orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + pvs = marketsim.compute_portvals(orders, start_val=sv) + df["Benchmark"] = indicators.normalize(pvs) + df["Orders Benchmark"] = orders["Shares"] + + ql = QLearner(testing=True, verbose=False, units=1) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd_out, ed_out, sv) + df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) + df["Orders QL"] = orders["Shares"] + + fig, ax = plt.subplots(3, sharex=True) + df[[symbol]].plot(ax=ax[0]) + df[["Benchmark", "QL"]].plot(ax=ax[1]) + df[["Orders Benchmark", "Orders QL"]].plot(ax=ax[2]) + + for a in ax: + a.grid() + m = MultiCursor(fig.canvas, ax, color='r', lw=0.5) + plt.show() + # fig.set_size_inches(10, 8, forward=True) + # plt.savefig('figure_4.png', dpi=fig.dpi) + + +if __name__ == "__main__": + experiment1() diff --git a/crypto_eval/experiment2.py b/crypto_eval/experiment2.py new file mode 100644 index 0000000..9b12f55 --- /dev/null +++ b/crypto_eval/experiment2.py @@ -0,0 +1,8 @@ +import experiment1 + +def experiment2(): + experiment1.compare_number_trades() + + +if __name__ == "__main__": + experiment2() diff --git a/crypto_eval/grade_strategy_learner.py b/crypto_eval/grade_strategy_learner.py new file mode 100644 index 0000000..0dc0e20 --- /dev/null +++ b/crypto_eval/grade_strategy_learner.py @@ -0,0 +1,339 @@ +"""MC3-P3: Strategy Learner - grading script. + +Usage: +- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd). 
+- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.: + PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py + +Copyright 2017, Georgia Tech Research Corporation +Atlanta, Georgia 30332-0415 +All Rights Reserved + +Template code for CS 4646/7646 + +Georgia Tech asserts copyright ownership of this template and all derivative +works, including solutions to the projects assigned in this course. Students +and other users of this template code are advised not to share it with others +or to make it available on publicly viewable websites including repositories +such as github and gitlab. This copyright statement should not be removed +or edited. + +We do grant permission to share solutions privately with non-students such +as potential employers. However, sharing with other current or future +students of CS 7646 is prohibited and subject to being investigated as a +GT honor code violation. + +-----do not edit anything above this line--- + +Student Name: Tucker Balch (replace with your name) +GT User ID: tb34 (replace with your User ID) +GT ID: 900897987 (replace with your GT ID) +""" + +import pytest +from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput + +import os +import sys +import traceback as tb + +import datetime as dt +import numpy as np +import pandas as pd +from collections import namedtuple + +import time +import util +import random + +# Test cases +StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed']) +strategy_test_cases = [ + StrategyTestCase( + description="ML4T-220", + insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), + outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), + benchmark_type='clean', + benchmark=1.0, #benchmark updated Apr 24 2017 + impact=0.0, + 
train_time=25, + test_time=5, + max_time=60, + seed=1481090000 + ), + StrategyTestCase( + description="AAPL", + insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), + outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), + benchmark_type='stock', + benchmark=0.1581999999999999, #benchmark computed Nov 22 2017 + impact=0.0, + train_time=25, + test_time=5, + max_time=60, + seed=1481090000 + ), + StrategyTestCase( + description="SINE_FAST_NOISE", + insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), + outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), + benchmark_type='noisy', + benchmark=2.0, #benchmark updated Apr 24 2017 + impact=0.0, + train_time=25, + test_time=5, + max_time=60, + seed=1481090000 + ), + StrategyTestCase( + description="UNH - In sample", + insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000), + outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000), + benchmark_type='stock', + benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017 + impact=0.0, + train_time=25, + test_time=5, + max_time=60, + seed=1481090000 + ), +] + +max_points = 60.0 +html_pre_block = True # surround comments with HTML
+# <pre> tag (for T-Square comments field)
+
+MAX_HOLDINGS = 1000
+
+# Test function(s)
+@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
+def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
+    """Test StrategyLearner.
+
+    Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float)
+    max time (seconds), points for this test case (int), random seed (long), and a grader fixture.
+    """
+    points_earned = 0.0  # initialize points for this test case
+    try:
+        incorrect = True
+        if not 'StrategyLearner' in globals():
+            import importlib
+            m = importlib.import_module('StrategyLearner')
+            globals()['StrategyLearner'] = m
+        outsample_cr_to_beat = None
+        if benchmark_type == 'clean':
+            outsample_cr_to_beat = benchmark
+        def timeoutwrapper_strategylearner():
+            #Set fixed seed for repeatability
+            np.random.seed(seed)
+            random.seed(seed)
+            learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
+            tmp = time.time()
+            learner.addEvidence(**insample_args)
+            train_t = time.time()-tmp
+            tmp = time.time()
+            insample_trades_1 = learner.testPolicy(**insample_args)
+            test_t = time.time()-tmp
+            insample_trades_2 = learner.testPolicy(**insample_args)
+            tmp = time.time()
+            outsample_trades = learner.testPolicy(**outsample_args)
+            out_test_t = time.time()-tmp
+            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
+        msgs = []
+        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
+        incorrect = False
+        if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
+            incorrect=True
+            msgs.append("  First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
+        elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
+            incorrect=True
+            msgs.append("  Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
+        elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
+            incorrect=True
+            msgs.append("  Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
+        else:
+            tmp_csum=0.0
+            for date,trade in in_trades_1.iterrows():
+                tmp_csum+= trade.iloc[0]
+                if (trade.iloc[0]!=0) and\
+                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
+                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+                   incorrect=True
+                   msgs.append("  illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+                   break
+                elif abs(tmp_csum)>MAX_HOLDINGS:
+                    incorrect=True
+                    msgs.append("  holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+                    break
+            tmp_csum=0.0
+            for date,trade in in_trades_2.iterrows():
+                tmp_csum+= trade.iloc[0]
+                if (trade.iloc[0]!=0) and\
+                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
+                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+                   incorrect=True
+                   msgs.append("  illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+                   break
+                elif abs(tmp_csum)>MAX_HOLDINGS:
+                    incorrect=True
+                    msgs.append("  holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+                    break
+            tmp_csum=0.0
+            for date,trade in out_trades.iterrows():
+                tmp_csum+= trade.iloc[0]
+                if (trade.iloc[0]!=0) and\
+                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
+                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
+                   incorrect=True
+                   msgs.append("  illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
+                   break
+                elif abs(tmp_csum)>MAX_HOLDINGS:
+                    incorrect=True
+                    msgs.append("  holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
+                    break
+            # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
+            #     ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
+            #     ((out_trades.abs()!=0)  & (out_trades.abs()!=MAX_HOLDINGS)  & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
+            #     incorrect = True
+            #     msgs.append("  illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
+            # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
+            #     incorrect = True
+            #     msgs.append("  holdings more than {} long or short".format(MAX_HOLDINGS))
+        if not(incorrect):
+            if train_t>train_time:
+                incorrect=True
+                msgs.append("  addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
+            else:
+                points_earned += 1.0
+            if test_t > test_time:
+                incorrect = True
+                msgs.append("  testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
+            else:
+                points_earned += 2.0
+            if not((in_trades_1 == in_trades_2).all()[0]):
+                incorrect = True
+                mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
+                mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
+                msgs.append("  consecutive calls to testPolicy() with same input did not produce same output:")
+                msgs.append("  Mismatched trades:\n {}".format(mismatches))
+            else:
+                points_earned += 2.0
+            student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
+            student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
+            if student_insample_cr <= benchmark:
+                incorrect = True
+                msgs.append("  in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
+            else:
+                points_earned += 5.0
+            if outsample_cr_to_beat is None:
+                if out_test_t > test_time:
+                    incorrect = True
+                    msgs.append("  out-sample took {} seconds, max of {}".format(out_test_t,test_time))
+                else:
+                    points_earned += 5.0
+            else:
+                if student_outsample_cr < outsample_cr_to_beat:
+                    incorrect = True
+                    msgs.append("  out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
+                else:
+                    points_earned += 5.0
+        if incorrect:
+            inputs_str = "    insample_args: {}\n" \
+                         "    outsample_args: {}\n" \
+                         "    benchmark_type: {}\n" \
+                         "    benchmark: {}\n" \
+                         "    train_time: {}\n" \
+                         "    test_time: {}\n" \
+                         "    max_time: {}\n" \
+                         "    seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
+            raise IncorrectOutput("Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
+    except Exception as e:
+        # Test result: failed
+        msg = "Test case description: {}\n".format(description)
+
+        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
+        tb_list = tb.extract_tb(sys.exc_info()[2])
+        for i in range(len(tb_list)):
+            row = tb_list[i]
+            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
+        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
+        if tb_list:
+            msg += "Traceback:\n"
+            msg += ''.join(tb.format_list(tb_list))  # contains newlines
+        elif 'grading_traceback' in dir(e):
+            msg += "Traceback:\n"
+            msg += ''.join(tb.format_list(e.grading_traceback))
+        msg += "{}: {}".format(e.__class__.__name__, str(e))
+
+        # Report failure result to grader, with stacktrace
+        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
+        raise
+    else:
+        # Test result: passed (no exceptions)
+        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
+
+def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
+    """Return the cumulative return of buying max_holdings on the first date and selling on the last."""
+    orders = pd.DataFrame({'orders': 0}, index=util.get_data([symbol,],pd.date_range(sd,ed)).index)
+    orders.iloc[0, 0], orders.iloc[-1, 0] = max_holdings, -max_holdings  # positional writes, no chained assignment
+    return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
+
+def evalPolicy(student_trades,sym_prices,startval):
+    """Return the fractional return of trading `student_trades` at `sym_prices`, marked at the last price."""
+    cash_spent, shares_left = student_trades.mul(sym_prices,axis=0).sum(), student_trades.sum()
+    return float(((startval - cash_spent) + shares_left*sym_prices.iloc[-1])/startval)-1.0
+
+def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
+    """Return the cumulative return of the single-column `student_trades` frame for `symbol`.
+
+    Non-zero trades become 'buy'/'sell' orders that compute_portvals simulates. The orders frame is
+    built in one step because DataFrame.append (removed in pandas 2.0) was quadratic in a loop.
+    """
+    trades = student_trades.iloc[:, 0].loc[lambda col: col != 0]
+    sides = ['sell' if nshares < 0 else 'buy' for nshares in trades]
+    orders_df = pd.DataFrame({'Shares': trades.abs(), 'Order': sides, 'Symbol': symbol}, columns=['Shares','Order','Symbol'])
+    portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
+    return float(portvals.iloc[-1]/portvals.iloc[0])-1
+
+def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
+    """Simulate the market for the given date range and orders.
+
+    orders_df is indexed by date and has 'Shares', 'Order' ('buy'/'sell', any case) and
+    'Symbol' columns. Every order pays commission_cost plus market_impact times its traded
+    value. Orders dated outside the available price range are skipped.
+
+    Returns a Series of daily portfolio values (cash plus holdings) indexed like the prices.
+    """
+    orders = []
+    for date, order in orders_df.sort_index().iterrows():
+        # Store sells as negative share counts.
+        sign = -1 if order['Order'].lower() == 'sell' else 1
+        orders.append((date, order['Symbol'], sign * order['Shares']))
+    symbols = list({symbol for _, symbol, _ in orders})
+    dates = pd.date_range(start_date, end_date)
+    prices = util.get_data(symbols, dates)[symbols]
+    # ffill()/bfill() replace fillna(method=...), which recent pandas deprecates.
+    prices = prices.ffill().bfill()
+    prices['_CASH'] = 1.0
+    # Float columns so fractional share counts can be accumulated.
+    trades = pd.DataFrame(0.0, index=prices.index, columns=symbols)
+    cash = pd.Series(0.0, index=prices.index)
+    cash.iloc[0] = startval
+    first_day, last_day = prices.index.min(), prices.index.max()
+    for date, symbol, shares in orders:
+        # Skip out-of-range orders before the price lookup, which raises KeyError for them.
+        if (date < first_day) or (date > last_day):
+            continue
+        price = prices.at[date, symbol]
+        # transaction cost model; builtin abs() replaces pd.np.abs (pd.np was removed in pandas 2.0)
+        cost = shares * price + (commission_cost + abs(shares) * price * market_impact)
+        # .at writes into the frame itself instead of chained indexing on a possible copy.
+        trades.at[date, symbol] += shares
+        cash.at[date] -= cost
+    trades['_CASH'] = cash
+    # Holdings are cumulative trades; the _CASH column is valued at 1.0 per unit.
+    holdings = trades.cumsum()
+    df_portvals = (prices * holdings).sum(axis=1)
+    return df_portvals
+
+if __name__ == "__main__":  # when executed as a script, run this file's tests through pytest
+    pytest.main(["-s", __file__])  # "-s" turns off pytest's output capturing
diff --git a/crypto_eval/indicators.py b/crypto_eval/indicators.py
new file mode 100644
index 0000000..ee4b1cb
--- /dev/null
+++ b/crypto_eval/indicators.py
@@ -0,0 +1,140 @@
+import pandas as pd
+import datetime as dt
+import matplotlib.pyplot as plt
+from util import get_data
+
+
+def author():  # identifier string of the author of this module
+    return "felixm"
+
+
+def normalize(timeseries):  # rescale so that the first row becomes 1
+    return timeseries / timeseries.iloc[0]
+
+
+def bollinger_band(df, symbol, period=20, m=2):
+    """Add Bollinger band columns to df and return them.
+
+    Writes the rolling mean ("boll_sma") and the mean plus/minus m rolling standard
+    deviations ("boll_up", "boll_lo") of df[symbol] over `period` rows.
+    """
+    window = df[symbol].rolling(period)
+    center, width = window.mean(), m * window.std()
+    df["boll_sma"], df["boll_up"], df["boll_lo"] = center, center + width, center - width
+    return df[["boll_sma", "boll_up", "boll_lo"]]
+
+
+def sma(df, symbol, period):
+    """Add an "sma_<p>" rolling-mean column to df for each period and return those columns.
+
+    period is one window length or an iterable of them; any non-iterable scalar
+    (a numpy integer as well as a plain int) is treated as a single period.
+    """
+    periods = list(period) if hasattr(period, "__iter__") else [period]
+    for p in periods:
+        df[f"sma_{p}"] = df[symbol].rolling(p).mean()
+    return df[[f"sma_{p}" for p in periods]]
+
+
+def ema(df, symbol, period):
+    """Add an "ema_<p>" exponentially weighted mean column (span=p) per period and return them.
+
+    period is one span or an iterable of them; any non-iterable scalar
+    (a numpy integer as well as a plain int) is treated as a single period.
+    """
+    periods = list(period) if hasattr(period, "__iter__") else [period]
+    for p in periods:
+        df[f"ema_{p}"] = df[symbol].ewm(span=p).mean()
+    return df[[f"ema_{p}" for p in periods]]
+
+
+def price_sma(df, symbol, period):
+    """Add a "price_sma_<p>" column (price divided by its p-row SMA) to df for each period.
+
+    period is one window length or an iterable of them; any non-iterable scalar
+    (a numpy integer as well as a plain int) is treated as a single period.
+    Returns the newly added columns.
+    """
+    periods = list(period) if hasattr(period, "__iter__") else [period]
+    for p in periods:
+        df[f"price_sma_{p}"] = df[symbol] / df[symbol].rolling(p).mean()
+    return df[[f"price_sma_{p}" for p in periods]]
+
+
+def rsi(df, symbol, period=14):
+    """Add an "rsi" column to df and return it as a one-column frame.
+
+    Each value is computed over a window of period + 1 prices (period price
+    changes) as 100 - 100 / (1 + mean gain / mean loss), where gains are the
+    positive percentage changes and losses the non-positive ones.
+    """
+    # One extra price is needed because the first change in a window is NaN.
+    period += 1
+
+    def window_rsi(prices):
+        change = prices.pct_change()
+        mean_gain = change[change > 0].mean()
+        mean_loss = change[change <= 0].abs().mean()
+        return 100 - (100 / (1 + ((mean_gain / period) / (mean_loss / period))))
+    df["rsi"] = df[symbol].rolling(period).apply(window_rsi)
+    return df[["rsi"]]
+
+
+def macd(df, symbol):
+    """Add "macd", "macd_signal" and "macd_diff" columns to df and return them.
+
+    macd is EMA(span=12) - EMA(span=26) of df[symbol]; the signal is its 9-row rolling mean.
+    """
+    df["macd"] = df[symbol].ewm(span=12).mean() - df[symbol].ewm(span=26).mean()
+    df["macd_signal"] = df["macd"].rolling(9).mean()
+    df["macd_diff"] = df["macd"] - df["macd_signal"]
+    return df[["macd", "macd_signal", "macd_diff"]]
+
+
+def price_delta(df, symbol, period=1):
+    """Add "pct_<period>": the percentage change from each row to the row `period` steps later."""
+    key = f"pct_{period}"
+    # Shift back so each row holds its forward-looking change (the last rows become NaN).
+    df[key] = df[symbol].pct_change(periods=period).shift(-period)
+    return df[key]
+
+
+def test_indicators():
+    """Plot every indicator for JPM over 2008-2009 and save the figures as PNG files."""
+    symbol = "JPM"
+    sd = dt.datetime(2008, 1, 1)
+    ed = dt.datetime(2009, 12, 31)
+    df = get_data([symbol], pd.date_range(sd, ed))
+    df.drop(columns=["SPY"], inplace=True)
+    df_orig = df.copy()
+    # df = normalize(df)
+
+    sma(df, symbol, 21)
+    ema(df, symbol, 21)
+    df.plot(title="21 SMA and EMA")
+    plt.savefig('figure_1.png')
+
+    df = df_orig.copy()
+    sma(df, symbol, 8)
+    price_sma(df, symbol, 8)
+    df.plot(title="SMA and price / SMA", subplots=True)
+    plt.savefig('figure_2.png')
+
+    df = df_orig.copy()
+    bollinger_band(df, symbol)
+    df.plot(title="Bollinger Band")
+    plt.savefig('figure_3.png')
+
+    df = df_orig.copy()
+    rsi(df, symbol)
+    fig, axes = plt.subplots(nrows=2, sharex=True)
+    df[symbol].plot(ax=axes[0], title="JPM price action")
+    df["rsi"].plot(ax=axes[1], title="RSI")  # rsi() stores its result under "rsi"
+    plt.savefig('figure_4.png')
+
+    df = df_orig.copy()
+    macd(df, symbol)
+    fig, axes = plt.subplots(nrows=2, sharex=True)
+    df[symbol].plot(ax=axes[0], title="JPM price action")
+    df[["macd", "macd_signal"]].plot(ax=axes[1])  # column names written by macd()
+    plt.savefig('figure_5.png')
diff --git a/crypto_eval/marketsim.py b/crypto_eval/marketsim.py
new file mode 100644
index 0000000..9847c9c
--- /dev/null
+++ b/crypto_eval/marketsim.py
@@ -0,0 +1,179 @@
+"""MC2-P1: Market simulator.
+
+Copyright 2018, Georgia Institute of Technology (Georgia Tech)
+Atlanta, Georgia 30332
+All Rights Reserved
+
+Template code for CS 4646/7646
+
+Georgia Tech asserts copyright ownership of this template and all derivative
+works, including solutions to the projects assigned in this course. Students
+and other users of this template code are advised not to share it with others
+or to make it available on publicly viewable websites including repositories
+such as github and gitlab.  This copyright statement should not be removed
+or edited.
+
+We do grant permission to share solutions privately with non-students such
+as potential employers. However, sharing with other current or future
+students of CS 7646 is prohibited and subject to being investigated as a
+GT honor code violation.
+
+-----do not edit anything above this line---
+
+Student Name: Tucker Balch (replace with your name)
+GT User ID: felixm (replace with your User ID)
+GT ID: 1337 (replace with your GT ID)
+"""
+
+import pandas as pd
+from util import get_data, plot_data
+from optimize_something.optimization import calculate_stats
+
+
+def read_orders(orders_file):
+    """
+    Parse an orders CSV (path or file-like) into a frame sorted by date:
+
+        Date      datetime64[ns]  (index)
+        Symbol            object
+        Order             object
+        Shares             int32
+
+    This is what the order book looks like:
+
+        Date,Symbol,Order,Shares
+        2011-01-10,AAPL,BUY,1500
+        2011-01-10,AAPL,SELL,1500
+    """
+    orders = pd.read_csv(orders_file,
+                         index_col=['Date'],
+                         dtype={'Symbol': str, 'Order': str, 'Shares': 'i4'},  # per-column dtypes
+                         parse_dates=['Date'])
+    orders.sort_values(by="Date", inplace=True)
+    return orders
+
+
+def get_order_book_info(orders):
+    """Return start_date, end_date, and symbols (as a sorted list of unique names)."""
+    # The index ends are the date range only when `orders` is sorted by date.
+    first_date, last_date = orders.index[0], orders.index[-1]
+    unique_symbols = set(orders.Symbol.tolist())
+    return first_date, last_date, sorted(unique_symbols)
+
+
+def get_portfolio_value(holding, prices):
+    """Calculate the current portfolio value.
+
+    Sums the 'cash' entry of `holding` and shares * prices[ticker] for every other entry.
+    """
+    total = 0
+    for ticker, amount in holding.items():
+        total += amount if ticker == 'cash' else amount * prices[ticker]
+    return total
+
+
+def handle_order(date, order, holding, prices, commission, impact):
+    """Apply one order to `holding` in place.
+
+    `order` unpacks to (symbol, side, shares). An empty side with zero shares, or
+    a null share count, is ignored. With an empty side the sign of `shares` picks
+    BUY (positive) or SELL (negative). Commission and the impact penalty are
+    charged to cash before the side is validated; an unknown side raises
+    Exception. `date` is not used by the computation.
+    """
+    symbol, side, shares = order
+    if (shares == 0 and side == "") or pd.isnull(shares):
+        return  # nothing to trade
+    if side == "":
+        # A blank side lets the sign of `shares` indicate the direction.
+        if shares > 0:
+            side = "BUY"
+        elif shares < 0:
+            side, shares = "SELL", abs(shares)
+
+    price = prices[symbol]
+    cost = shares * price
+    # Charge commission and deduct impact penalty
+    holding['cash'] -= (commission + impact * price * shares)
+
+    # +1 spends cash and adds shares; -1 does the reverse.
+    direction = {"BUY": 1, "SELL": -1}.get(side.upper())
+    if direction is None:
+        raise Exception("Unexpected order type.")
+    holding['cash'] -= direction * cost
+    holding[symbol] += direction * shares
+
+
+def compute_portvals(orders_file, start_val=1000000, commission=9.95, impact=0.005):
+    """Simulate the orders and return the daily portfolio value.
+
+    orders_file is a CSV path (see read_orders) or an orders DataFrame indexed by
+    date. Returns a one-column 'Portval' frame over the price dates between the
+    first and last order; orders on dates without a price row are ignored.
+    """
+    if isinstance(orders_file, pd.DataFrame):
+        # Date slicing below needs a sorted index; a stable sort keeps same-day order.
+        orders = orders_file.sort_index(kind="mergesort")
+    else:
+        orders = read_orders(orders_file)
+    start_date, end_date, symbols = get_order_book_info(orders)
+    prices = get_data(symbols, pd.date_range(start_date, end_date))
+
+    # A dictionary to keep track of the assets we are holding.
+    holding = {s: 0 for s in symbols}
+    holding['cash'] = start_val
+    portvals = []
+    for day, day_prices in prices.iterrows():
+        for _, order in orders.loc[day:day].iterrows():
+            handle_order(day, order, holding, day_prices, commission, impact)
+        portvals.append(get_portfolio_value(holding, day_prices))
+    # iterrows() rows may be copies, so the values are collected and assigned
+    # once here rather than written into each row.
+    prices['Portval'] = pd.Series(portvals, index=prices.index, dtype=float)
+    return prices[['Portval']]
+
+
+def test_code():
+    """Simulate a sample orders file and print the fund's statistics next to SPY's."""
+    of = "./orders/orders-02.csv"
+    sv = 1000000
+    portvals = compute_portvals(orders_file=of, start_val=sv)
+
+    if isinstance(portvals, pd.DataFrame):
+        portvals = portvals[portvals.columns[0]]  # just get the first column
+    else:
+        raise Exception("warning, code did not return a DataFrame")
+
+    start_date = portvals.index[0]
+    end_date = portvals.index[-1]
+    cum_ret, avg_daily_ret, \
+        std_daily_ret, sharpe_ratio = calculate_stats(portvals.to_frame(), [1])
+
+    spy = get_data(['SPY'], pd.date_range(start_date, end_date))
+    cum_ret_SPY, avg_daily_ret_SPY, \
+        std_daily_ret_SPY, sharpe_ratio_SPY = calculate_stats(spy, [1])
+
+    # Compare portfolio against $SPY
+    print(f"Date Range: {start_date} to {end_date}")
+    print()
+    print(f"Sharpe Ratio of Fund: {sharpe_ratio}")
+    print(f"Sharpe Ratio of SPY : {sharpe_ratio_SPY}")
+    print()
+    print(f"Cumulative Return of Fund: {cum_ret}")
+    print(f"Cumulative Return of SPY : {cum_ret_SPY}")
+    print()
+    print(f"Standard Deviation of Fund: {std_daily_ret}")
+    print(f"Standard Deviation of SPY : {std_daily_ret_SPY}")
+    print()
+    print(f"Average Daily Return of Fund: {avg_daily_ret}")
+    print(f"Average Daily Return of SPY : {avg_daily_ret_SPY}")
+    print()
+    print(f"Final Portfolio Value: {portvals.iloc[-1]}")  # positional; a bare [-1] would be a label lookup
+
+
+def author():  # user ID of the author of this module
+    return 'felixm'
+
+
+if __name__ == "__main__":  # when executed as a script, run the demo simulation
+    test_code()
diff --git a/crypto_eval/testproject.py b/crypto_eval/testproject.py
new file mode 100644
index 0000000..f37954e
--- /dev/null
+++ b/crypto_eval/testproject.py
@@ -0,0 +1,8 @@
+from experiment1 import experiment1
+from experiment2 import experiment2
+
+
+if __name__ == "__main__":  # when executed as a script, run both experiments in order
+    experiment1(create_report=True)
+    experiment2()
+
diff --git a/util.py b/util.py
index 6fa77a1..2b7db0d 100644
--- a/util.py
+++ b/util.py
@@ -14,22 +14,42 @@ def symbol_to_path(symbol, base_dir=None):
         base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
     return os.path.join(base_dir, "{}.csv".format(str(symbol)))
 
-def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
+def get_data(symbols, dates, addSPY=True, colname='Adj Close', datecol='Date'):
     """Read stock data (adjusted close) for given symbols from CSV files."""
     df = pd.DataFrame(index=dates)
     if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-        symbols = ['SPY'] + list(symbols) # handles the case where symbols is np array of 'object'
+        # list() also handles the case where symbols is a numpy array of 'object'
+        symbols = ['SPY'] + list(symbols)
 
     for symbol in symbols:
-        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
-                parse_dates=True, usecols=['Date', colname], na_values=['nan'])
+        if 'BTC' in symbol or 'ETH' in symbol:  # NOTE: every branch overwrites the colname/datecol arguments
+            colname = 'close'
+            datecol = 'time'
+        elif symbol == 'SPY':  # the SPY file is read with the same 'time'/'close' columns
+            colname = 'close'
+            datecol = 'time'
+        else:
+            colname = 'Adj Close'
+            datecol = 'Date'
+
+        df_temp = pd.read_csv(symbol_to_path(symbol),
+                              index_col=datecol,
+                              parse_dates=True, usecols=[datecol, colname],
+                              na_values=['nan'])
         df_temp = df_temp.rename(columns={colname: symbol})
+
+        if datecol == 'time':  # 'time' is parsed as epoch seconds and truncated to midnight before the join
+            df_temp['date'] = pd.to_datetime(df_temp.index, unit='s')
+            df_temp['date'] = pd.DatetimeIndex(df_temp['date']).normalize()
+            df_temp.set_index('date', drop=True, inplace=True)
+
         df = df.join(df_temp)
         if symbol == 'SPY':  # drop dates SPY did not trade
-            df = df.dropna(subset=["SPY"])
-
+            pass
+            # NOTE(review): the drop is disabled, so all dates are kept: df = df.dropna(subset=["SPY"])
     return df
 
+
 def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
     import matplotlib.pyplot as plt
     """Plot stock prices with a custom title and meaningful axis labels."""