Add support for trading view data

2021-06-20 15:04:30 -04:00 · 2021-06-20 15:04:30 -04:00 · e577ce34d2
parent 71f9b58803
commit e577ce34d2
13 changed files with 1470 additions and 6 deletions
--- a/crypto_eval/AbstractTreeLearner.py
+++ b/crypto_eval/AbstractTreeLearner.py
@ -0,0 +1,77 @@
 import numpy as np
 class AbstractTreeLearner:
    """Decision-tree learner base class that stores the tree as a flat table.

    Every node is one record ``(factor, split_value, left, right)``.  For an
    inner node ``factor`` is the feature column, ``split_value`` the threshold
    and ``left``/``right`` are offsets relative to the node's own row.  For a
    leaf ``factor`` is ``LEAF`` and ``split_value`` holds the prediction.

    Subclasses must provide ``self.leaf_size`` and a method
    ``get_i_and_split_value(xs, y)`` returning ``(column, threshold)``.
    """
    LEAF = -1  # stored in the factor field of leaf nodes
    NA = -1    # placeholder for the unused child offsets of a leaf
    def author(self):
        """Return the user name of the author of this code."""
        return 'felixm'
    def create_node(self, factor, split_value, left, right):
        """Return a one-row structured array describing a single node.

        The value field is float64 so that the threshold used to partition
        the training data in ``build_tree`` is exactly the threshold that
        ``query_point`` compares against later on.
        """
        return np.array([(factor, split_value, left, right), ],
                        dtype='|i4, f8, i4,  i4')
    def query_point(self, point):
        """Walk the tree for one sample and return the value of its leaf.

        @param point: indexable of feature values for a single query.
        @returns the prediction stored in the leaf that is reached.
        """
        node_index = 0
        node = self.rel_tree[node_index]
        while node[0] != self.LEAF:
            split_factor, split_value = node[0], node[1]
            if point[split_factor] <= split_value:
                # Continue in the left sub-tree.
                node_index += node[2]
            else:
                node_index += node[3]
            node = self.rel_tree[node_index]
        return node[1]
    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.apply_along_axis(self.query_point, 1, points)
    def _vote(self, y):
        """Collapse the labels of a leaf into -1, 0 or 1 based on their mean."""
        mean = np.mean(y)
        if mean < -0.2:
            return -1
        if mean > 0.2:
            return 1
        return 0
    def build_tree(self, xs, y):
        """
        @summary: Build a decision tree from the training data.
        @param xs: 2-D numpy array, one row of feature values per sample
        @param y: 1-D numpy array with the training value of every sample
        @returns structured array holding the (sub-)tree in pre-order.
        """
        assert(xs.shape[0] == y.shape[0])
        assert(xs.shape[0] > 0) # If this is 0 something went wrong.
        if xs.shape[0] <= self.leaf_size:
            return self.create_node(self.LEAF, self._vote(y), self.NA, self.NA)
        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)
        if np.all(xs == xs[0]):
            # Identical samples with different labels cannot be separated by
            # any split, so stop here instead of asking for one.
            return self.create_node(self.LEAF, self._vote(y), self.NA, self.NA)
        i, split_value = self.get_i_and_split_value(xs, y)
        select_l = xs[:, i] <= split_value
        select_r = xs[:, i] > split_value
        lt = self.build_tree(xs[select_l], y[select_l])
        rt = self.build_tree(xs[select_r], y[select_r])
        # The left child directly follows the root; the right child follows
        # the complete left sub-tree.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)
        return np.concatenate([root, lt, rt])
    def addEvidence(self, data_x, data_y):
        """
        @summary: Train the learner; replaces any previously built tree.
        @param data_x: X values of the training data
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)
--- a/crypto_eval/BenchmarkStrategy.py
+++ b/crypto_eval/BenchmarkStrategy.py
@ -0,0 +1,36 @@
 import pandas as pd
 import util as ut
 import datetime as dt
 class BenchmarkStrategy:
    """Buy-and-hold baseline: buy on the first day, sell on the last day."""
    def __init__(self, verbose=False, impact=0.0, commission=0.0, units=1000):
        # Number of shares bought on the first day and sold on the last one.
        self.units = units
        self.commission = commission
        self.impact = impact
        self.verbose = verbose
    def addEvidence(self, symbol=0, sd=0, ed=0, sv=0):
        """Do nothing; present only so the strategy API stays uniform."""
        return None
    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return the orders frame of the buy-and-hold benchmark.

        The frame is indexed like the loaded prices and has the columns
        ``Symbol``, ``Order`` and ``Shares``.  The first row buys
        ``self.units`` shares and the last row sells them again; every row
        in between carries zero shares.
        """
        date_span = pd.date_range(sd, ed)
        prices = ut.get_data([symbol], date_span, addSPY=False,
                             colname='close', datecol='time')
        orders = pd.DataFrame(index=prices.index).assign(
            Symbol=symbol, Order="", Shares=0)
        entry = [symbol, "BUY", self.units]
        exit_ = [symbol, "SELL", -self.units]
        orders.iloc[0] = entry
        orders.iloc[-1] = exit_
        if self.verbose:
            print(type(orders))  # it better be a DataFrame!
            print(orders)
        return orders
--- a/crypto_eval/ManualStrategy.py
+++ b/crypto_eval/ManualStrategy.py
@ -0,0 +1,114 @@
 import datetime as dt
 import pandas as pd
 import util
 import indicators
 class ManualStrategy:
    """Hand-written trading rules based on MACD, RSI and price/SMA.

    ``testPolicy`` produces an orders frame with the columns ``Symbol``,
    ``Order`` and ``Shares``.  The position is always -1000, 0 or +1000
    shares and is tracked in ``self.holding`` while a rule runs.
    """
    POSITION = 1000  # size of a full long (or short) position in shares
    def __init__(self, verbose=False, impact=0.0, commission=0.0):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Load prices and volume for *symbol*; nothing is learned.

        The manual rules need no training.  The method only loads the data
        and prints it when ``verbose`` is set.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = util.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        if self.verbose:
            print(prices)
        # automatically adds SPY
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)
    def _shares_to_go_long(self):
        """Return the buy size that moves the position to +POSITION.

        Returns 0 when the position is already long.
        """
        if self.holding in (0, -self.POSITION):
            return self.POSITION - self.holding
        return 0
    def _shares_to_go_short(self):
        """Return the (negative) sell size that moves the position to -POSITION.

        Returns 0 when the position is already short.
        """
        if self.holding in (0, self.POSITION):
            return -self.POSITION - self.holding
        return 0
    def macd_strat(self, macd, orders):
        """Strategy based on MACD cross.

        @param macd: frame whose rows unpack to (macd, signal, diff),
                     indexed like ``orders``.
        @param orders: orders frame; its ``Shares`` column is overwritten.

        Goes long when the MACD crosses above its signal line below -1 and
        short when it crosses below the signal line above 1.  The first row
        has no predecessor and therefore never trades (0 shares).
        """
        labels = list(orders.index)
        # An explicit loop keeps the holding bookkeeping independent of how
        # often pandas would invoke a callback.
        shares_per_row = [0] * min(len(labels), 1)
        for prev_label, cur_label in zip(labels, labels[1:]):
            prev_macd, prev_signal, _ = macd.loc[prev_label]
            cur_macd, cur_signal, _ = macd.loc[cur_label]
            shares = 0
            if cur_macd < -1 and prev_macd < prev_signal \
                             and cur_macd > cur_signal:
                shares = self._shares_to_go_long()
            elif cur_macd > 1 and prev_macd > prev_signal \
                              and cur_macd < cur_signal:
                shares = self._shares_to_go_short()
            self.holding += shares
            shares_per_row.append(shares)
        orders['Shares'] = shares_per_row
    def three_indicator_strat(self, macd, rsi, price_sma, orders):
        """Strategy based on three indicators. Thresholds selected based on
        scatter plots.

        @param macd: frame whose rows unpack to (macd, signal, diff).
        @param rsi: frame whose first column is the RSI.
        @param price_sma: frame whose first column is the price/SMA ratio.
        @param orders: orders frame; its ``Shares`` column is overwritten.

        A buy signal (price/SMA < 0.9, RSI > 80 or MACD diff < -0.5) takes
        precedence; a sell signal (price/SMA > 1.1) is only acted on when
        the buy signal did not result in a trade.
        """
        shares_per_row = []
        for label in orders.index:
            _, _, macd_diff = macd.loc[label]
            cur_rsi = rsi.loc[label].iloc[0]
            cur_price_sma = price_sma.loc[label].iloc[0]
            buy_signal = (cur_price_sma < 0.9 or cur_rsi > 80
                          or macd_diff < -0.5)
            sell_signal = cur_price_sma > 1.1
            shares = 0
            if buy_signal:
                shares = self._shares_to_go_long()
            if shares == 0 and sell_signal:
                shares = self._shares_to_go_short()
            self.holding += shares
            shares_per_row.append(shares)
        orders['Shares'] = shares_per_row
    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000, macd_strat=False):
        """Return the orders of the manual strategy for *symbol*.

        @param macd_strat: use the MACD-cross rule instead of the
                           three-indicator rule.
        @returns frame with columns ``Symbol``, ``Order`` and ``Shares``.
        """
        self.holding = 0
        df = util.get_data([symbol], pd.date_range(sd, ed))
        df.drop(columns=["SPY"], inplace=True)
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        macd = indicators.macd(df, symbol)
        rsi = indicators.rsi(df, symbol)
        price_sma = indicators.price_sma(df, symbol, [8])
        if macd_strat:
            self.macd_strat(macd, orders)
        else:
            self.three_indicator_strat(macd, rsi, price_sma, orders)
        return orders
--- a/crypto_eval/QLearner.py
+++ b/crypto_eval/QLearner.py
@ -0,0 +1,170 @@
 import datetime as dt
 import pandas as pd
 import util
 import indicators
 from qlearning_robot.QLearner import QLearner as Learner
 from dataclasses import dataclass
@dataclass
 class Holding:
    # Mutable portfolio snapshot with three fields.
    # cash:   money that is not invested.
    # shares: number of shares currently held.
    # equity: total portfolio value.
    # NOTE(review): the fields are annotated as int, but they presumably
    # become floats once prices and fees are applied - confirm.
    cash: int
    shares: int
    equity: int
 class QLearner(object):
    """Trading strategy backed by a tabular Q-learner.

    A state encodes the current position (short, flat, long) together with
    the quantile bin of every indicator in ``self.indicators``.  The three
    actions are 0 = buy, 1 = sell and 2 = hold.
    """
    def __init__(self, verbose=False, impact=0.0, units=1000, commission=0.0, testing=False, n_bins=5):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.n_bins = n_bins
        self.bins = {}  # indicator name -> inner bin edges, set by training
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
        self.learner = Learner(self.num_states, self.num_actions)
        self.units = units
    def row_to_state(self, holding, df_row):
        """Transforms a row into a state value.

        @param holding: current position, one of -units, 0 or +units.
        @param df_row: mapping from indicator name to indicator value.
        @returns integer state in ``range(self.num_states)``.
        """
        holding = (holding + self.units) // self.units
        assert(holding in [0, 1, 2])
        # The state is a mixed-radix number.  The most significant digit is
        # the position and every indicator adds one digit with n_bins values.
        # With 450 states the position digit has a weight of 150:
        #   holding = 0 -> state = 0 * 150 = 0
        #   holding = 1 -> state = 1 * 150 = 150
        #   holding = 2 -> state = 2 * 150 = 300
        remaining_states = self.num_states // 3
        state = holding * remaining_states
        for indicator in self.indicators:
            bin_n = self.indicator_value_to_bin(indicator, df_row[indicator])
            remaining_states //= self.n_bins
            state += bin_n * remaining_states
        return state
    def indicator_value_to_bin(self, indicator, value):
        """Return the index of the bin that *value* falls into.

        The bin index equals the number of stored edges that are less than
        or equal to the value.  Without any edges (a single bin) the result
        is 0.
        """
        bounds = self.bins[indicator]
        for i, upper_bound in enumerate(bounds):
            if value < upper_bound:
                return i
        return len(bounds)
    def add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame.

        Works in place: the indicator columns are added, the ``SPY`` column
        is dropped and rows containing NaN are removed.
        """
        for indicator in self.indicators:
            if indicator == "macd_diff":
                indicators.macd(df, symbol)
                df.drop(columns=["macd", "macd_signal"], inplace=True)
            elif indicator == "rsi":
                indicators.rsi(df, symbol)
            elif indicator.startswith("price_sma_"):
                period = int(indicator.replace("price_sma_", ""))
                indicators.price_sma(df, symbol, [period])
        df.drop(columns=["SPY"], inplace=True)
        df.dropna(inplace=True)
    def bin_indicators(self, df):
        """Create bins for indicators.

        Stores the inner quantile edges of every indicator in ``self.bins``.
        Duplicate edges are merged instead of raising, which yields fewer
        bins for indicators with many repeated values.
        """
        for indicator in self.indicators:
            _, edges = pd.qcut(df[indicator], self.n_bins, retbins=True,
                               duplicates='drop')
            # Drop the outermost edges (min and max of the data).
            self.bins[indicator] = edges[1:-1]
    def get_num_states(self):
        """Return the total num of states."""
        # Three position states (short, flat, long) times n_bins per indicator.
        return 3 * self.n_bins ** len(self.indicators)
    def handle_order(self, action, holding, adj_closing_price):
        """Apply *action* to *holding* in place.

        A buy is only executed when flat or short, a sell only when flat or
        long.  An executed order pays the commission and the impact penalty.
        The equity is recomputed in every case.
        """
        shares = 0
        if action == 0:  # buy
            if holding.shares == 0 or holding.shares == -self.units:
                shares = self.units
        elif action == 1:  # sell
            if holding.shares == 0 or holding.shares == self.units:
                shares = -self.units
        # action == 2 (hold) leaves shares at 0.
        if shares != 0:
            # Charge commission and deduct impact penalty
            holding.cash -= self.commission
            holding.cash -= (self.impact * adj_closing_price * abs(shares))
            holding.cash -= shares * adj_closing_price
            holding.shares += shares
        holding.equity = holding.cash + holding.shares * adj_closing_price
    def get_reward(self, equity, new_equity):
        """Return 1 when the equity grew and -1 otherwise."""
        if new_equity > equity:
            return 1
        return -1
    def train(self, df, symbol, sv):
        """Run one training pass over *df*, starting with *sv* in cash."""
        holding = Holding(sv, 0, sv)
        row = df.iloc[0]
        state = self.row_to_state(holding.shares, row)
        action = self.learner.querysetstate(state)
        adj_closing_price = row[symbol]
        equity = holding.equity
        self.handle_order(action, holding, adj_closing_price)
        for _, row in df.iloc[1:].iterrows():
            adj_closing_price = row[symbol]
            # Reward the previous action with the change in equity it caused.
            new_equity = holding.cash + holding.shares * adj_closing_price
            r = self.get_reward(equity, new_equity)
            s_prime = self.row_to_state(holding.shares, row)
            a = self.learner.query(s_prime, r)
            equity = new_equity
            self.handle_order(a, holding, adj_closing_price)
            if self.verbose:
                print(f"{holding=} {s_prime=} {r=} {a=}")
    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
        """Train the learner on *symbol* between *sd* and *ed*."""
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        self.bin_indicators(df)
        for _ in range(15):
            self.train(df, symbol, sv)
    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
        """Return the trades the trained policy makes for *symbol*.

        The bins computed by ``addEvidence`` are reused, so the learner has
        to be trained first.  With ``testing`` set the full orders frame is
        returned, otherwise only its ``Shares`` column.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        # Index taken before rows with NaN indicators are dropped; those
        # rows simply keep zero shares.
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.add_indicators(df, symbol)
        holding = 0
        for index, row in df.iterrows():
            state = self.row_to_state(holding, row)
            action = self.learner.querysetstate(state)
            if action == 0:  # buy
                if holding == 0 or holding == -self.units:
                    holding += self.units
                    orders.loc[index, "Shares"] = self.units
            elif action == 1:  # sell
                if holding == 0 or holding == self.units:
                    holding -= self.units
                    orders.loc[index, "Shares"] = -self.units
            # action == 2 (hold): nothing to do.
        if self.testing:
            return orders
        return orders[["Shares"]]
--- a/crypto_eval/RTLearner.py
+++ b/crypto_eval/RTLearner.py
@ -0,0 +1,30 @@
 import numpy as np
 from AbstractTreeLearner import AbstractTreeLearner
 class RTLearner(AbstractTreeLearner):
    """Random-tree learner: splits on a random feature at a random threshold."""
    def __init__(self, leaf_size = 1, verbose = False):
        self.leaf_size = leaf_size
        self.verbose = verbose
    def get_i_and_split_value(self, xs, y):
        """
        @summary: Pick a random i and split value.
        Make sure that not all X are the same for i and also pick
        different values to average the split_value from.
        @param xs: 2-D numpy array, one row of feature values per sample
        @param y: training values (not used by the random split)
        @returns tuple (column index, split value)
        @raises ValueError: if no column contains two different values
        """
        if not np.any(xs != xs[0]):
            # Without this check the column search below never terminates.
            raise ValueError("cannot split: all samples have identical "
                             "feature values")
        i = np.random.randint(0, xs.shape[1])
        while np.all(xs[0,i] == xs[:,i]):
            i = np.random.randint(0, xs.shape[1])
        # Draw two distinct values of the column and split half-way between
        # them so that both sides of the split are non-empty.
        a = np.array(list(set(xs[:, i])))
        r1, r2 = np.random.choice(a, size = 2, replace = False)
        assert(r1 != r2)
        split_value = (r1 + r2) / 2.0
        return i, split_value
--- a/crypto_eval/StrategyLearner.py
+++ b/crypto_eval/StrategyLearner.py
@ -0,0 +1,106 @@
 import datetime as dt
 import pandas as pd
 import util
 import indicators
 from RTLearner import RTLearner
 class StrategyLearner(object):
    """Trading strategy that classifies days with a random tree.

    The tree is trained on indicator values to predict whether the
    ``pct_3`` column is above or below a threshold.  Predictions of 1 and
    -1 are turned into long and short positions of 1000 shares.
    """
    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        # Feature columns; defined here so that they exist before training.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
    def _get_volume(self, syms, dates):
        """For reference: load (and optionally print) the volume of *syms*."""
        volume_all = util.get_data(syms, dates, colname="Volume")
        volume = volume_all[syms]  # only portfolio symbols
        if self.verbose:
            print(volume)
        return volume
    def _add_indicators(self, df, symbol):
        """Add indicators for learning to DataFrame.

        Works in place: drops the ``SPY`` column, adds the indicator
        columns and removes rows containing NaN.
        """
        df.drop(columns=["SPY"], inplace=True)
        indicators.macd(df, symbol)
        indicators.rsi(df, symbol)
        indicators.price_sma(df, symbol, [8])
        indicators.price_delta(df, symbol, 3)
        df.dropna(inplace=True)
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Train the random tree on *symbol* between *sd* and *ed*.

        @returns the series of training labels (-1, 0 or 1).
        """
        self.y_threshold = 0.2
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)
        def classify_y(value):
            """Map a value to 1, -1 or 0 using the current threshold."""
            if value > self.y_threshold:
                return 1
            if value < -self.y_threshold:
                return -1
            return 0
        self.learner = RTLearner(leaf_size = 5)
        data_x = df[self.indicators].to_numpy()
        pct = df['pct_3']
        # This is a hack to get a low enough buy/sell threshold for the
        # cyclic test 'ML4T-220' where the max pct_3 is 0.0268.
        if max(pct) < 0.2:
            self.y_threshold = 0.02
        y = pct.apply(classify_y)
        self.learner.addEvidence(data_x, y.to_numpy())
        return y
    def strat(self, data_y, orders):
        """Translate predictions into trades.

        @param data_y: frame indexed like ``orders`` whose first column
                       holds the prediction (1, -1 or 0) per row.
        @param orders: orders frame; its ``Shares`` column is overwritten.
        """
        self.holding = 0
        shares_per_row = []
        # An explicit loop keeps the holding bookkeeping independent of how
        # often pandas would invoke a callback.
        for label in orders.index:
            y = int(data_y.loc[label].iloc[0])
            shares = 0
            if self.holding == 0 and y == 1:
                shares = 1000
            elif self.holding == -1000 and y == 1:
                shares = 2000
            elif self.holding == 0 and y == -1:
                shares = -1000
            elif self.holding == 1000 and y == -1:
                shares = -2000
            self.holding += shares
            shares_per_row.append(shares)
        orders["Shares"] = shares_per_row
    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Return the trades of the trained learner for *symbol*.

        ``addEvidence`` has to be called first.  With ``testing`` set the
        full orders frame is returned, otherwise only its ``Shares`` column.
        """
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self._add_indicators(df, symbol)
        data_x = df[self.indicators].to_numpy()
        data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x))
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        self.strat(data_y, orders)
        if self.testing:
            return orders
        return orders[["Shares"]]
--- a/crypto_eval/experiment1.py
+++ b/crypto_eval/experiment1.py
@ -0,0 +1,237 @@
 import pandas as pd
 import datetime as dt
 import sys
 import util
 import indicators
 import crypto_eval.marketsim as marketsim
 import matplotlib.pyplot as plt
 from matplotlib.widgets import MultiCursor
 from BenchmarkStrategy import BenchmarkStrategy
 from ManualStrategy import ManualStrategy
 from StrategyLearner import StrategyLearner
 from QLearner import QLearner
 def plot_indicators(symbol, df):
    """Show price and indicators of *symbol* in four stacked plots, then exit.

    The plots share the x axis: price with Bollinger band, price/SMA(8),
    MACD and RSI.  The function terminates the process via ``sys.exit(0)``
    after the window has been shown.
    """
    fig, ax = plt.subplots(4, sharex=True)
    price_sma = indicators.price_sma(df, symbol, [8])
    bb = indicators.bollinger_band(df, symbol)
    rsi = indicators.rsi(df, symbol)
    macd = indicators.macd(df, symbol).copy()
    df[[symbol]].plot(ax=ax[0])
    bb.plot(ax=ax[0])  # drawn on top of the price
    price_sma.plot(ax=ax[1])
    macd.plot(ax=ax[2])
    rsi.plot(ax=ax[3])
    for a in ax.flat:
        a.grid()
    # NOTE(review): the cursor is bound to `m`, presumably to keep the widget
    # referenced while the window is open - confirm.
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    plt.show()
    sys.exit(0)
 def visualize_correlations(symbol, df):
    """Scatter-plot indicators against the ``pct_*`` columns, then exit.

    The left column of the 3x2 grid plots ``price_sma_8`` and the right
    column ``macd_diff``, each against ``pct_5``, ``pct_3`` and ``pct_1``.
    The function terminates the process via ``sys.exit(0)``.
    """
    # The helpers are called for their effect on df; the scatter plots below
    # read the columns price_sma_8, pct_5, pct_3, pct_1 and macd_diff.
    indicators.price_sma(df, symbol, [8, 21])
    indicators.price_delta(df, symbol, 5)
    indicators.price_delta(df, symbol, 3)
    indicators.price_delta(df, symbol, 1)
    indicators.macd(df, symbol)
    indicators.rsi(df, symbol)
    # df = df[df['rsi'] > 80]
    fig, ax = plt.subplots(3, 2)  # sharex=True)
    df.plot.scatter(x="price_sma_8", y="pct_5", ax=ax[0, 0])
    df.plot.scatter(x="price_sma_8", y="pct_3", ax=ax[1, 0])
    df.plot.scatter(x="price_sma_8", y="pct_1", ax=ax[2, 0])
    # df.plot.scatter(x="rsi", y="pct_5", ax=ax[0, 1])
    # df.plot.scatter(x="rsi", y="pct_3", ax=ax[1, 1])
    # df.plot.scatter(x="rsi", y="pct_1", ax=ax[2, 1])
    df.plot.scatter(x="macd_diff", y="pct_5", ax=ax[0, 1])
    df.plot.scatter(x="macd_diff", y="pct_3", ax=ax[1, 1])
    df.plot.scatter(x="macd_diff", y="pct_1", ax=ax[2, 1])
    for a in ax.flat:
        a.grid()
    plt.show()
    sys.exit(0)
 def compare_manual_strategies(symbol, sv, sd, ed):
    """Plot the benchmark against both manual strategies into figure_1.png.

    @param symbol: symbol to trade
    @param sv: start value of the portfolio
    @param sd: start date
    @param ed: end date
    """
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    # Benchmark: buy and hold.
    bs = BenchmarkStrategy()
    orders = bs.testPolicy(symbol, sd, ed, sv)
    df["Benchmark"] = marketsim.compute_portvals(orders, sv)
    df["Orders Benchmark"] = orders["Shares"]
    # Manual strategy using the MACD-cross rule.
    ms = ManualStrategy()
    orders = ms.testPolicy(symbol, sd, ed, sv, macd_strat=True)
    df["MACD Strat"] = marketsim.compute_portvals(orders, sv)
    df["Orders MACD"] = orders["Shares"]
    # df["Holding Manual"] = orders["Shares"].cumsum()
    # Manual strategy using the three-indicator rule (the default).
    orders = ms.testPolicy(symbol, sd, ed, sv)
    df["Three Strat"] = marketsim.compute_portvals(orders, sv)
    df["Orders Three"] = orders["Shares"]
    # Three stacked plots: price, portfolio values, orders.
    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "MACD Strat", "Three Strat"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders MACD", "Orders Three"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    # plt.show()
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_1.png', dpi=fig.dpi)
 def compare_all_strategies(symbol, sv, sd, ed):
    """Plot benchmark, manual strategy and StrategyLearner into figure_2.png.

    The StrategyLearner is trained and tested on the same date range
    (*sd* to *ed*).  Portfolio values are passed through
    ``indicators.normalize`` before plotting.

    @param symbol: symbol to trade
    @param sv: start value of the portfolio
    @param sd: start date
    @param ed: end date
    """
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    normalize = indicators.normalize
    # Benchmark: buy and hold.
    bs = BenchmarkStrategy()
    orders = bs.testPolicy(symbol, sd, ed, sv)
    df["Benchmark"] = normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Benchmark"] = orders["Shares"]
    # Manual strategy with its default rule.
    ms = ManualStrategy()
    orders = ms.testPolicy(symbol, sd, ed, sv)
    df["Manual"] = normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Manual"] = orders["Shares"]
    # testing=True makes testPolicy return the full orders frame.
    sl = StrategyLearner(testing=True)
    sl.addEvidence(symbol, sd, ed, sv)
    orders = sl.testPolicy(symbol, sd, ed, sv)
    df["Strategy"] = normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Strategy"] = orders["Shares"]
    # Three stacked plots: price, portfolio values, orders.
    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "Manual", "Strategy"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders Manual", "Orders Strategy"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    # plt.show()
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_2.png', dpi=fig.dpi)
 def compare_number_trades():
    """Print a table with the number of Q-learner orders per commission.

    For each commission a new learner with an impact of 0.005 is trained and
    tested on JPM from 2008-01-01 to 2009-12-31.
    """
    ticker = "JPM"
    start_value = 10000
    first_day = dt.datetime(2008, 1, 1)  # in-sample
    last_day = dt.datetime(2009, 12, 31)  # in-sample
    # The price frame itself is not used for the table below.
    prices = util.get_data([ticker], pd.date_range(first_day, last_day))
    prices.drop(columns=["SPY"], inplace=True)
    print("| commission | n_orders |")
    print("-------------------------")
    for fee in (0, 9.95, 20, 50, 100):
        learner = QLearner(testing=True, commission=fee, impact=0.005)
        learner.addEvidence(ticker, first_day, last_day, start_value)
        trades = learner.testPolicy(ticker, first_day, last_day, start_value)
        # Count the rows that actually trade.
        n_orders = len(trades[trades["Shares"] != 0])
        print(f"| {fee} | {n_orders} |")
 def compare_q_learners():
    """Compare Q-learners with 5 and 4 bins out of sample, then exit.

    Both learners are trained on JPM for 2008-2009 and tested on 2010-2011.
    The result is saved to figure_4.png and the process is terminated via
    ``sys.exit(0)``.
    """
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample
    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    df.drop(columns=["SPY"], inplace=True)
    # Benchmark: buy and hold over the out-of-sample period.
    bs = BenchmarkStrategy()
    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Benchmark"] = orders["Shares"]
    # Q-learner with the default of five bins per indicator.
    ql = QLearner(testing=True, verbose=False)
    ql.addEvidence(symbol, sd, ed, sv)
    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL 5"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL 5"] = orders["Shares"]
    # Q-learner with four bins per indicator.
    ql = QLearner(testing=True, verbose=False, n_bins=4)
    ql.addEvidence(symbol, sd, ed, sv)
    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL 4"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL 4"] = orders["Shares"]
    # Three stacked plots: price, portfolio values, orders.
    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "QL 5", "QL 4"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_4.png', dpi=fig.dpi)
    sys.exit(0)
 def experiment1(create_report=False):
    """Compare buy-and-hold with the Q-learner on COINBASE_BTCUSD_1D.

    Both strategies trade a single unit.  Training and test period are the
    same (the year 2020).  The result is shown in an interactive window.

    @param create_report: currently unused; the code that read it is
                          commented out below.
    """
    symbol = "COINBASE_BTCUSD_1D"
    sv = 10000
    sd = dt.datetime(2020, 1, 1)  # in-sample
    ed = dt.datetime(2020, 12, 31)  # in-sample
    # NOTE: the "out-sample" range is identical to the in-sample range.
    sd_out = dt.datetime(2020, 1, 1)  # out-sample
    ed_out = dt.datetime(2020, 12, 31)  # out-sample
    df = util.get_data([symbol], pd.date_range(sd_out, ed_out), addSPY=True)
    # if create_report:
    #     compare_manual_strategies(symbol, sv, sd, ed)
    #     compare_all_strategies(symbol, sv, sd, ed)
    #     sys.exit(0)
    # visualize_correlations(symbol, df)
    # plot_indicators(symbol, df)
    # compare_number_trades(symbol, sv, sd, ed)
    # compare_q_learners()
    # return
    # Benchmark: buy and hold a single unit.
    bs = BenchmarkStrategy(units=1)
    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
    pvs = marketsim.compute_portvals(orders, start_val=sv)
    df["Benchmark"] = indicators.normalize(pvs)
    df["Orders Benchmark"] = orders["Shares"]
    # Q-learner trading a single unit; testing=True returns the full orders frame.
    ql = QLearner(testing=True, verbose=False, units=1)
    ql.addEvidence(symbol, sd, ed, sv)
    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
    df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL"] = orders["Shares"]
    # Three stacked plots: price, portfolio values, orders.
    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "QL"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders QL"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    plt.show()
    # fig.set_size_inches(10, 8, forward=True)
    # plt.savefig('figure_4.png', dpi=fig.dpi)
 # Run the experiment when the file is executed as a script.
 if __name__ == "__main__":
    experiment1()
--- a/crypto_eval/experiment2.py
+++ b/crypto_eval/experiment2.py
@ -0,0 +1,8 @@
 import experiment1
 def experiment2():
    """Run the commission experiment defined in the experiment1 module."""
    experiment1.compare_number_trades()
 # Run the experiment when the file is executed as a script.
 if __name__ == "__main__":
    experiment2()
--- a/crypto_eval/grade_strategy_learner.py
+++ b/crypto_eval/grade_strategy_learner.py
@ -0,0 +1,339 @@
 """MC3-P3: Strategy Learner - grading script.
 Usage:
 - Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
 - Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
    PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc2_p1_grading/grade_marketsim.py
 Copyright 2017, Georgia Tech Research Corporation
 Atlanta, Georgia 30332-0415
 All Rights Reserved
 Template code for CS 4646/7646
 Georgia Tech asserts copyright ownership of this template and all derivative
 works, including solutions to the projects assigned in this course. Students
 and other users of this template code are advised not to share it with others
 or to make it available on publicly viewable websites including repositories
 such as github and gitlab.  This copyright statement should not be removed
 or edited.
 We do grant permission to share solutions privately with non-students such
 as potential employers. However, sharing with other current or future
 students of CS 7646 is prohibited and subject to being investigated as a
 GT honor code violation.
 -----do not edit anything above this line---
 Student Name: Tucker Balch (replace with your name)
 GT User ID: tb34 (replace with your User ID)
 GT ID: 900897987 (replace with your GT ID)
 """
 import pytest
 from grading.grading import grader, GradeResult, run_with_timeout, IncorrectOutput
 import os
 import sys
 import traceback as tb
 import datetime as dt
 import numpy as np
 import pandas as pd
 from collections import namedtuple
 import time
 import util
 import random
 # Test cases
 # Each case names the symbol and date ranges used for training (insample_args)
 # and for out-of-sample testing (outsample_args), plus limits and the seed.
 # NOTE(review): train_time and test_time are presumably limits in seconds -
 # confirm against the part of the grader that reads them.
 StrategyTestCase = namedtuple('Strategy', ['description','insample_args','outsample_args','benchmark_type','benchmark','impact','train_time','test_time','max_time','seed'])
 strategy_test_cases = [
    StrategyTestCase(
        description="ML4T-220",
        insample_args=dict(symbol="ML4T-220",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="ML4T-220",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='clean',
        benchmark=1.0, #benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
        ),
    StrategyTestCase(
        description="AAPL",
        insample_args=dict(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='stock',
        benchmark=0.1581999999999999, #benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
        ),
    StrategyTestCase(
        description="SINE_FAST_NOISE",
        insample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="SINE_FAST_NOISE",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='noisy',
        benchmark=2.0, #benchmark updated Apr 24 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
        ),
    StrategyTestCase(
        description="UNH - In sample",
        insample_args=dict(symbol="UNH",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000),
        outsample_args=dict(symbol="UNH",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000),
        benchmark_type='stock',
        benchmark= -0.25239999999999996, #benchmark computed Nov 22 2017
        impact=0.0,
        train_time=25,
        test_time=5,
        max_time=60,
        seed=1481090000
        ),
 ]
 max_points = 60.0
 html_pre_block = True  # surround comments with HTML <pre> tag (for T-Square comments field)
 # NOTE(review): presumably the largest position (in shares) a learner may
 # hold - confirm against the checks further down in the grader.
 MAX_HOLDINGS = 1000
 # Test function(s)
@pytest.mark.parametrize("description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed", strategy_test_cases)
 def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Trains a ``StrategyLearner.StrategyLearner`` on ``insample_args``, calls
    ``testPolicy`` twice in-sample and once out-of-sample, validates the
    returned trades and awards points for speed, repeatability and returns.

    Parameters: description (str) labels the case in failure messages;
    insample_args / outsample_args (dict) are passed as keyword arguments to
    the learner and are read for 'symbol', 'sv', 'sd' and 'ed'; benchmark_type
    (str) - 'clean' makes ``benchmark`` the out-of-sample return to beat as
    well; benchmark (float) is the in-sample cumulative return to beat; impact
    is forwarded to the learner and to ``evalPolicy2``; train_time / test_time
    are limits in seconds for addEvidence() / testPolicy(); max_time is the
    limit handed to ``run_with_timeout``; seed seeds ``np.random`` and
    ``random``; grader is the fixture that collects the result.

    Any failure is reported to ``grader`` together with the points earned so
    far, and the exception is re-raised.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        # Import the student's module lazily and cache it in the module globals.
        if not 'StrategyLearner' in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        # Only 'clean' cases have an out-of-sample return target; the other
        # cases are graded on out-of-sample query time instead (see below).
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark
        def timeoutwrapper_strategylearner():
            #Set fixed seed for repetability
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False,impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time()-tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time()-tmp
            # The second in-sample query is not timed; it only feeds the
            # repeatability check.
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time()-tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t
        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(timeoutwrapper_strategylearner,max_time,(),{})
        incorrect = False
        # Every trades object must be two-dimensional with exactly one column.
        if len(in_trades_1.shape)!=2 or in_trades_1.shape[1]!=1:
            incorrect=True
            msgs.append("  First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
        elif len(in_trades_2.shape)!=2 or in_trades_2.shape[1]!=1:
            incorrect=True
            msgs.append("  Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
        elif len(out_trades.shape)!=2 or out_trades.shape[1]!=1:
            incorrect=True
            msgs.append("  Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
        else:
            # NOTE(review): the three loops below are identical except for the
            # frame they walk and the label in the message; a shared helper
            # would remove the duplication.
            # Each loop stops at the first illegal trade size or at the first
            # date on which the running position exceeds MAX_HOLDINGS.
            tmp_csum=0.0
            for date,trade in in_trades_1.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                   incorrect=True
                   msgs.append("  illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                   break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append("  holdings more than {} long or short in first insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            tmp_csum=0.0
            for date,trade in in_trades_2.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                   incorrect=True
                   msgs.append("  illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                   break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append("  holdings more than {} long or short in second insample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            tmp_csum=0.0
            for date,trade in out_trades.iterrows():
                tmp_csum+= trade.iloc[0]
                if (trade.iloc[0]!=0) and\
                   (trade.abs().iloc[0]!=MAX_HOLDINGS) and\
                   (trade.abs().iloc[0]!=2*MAX_HOLDINGS):
                   incorrect=True
                   msgs.append("  illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS,date,trade))
                   break
                elif abs(tmp_csum)>MAX_HOLDINGS:
                    incorrect=True
                    msgs.append("  holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}".format(MAX_HOLDINGS,date,trade))
                    break
            # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
            #     ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
            #     ((out_trades.abs()!=0)  & (out_trades.abs()!=MAX_HOLDINGS)  & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
            #     incorrect = True
            #     msgs.append("  illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
            # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
            #     incorrect = True
            #     msgs.append("  holdings more than {} long or short".format(MAX_HOLDINGS))
        # Points are only awarded when all trades passed validation:
        # 1 (training time) + 2 (query time) + 2 (repeatability)
        # + 5 (in-sample return) + 5 (out-of-sample criterion).
        if not(incorrect):
            if train_t>train_time:
                incorrect=True
                msgs.append("  addEvidence() took {} seconds, max allowed {}".format(train_t,train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append("  testPolicy() took {} seconds, max allowed {}".format(test_t,test_time))
            else:
                points_earned += 2.0
            # NOTE(review): .all()[0] indexes the one-element result by
            # position; .iloc[0] would state that explicitly.
            if not((in_trades_1 == in_trades_2).all()[0]):
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2,how='outer',lsuffix='1',rsuffix='2')
                mismatches = mismatches[mismatches.iloc[:,0]!=mismatches.iloc[:,1]]
                msgs.append("  consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append("  Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'],in_trades_1,insample_args['sv'],insample_args['sd'],insample_args['ed'],market_impact=impact,commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'],out_trades, outsample_args['sv'],outsample_args['sd'],outsample_args['ed'],market_impact=impact,commission_cost=0.0)
            # The in-sample return has to be strictly greater than the benchmark.
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append("  in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr,benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                # No out-of-sample target: grade the out-of-sample query time.
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append("  out-sample took {} seconds, max of {}".format(out_test_t,test_time))
                else:
                    points_earned += 5.0
            else:
                # Out-of-sample, matching the benchmark exactly is sufficient.
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append("  out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr,outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = "    insample_args: {}\n" \
                         "    outsample_args: {}\n" \
                         "    benchmark_type: {}\n" \
                         "    benchmark: {}\n" \
                         "    train_time: {}\n" \
                         "    test_time: {}\n" \
                         "    max_time: {}\n" \
                         "    seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
 def compute_benchmark(sd,ed,sv,symbol,market_impact,commission_cost,max_holdings):
    """Return the cumulative return of a buy-and-hold benchmark for ``symbol``.

    The benchmark buys ``max_holdings`` shares on the first date that
    ``util.get_data`` returns for the range ``sd``..``ed`` and sells them again
    on the last one; the resulting order sheet is scored by ``evalPolicy2``
    with starting value ``sv`` and the given cost parameters.
    """
    date_idx = util.get_data([symbol,],pd.date_range(sd,ed)).index
    orders = pd.DataFrame(index=date_idx)
    orders['orders'] = 0
    # Write through .iloc on the frame itself: the former chained form
    # (orders['orders'][0] = ...) assigns into an intermediate Series and is
    # not guaranteed to update ``orders``.
    col = orders.columns.get_loc('orders')
    orders.iloc[0, col] = max_holdings
    orders.iloc[-1, col] = -max_holdings
    return evalPolicy2(symbol,orders,sv,sd,ed,market_impact,commission_cost)
 def evalPolicy(student_trades,sym_prices,startval):
    """Return the cumulative return of ``student_trades`` without trading costs.

    student_trades: share counts per date (positive buys, negative sells),
        either a one-column DataFrame or a Series.
    sym_prices: price Series aligned with the trades' index.
    startval: starting cash.

    Cash spent is the sum of trades times prices; the remaining position is
    valued at the last price. Raises TypeError when the trades hold more than
    one column, because the result would not be a single number.
    """
    ending_cash = startval - student_trades.mul(sym_prices,axis=0).sum()
    ending_stocks = student_trades.sum()*sym_prices.iloc[-1]
    total_return = (ending_cash+ending_stocks)/startval
    # A DataFrame input leaves a one-element Series here; take the element
    # explicitly instead of relying on the implicit float(Series) conversion.
    if isinstance(total_return, pd.Series):
        if len(total_return) != 1:
            raise TypeError("evalPolicy expects trades for exactly one symbol")
        total_return = total_return.iloc[0]
    return float(total_return)-1.0
 def evalPolicy2(symbol, student_trades, startval, sd, ed, market_impact,commission_cost):
    """Return the cumulative return of ``student_trades`` via ``compute_portvals``.

    The first column of ``student_trades`` holds signed share counts per date.
    Every non-zero entry becomes one 'buy' or 'sell' order for ``symbol``; the
    orders are simulated between ``sd`` and ``ed`` starting with ``startval``
    and the given market impact and commission.
    """
    columns = ['Shares','Order','Symbol']
    rows = []
    order_dates = []
    for order_date, nshares in student_trades.iloc[:, 0].items():
        if nshares == 0:
            continue
        rows.append([abs(nshares), 'sell' if nshares < 0 else 'buy', symbol])
        order_dates.append(order_date)
    # Build the order sheet in one go; DataFrame.append is gone from current
    # pandas and copied the whole frame on every call.
    orders_df = pd.DataFrame(rows, columns=columns, index=order_dates)
    portvals = compute_portvals(orders_df, sd, ed, startval,market_impact,commission_cost)
    # portvals is indexed by date, so first/last must be taken by position.
    return float(portvals.iloc[-1]/portvals.iloc[0])-1
 def compute_portvals(orders_df, start_date, end_date, startval, market_impact=0.0, commission_cost=0.0):
    """Simulate the market for the given date range and orders file.

    orders_df: date-indexed frame with 'Shares', 'Order' ('buy'/'sell', any
        case) and 'Symbol' columns.
    start_date, end_date: simulated range, handed to ``util.get_data``.
    startval: cash on the first simulated day.
    market_impact: fraction of the traded value charged per order.
    commission_cost: fixed amount charged per order.

    Returns a Series with the portfolio value for every date in the price
    index. Orders dated outside the price index range are ignored; an order on
    a date inside the range that has no price row raises KeyError.
    """
    orders_df = orders_df.sort_index()
    orders = []
    symbols = []
    for date, order in orders_df.iterrows():
        shares = order['Shares']
        symbol = order['Symbol']
        if order['Order'].lower() == 'sell':
            shares *= -1
        orders.append((date, symbol, shares))
        symbols.append(symbol)
    symbols = list(set(symbols))
    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbols, dates)
    prices = prices_all[symbols].ffill().bfill()
    prices['_CASH'] = 1.0
    trades = pd.DataFrame(0.0, index=prices.index, columns=symbols)
    cash = pd.Series(0.0, index=prices.index)
    cash.iloc[0] = startval
    first_day = prices.index.min()
    last_day = prices.index.max()
    for date, symbol, shares in orders:
        # Skip out-of-range orders before looking anything up; previously the
        # price lookup raised KeyError before this check could take effect.
        if (date < first_day) or (date > last_day):
            continue
        price = prices.loc[date, symbol]
        val = shares * price
        # transaction cost model
        val += commission_cost + (abs(shares)*price*market_impact)
        trades.loc[date, symbol] += shares
        cash.loc[date] -= val
    trades['_CASH'] = cash
    holdings = trades.cumsum()
    df_portvals = (prices * holdings).sum(axis=1)
    return df_portvals
 if __name__ == "__main__":
    # Run this file through pytest when executed as a script; "-s" turns off
    # pytest's output capturing.
    pytest.main(["-s", __file__])
--- a/crypto_eval/indicators.py
+++ b/crypto_eval/indicators.py
@ -0,0 +1,140 @@
 import pandas as pd
 import datetime as dt
 import matplotlib.pyplot as plt
 from util import get_data
 def author():
    """Return the author's user name."""
    user_name = "felixm"
    return user_name
 def normalize(timeseries):
    """Divide ``timeseries`` by its first row so that the first row becomes 1."""
    first_row = timeseries.iloc[0]
    return timeseries.div(first_row)
 def bollinger_band(df, symbol, period=20, m=2):
    """Add Bollinger Band columns for ``df[symbol]`` to ``df`` and return them.

    The middle band ("boll_sma") is the rolling mean over ``period`` rows; the
    upper ("boll_up") and lower ("boll_lo") bands lie ``m`` rolling standard
    deviations above and below it.
    """
    window = df[symbol].rolling(period)
    middle = window.mean()
    spread = m * window.std()
    columns = ["boll_sma", "boll_up", "boll_lo"]
    bands = (middle, middle + spread, middle - spread)
    for column, band in zip(columns, bands):
        df[column] = band
    return df[columns]
 def sma(df, symbol, period):
    """Adds SMA for one or multiple periods to df and returns SMAs

    ``period`` is a single integer (Python or NumPy) or an iterable of
    integers. For every period ``p`` the column ``sma_<p>`` with the rolling
    mean of ``df[symbol]`` over ``p`` rows is written to ``df``; the returned
    frame contains just those columns.
    """
    # is_integer also recognises NumPy integers, which the former
    # ``type(period) is int`` test let through to the loop as non-iterables.
    periods = [period] if pd.api.types.is_integer(period) else list(period)
    keys = []
    for p in periods:
        key = f"sma_{p}"
        df[key] = df[symbol].rolling(p).mean()
        keys.append(key)
    return df[keys]
 def ema(df, symbol, period):
    """Adds EMA for one or multiple periods to df and returns EMAs

    ``period`` is a single integer (Python or NumPy) or an iterable of
    integers. For every period ``p`` the column ``ema_<p>`` with the
    exponentially weighted mean of ``df[symbol]`` (``span=p``) is written to
    ``df``; the returned frame contains just those columns.
    """
    # is_integer also recognises NumPy integers, which the former
    # ``type(period) is int`` test let through to the loop as non-iterables.
    periods = [period] if pd.api.types.is_integer(period) else list(period)
    keys = []
    for p in periods:
        key = f"ema_{p}"
        df[key] = df[symbol].ewm(span=p).mean()
        keys.append(key)
    return df[keys]
 def price_sma(df, symbol, period):
    """Calculates SMA and adds new column price divided by SMA to the df.

    ``period`` is a single integer (Python or NumPy) or an iterable of
    integers. For every period ``p`` the column ``price_sma_<p>`` holds
    ``df[symbol]`` divided by its rolling mean over ``p`` rows; the returned
    frame contains just those columns.
    """
    # is_integer also recognises NumPy integers, which the former
    # ``type(period) is int`` test let through to the loop as non-iterables.
    periods = [period] if pd.api.types.is_integer(period) else list(period)
    keys = []
    for p in periods:
        key = f"price_sma_{p}"
        # Named rolling_mean so the module-level sma() is not shadowed.
        rolling_mean = df[symbol].rolling(p).mean()
        df[key] = df[symbol] / rolling_mean
        keys.append(key)
    return df[keys]
 def rsi(df, symbol, period=14):
    """Add an "rsi" column for ``df[symbol]`` to ``df`` and return it as a frame.

    Each value is computed from a rolling window of ``period + 1`` prices, i.e.
    ``period`` percentage changes: the mean of the positive changes is set
    against the mean absolute value of the remaining (zero or negative) ones.
    Windows without a positive or without a non-positive change yield NaN.
    """
    # One extra price is needed because the first change in a window is nan.
    window = period + 1

    def _window_rsi(prices):
        changes = prices.pct_change()
        mean_gain = changes[changes > 0].mean()
        mean_loss = changes[changes <= 0].abs().mean()
        relative_strength = (mean_gain / window) / (mean_loss / window)
        return 100 - (100 / (1 + relative_strength))

    df["rsi"] = df[symbol].rolling(window).apply(_window_rsi)
    return df[["rsi"]]
 def macd(df, symbol):
    """Add MACD columns for ``df[symbol]`` to ``df`` and return them.

    "macd" is the span-12 minus the span-26 exponentially weighted mean,
    "macd_signal" the 9-row rolling mean of "macd", and "macd_diff" the
    difference between the two.
    """
    prices = df[symbol]
    macd_line = prices.ewm(span=12).mean() - prices.ewm(span=26).mean()
    df["macd"] = macd_line
    df["macd_signal"] = macd_line.rolling(9).mean()
    df["macd_diff"] = df["macd"] - df["macd_signal"]
    return df[["macd", "macd_signal", "macd_diff"]]
 def price_delta(df, symbol, period=1):
    """Add the forward-looking percentage change of ``df[symbol]`` to ``df``.

    The column ``pct_<period>`` holds, for each row, the relative change from
    that row to the row ``period`` steps later; the last ``period`` rows are
    NaN. Returns that column.
    """
    column = f"pct_{period}"
    change = df[symbol].pct_change(periods=period)
    # Shift back so each row carries the change that follows it.
    df[column] = change
    df[column] = df[column].shift(-period)
    return df[column]
 def test_indicators():
    """Plot every indicator for JPM (2008-2009) and save figure_1..5.png."""
    symbol = "JPM"
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    df = get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    df_orig = df.copy()
    # df = normalize(df)
    sma(df, symbol, 21)
    ema(df, symbol, 21)
    df.plot(title="21 SMA and EMA")
    plt.savefig('figure_1.png')
    df = df_orig.copy()
    sma(df, symbol, 8)
    price_sma(df, symbol, 8)
    df.plot(title="SMA and price / SMA", subplots=True)
    plt.savefig('figure_2.png')
    df = df_orig.copy()
    bollinger_band(df, symbol)
    df.plot(title="Bollinger Band")
    plt.savefig('figure_3.png')
    df = df_orig.copy()
    rsi(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # rsi() stores its result under "rsi"; the former "JPM-rsi(14)" key does
    # not exist and raised KeyError.
    df["rsi"].plot(ax=axes[1], title="RSI")
    plt.savefig('figure_4.png')
    df = df_orig.copy()
    macd(df, symbol)
    fig, axes = plt.subplots(nrows=2, sharex=True)
    df[symbol].plot(ax=axes[0], title="JPM price action")
    # macd() stores its results under "macd" and "macd_signal".
    df[["macd", "macd_signal"]].plot(ax=axes[1])
    plt.savefig('figure_5.png')
--- a/crypto_eval/marketsim.py
+++ b/crypto_eval/marketsim.py
@ -0,0 +1,179 @@
 """MC2-P1: Market simulator.
 Copyright 2018, Georgia Institute of Technology (Georgia Tech)
 Atlanta, Georgia 30332
 All Rights Reserved
 Template code for CS 4646/7646
 Georgia Tech asserts copyright ownership of this template and all derivative
 works, including solutions to the projects assigned in this course. Students
 and other users of this template code are advised not to share it with others
 or to make it available on publicly viewable websites including repositories
 such as github and gitlab.  This copyright statement should not be removed
 or edited.
 We do grant permission to share solutions privately with non-students such
 as potential employers. However, sharing with other current or future
 students of CS 7646 is prohibited and subject to being investigated as a
 GT honor code violation.
 -----do not edit anything above this line---
 Student Name: Tucker Balch (replace with your name)
 GT User ID: felixm (replace with your User ID)
 GT ID: 1337 (replace with your GT ID)
 """
 import pandas as pd
 from util import get_data, plot_data
 from optimize_something.optimization import calculate_stats
 def read_orders(orders_file):
    """
    Parse orders into the form:
        Date      datetime64[ns]
        Symbol            object
        Order             object
        Shares             int32
    This is what the order book looks like:
        Date,Symbol,Order,Shares
        2011-01-10,AAPL,BUY,1500
        2011-01-10,AAPL,SELL,1500
    The Date column becomes the index and the rows are returned sorted by it.
    """
    # NOTE(review): pandas documents ``dtype`` as a single type or a
    # column -> type mapping; confirm that this comma-separated string is
    # interpreted as intended by the pandas version in use.
    orders = pd.read_csv(orders_file,
                         index_col=['Date'],
                         dtype='|str, str, str,  i4',
                         parse_dates=['Date'])
    orders.sort_values(by="Date", inplace=True)
    return orders
 def get_order_book_info(orders):
    """Return the first index entry, the last index entry and the sorted list
    of distinct symbols of ``orders``."""
    order_dates = orders.index
    distinct_symbols = set(orders.Symbol.tolist())
    return order_dates[0], order_dates[-1], sorted(distinct_symbols)
 def get_portfolio_value(holding, prices):
    """Return the total value of ``holding``.

    The 'cash' entry counts at face value; every other entry is a share count
    that is multiplied by ``prices[ticker]``.
    """
    total = 0
    for ticker in holding:
        amount = holding[ticker]
        total += amount if ticker == 'cash' else amount * prices[ticker]
    return total
 def handle_order(date, order, holding, prices, commission, impact):
    """Process the order.

    order: (symbol, order type, shares). The order type is "BUY" or "SELL"
        (any case) or "" to derive it from the sign of ``shares``.
    holding: dict of share counts per symbol plus 'cash'; updated in place.
    prices: mapping from symbol to the price used for this order.
    commission: fixed fee deducted from cash for every executed order.
    impact: fraction of the traded value deducted from cash as a penalty.

    Orders with zero shares and no type, or with nan shares, are ignored.
    Raises Exception for an unknown order type; ``holding`` is left untouched
    in that case. ``date`` is not used for the computation.
    """
    symbol, order, shares = order
    if shares == 0 and order == "":
        return  # empty order
    if pd.isnull(shares):
        return  # shares is nan
    # Allow indicating buying and selling via shares. If shares is positive we
    # buy and if it is negative we sell.
    if shares > 0 and order == "":
        order = "BUY"
    elif shares < 0 and order == "":
        order = "SELL"
        shares = abs(shares)
    # Validate before touching ``holding`` so a bad order cannot leave the
    # fees deducted without a matching trade.
    side = order.upper()
    if side not in ("BUY", "SELL"):
        raise Exception("Unexpected order type.")
    adj_closing_price = prices[symbol]
    cost = shares * adj_closing_price
    # Charge commission and deduct impact penalty
    holding['cash'] -= (commission + impact * adj_closing_price * shares)
    if side == "BUY":
        holding['cash'] -= cost
        holding[symbol] += shares
    else:
        holding['cash'] += cost
        holding[symbol] -= shares
 def compute_portvals(orders_file, start_val=1000000, commission=9.95, impact=0.005):
    """Simulate the order book and return the daily portfolio value.

    orders_file: path or file object of an orders CSV (see ``read_orders``) or
        an already parsed, date-indexed orders DataFrame.
    start_val: starting cash.
    commission: fixed fee per executed order.
    impact: fraction of the traded value charged per executed order.

    Returns a DataFrame with the single column 'Portval', indexed by the dates
    ``get_data`` returns between the first and the last order date.
    """
    if isinstance(orders_file, pd.DataFrame):
        # Date slicing below needs a sorted index; the stable sort keeps the
        # relative order of same-day orders and leaves the caller's frame alone.
        orders = orders_file.sort_index(kind="mergesort")
    else:
        orders = read_orders(orders_file)
    start_date, end_date, symbols = get_order_book_info(orders)
    # Tickers in the orderbook over the date_range in the order book.
    prices = get_data(symbols, pd.date_range(start_date, end_date))
    # A dictionary to keep track of the assets we are holding.
    holding = {s: 0 for s in symbols}
    holding['cash'] = start_val
    # Iterate over all trading days that are in the (inclusive) range of the
    # order book dates. This implicitly ignores orders placed on non-trading
    # days.
    portvals = []
    for day, day_prices in prices.iterrows():
        # Process orders for that day.
        for order_date, order in orders.loc[day:day].iterrows():
            handle_order(order_date, order, holding, day_prices, commission, impact)
        # Compute portfolio value at the end of day. The values are collected
        # here because writing into the row yielded by iterrows() is not
        # guaranteed to reach ``prices``.
        portvals.append(get_portfolio_value(holding, day_prices))
    prices['Portval'] = pd.Series(portvals, index=prices.index, dtype=float)
    return prices[['Portval']]
 def test_code():
    """Simulate ./orders/orders-02.csv and print fund statistics next to SPY's."""
    of = "./orders/orders-02.csv"
    sv = 1000000
    portvals = compute_portvals(orders_file=of, start_val=sv)
    if isinstance(portvals, pd.DataFrame):
        portvals = portvals[portvals.columns[0]]  # just get the first column
    else:
        raise Exception("warning, code did not return a DataFrame")
    start_date = portvals.index[0]
    end_date = portvals.index[-1]
    cum_ret, avg_daily_ret, \
        std_daily_ret, sharpe_ratio = calculate_stats(portvals.to_frame(), [1])
    spy = get_data(['SPY'], pd.date_range(start_date, end_date))
    cum_ret_SPY, avg_daily_ret_SPY, \
        std_daily_ret_SPY, sharpe_ratio_SPY = calculate_stats(spy, [1])
    # Compare portfolio against $SPY
    print(f"Date Range: {start_date} to {end_date}")
    print()
    print(f"Sharpe Ratio of Fund: {sharpe_ratio}")
    print(f"Sharpe Ratio of SPY : {sharpe_ratio_SPY}")
    print()
    print(f"Cumulative Return of Fund: {cum_ret}")
    print(f"Cumulative Return of SPY : {cum_ret_SPY}")
    print()
    print(f"Standard Deviation of Fund: {std_daily_ret}")
    print(f"Standard Deviation of SPY : {std_daily_ret_SPY}")
    print()
    print(f"Average Daily Return of Fund: {avg_daily_ret}")
    print(f"Average Daily Return of SPY : {avg_daily_ret_SPY}")
    print()
    # The Series is indexed by date, so the last value is taken by position.
    print(f"Final Portfolio Value: {portvals.iloc[-1]}")
 def author():
    """Return the author's user ID."""
    user_id = 'felixm'
    return user_id
 if __name__ == "__main__":
    # Run the sample simulation when the module is executed as a script.
    test_code()
--- a/crypto_eval/testproject.py
+++ b/crypto_eval/testproject.py
@ -0,0 +1,8 @@
 from experiment1 import experiment1
 from experiment2 import experiment2
 if __name__ == "__main__":
    # Run both experiments in order; experiment1 is called with
    # create_report=True, experiment2 with its defaults.
    experiment1(create_report=True)
    experiment2()
--- a/util.py
+++ b/util.py
@ -14,22 +14,42 @@ def symbol_to_path(symbol, base_dir=None):
        base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
-def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
+def get_data(symbols, dates, addSPY=True, colname='Adj Close', datecol='Date'):
    """Read stock data (adjusted close) for given symbols from CSV files."""
    df = pd.DataFrame(index=dates)
    if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-        symbols = ['SPY'] + list(symbols) # handles the case where symbols is np array of 'object'
+        # handles the case where symbols is np array of 'object'
        symbols = ['SPY'] + list(symbols)
    for symbol in symbols:
-        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
+        if 'BTC' in symbol or 'ETH' in symbol:
-                parse_dates=True, usecols=['Date', colname], na_values=['nan'])
+            colname = 'close'
            datecol = 'time'
        elif symbol == 'SPY':
            colname = 'close'
            datecol = 'time'
        else:
            colname = 'Adj Close'
            datecol = 'Date'
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col=datecol,
                              parse_dates=True, usecols=[datecol, colname],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={colname: symbol})
        if datecol == 'time':
            df_temp['date'] = pd.to_datetime(df_temp.index, unit='s')
            df_temp['date'] = pd.DatetimeIndex(df_temp['date']).normalize()
            df_temp.set_index('date', drop=True, inplace=True)
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
-            df = df.dropna(subset=["SPY"])
+            pass
-
+            # df = df.dropna(subset=["SPY"])
    return df
 def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    import matplotlib.pyplot as plt
    """Plot stock prices with a custom title and meaningful axis labels."""