Finish project 8 and course!

2020-11-10 12:33:42 -05:00 · 2020-11-10 12:33:42 -05:00 · 063d9a75ae
parent 6e1f70bcba
commit 063d9a75ae
7 changed files with 147 additions and 19 deletions
--- a/strategy_evaluation/QLearner.py
+++ b/strategy_evaluation/QLearner.py
@ -14,13 +14,13 @@ class Holding:
 class QLearner(object):
-    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
+    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False, n_bins=5):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
-        self.n_bins = 5
+        self.n_bins = n_bins
        self.bins = {}
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
@ -134,7 +134,7 @@ class QLearner(object):
        self.add_indicators(df, symbol)
        self.bin_indicators(df)
-        for _ in range(10):
+        for _ in range(15):
            self.train(df, symbol, sv)
    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
--- a/strategy_evaluation/experiment1.py
+++ b/strategy_evaluation/experiment1.py
@ -128,20 +128,63 @@ def compare_all_strategies(symbol, sv, sd, ed):
    plt.savefig('figure_2.png', dpi=fig.dpi)
-def compare_number_trades(symbol, sv, sd, ed):
+def compare_number_trades():
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    df = util.get_data([symbol], pd.date_range(sd, ed))
    df.drop(columns=["SPY"], inplace=True)
    print(f"| commission | n_orders |")
    print(f"-------------------------")
    for commission in [0, 9.95, 20, 50, 100]:
        ql = QLearner(testing=True, commission=commission, impact=0.005)
        ql.addEvidence(symbol, sd, ed, sv)
        orders = ql.testPolicy(symbol, sd, ed, sv)
        n_orders = orders[orders["Shares"] != 0].shape[0]
        print(f"| {commission} | {n_orders} |")
 def compare_q_learners():
    symbol = "JPM"
    sv = 10000
    sd = dt.datetime(2008, 1, 1)  # in-sample
    ed = dt.datetime(2009, 12, 31)  # in-sample
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample
    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    df.drop(columns=["SPY"], inplace=True)
    bs = BenchmarkStrategy()
    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Benchmark"] = orders["Shares"]
    ql = QLearner(testing=True, verbose=False)
    ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
-    n_orders_no_commission = orders[orders["Shares"] != 0].shape[0]
+    df["QL 5"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL 5"] = orders["Shares"]
-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False, n_bins=4)
    ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
-    n_orders_commision = orders[orders["Shares"] != 0].shape[0]
+    df["QL 4"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
-    print(f"{n_orders_no_commission=} {n_orders_commision=}")
+    df["Orders QL 4"] = orders["Shares"]
    fig, ax = plt.subplots(3, sharex=True)
    df[[symbol]].plot(ax=ax[0])
    df[["Benchmark", "QL 5", "QL 4"]].plot(ax=ax[1])
    df[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=ax[2])
    for a in ax:
        a.grid()
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    fig.set_size_inches(10, 8, forward=True)
    plt.savefig('figure_4.png', dpi=fig.dpi)
    sys.exit(0)
 def experiment1(create_report=False):
@ -152,27 +195,28 @@ def experiment1(create_report=False):
    sd_out = dt.datetime(2010, 1, 1)  # out-sample
    ed_out = dt.datetime(2011, 12, 31)  # out-sample
-    df = util.get_data([symbol], pd.date_range(sd, ed))
+    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
    df.drop(columns=["SPY"], inplace=True)
    if create_report:
        compare_manual_strategies(symbol, sv, sd, ed)
        compare_all_strategies(symbol, sv, sd, ed)
-        return
+        sys.exit(0)
    # visualize_correlations(symbol, df)
    # plot_indicators(symbol, df)
    # compare_number_trades(symbol, sv, sd, ed)
    # compare_q_learners()
    bs = BenchmarkStrategy()
-    orders = bs.testPolicy(symbol, sd, ed, sv)
+    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
-    df["Benchmark"] = marketsim.compute_portvals(orders, sv)
+    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders Benchmark"] = orders["Shares"]
-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False)
    ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
-    df["QL"] = marketsim.compute_portvals(orders, sv)
+    df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
    df["Orders QL"] = orders["Shares"]
    fig, ax = plt.subplots(3, sharex=True)
@ -184,6 +228,8 @@ def experiment1(create_report=False):
        a.grid()
    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
    plt.show()
    # fig.set_size_inches(10, 8, forward=True)
    # plt.savefig('figure_4.png', dpi=fig.dpi)
 if __name__ == "__main__":
--- a/strategy_evaluation/experiment2.py
+++ b/strategy_evaluation/experiment2.py
@ -0,0 +1,8 @@
 import experiment1
 def experiment2():
    """Run experiment 2 by delegating to experiment1.compare_number_trades()."""
    experiment1.compare_number_trades()
 if __name__ == "__main__":
    experiment2()
--- a/strategy_evaluation/figure_3.png
+++ b/strategy_evaluation/figure_3.png
--- a/strategy_evaluation/figure_4.png
+++ b/strategy_evaluation/figure_4.png
--- a/strategy_evaluation/strategy_evaluation.md
+++ b/strategy_evaluation/strategy_evaluation.md
@ -1,3 +1,75 @@
-# Report
+This document is the final report for the Machine Learning for Trading
 course. I have implemented two manual strategies, a random tree
 learner-based strategy and one based on Q-learning.
 # Experiment 1
 I have implemented two manual strategies. The first strategy buys on a
 bullish MACD cross with a MACD smaller than zero and sells on a bearish
 MACD cross with a MACD greater than one.
 The second strategy uses MACD diff (the difference between the MACD and
 the MACD signal), RSI, and price SMA with a period of eight. I have
 plotted each indicator against the one-, three-, and five-day returns to
 find reasonable thresholds for the strategy.
 ![Scatter plot to find reasonable thresholds.](figure_3.png)
 Based on the scatter plots, I have created a list of buy and sell
 signals. Each signal uses the current number of shares owned and one of
 the three indicators. The following figure shows the result for both
 manual strategies compared to the benchmark. Both approaches do well in
 the in-sample period but worse afterward, which I expected because I
 cherry-picked the thresholds based on the in-sample period's scatter
 plots.
 ![First strategy based on MACD. Better than just holding.](figure_1.png)
 Next, I have implemented a random tree-based strategy learner. The
 learner uses a leaf size of five and no bagging. A smaller leaf size
 would result in overfitting to the in-sample data. But as the following
 screenshot shows, a leaf size of five works well, and the RT learner does
 well on the out-of-sample data.
 ![Manual strategy compared to RT learner.](figure_2.png)
 I have also implemented a strategy learner based on Q-learning. The
 Q-learner uses fifteen training runs on the in-sample data. It mostly
 does well on the out-of-sample data, but it looks like the RT-based
 strategy learner is better.
 I am using five bins for each of the three indicators mentioned before.
 That results in 375 (3x5x5x5) states with only about 500 in-sample data
 points. The Q-learner is probably overfitting to the in-sample data.
 Indeed, with four bins per indicator, the Q-learner performs better on
 the out-of-sample data.
 ![Strategy learner based on Q-learning, using four and five bins for
 discretization, evaluated out of sample.](figure_4.png)
 # Experiment 2
 Experiment 2 aims to show that the strategy learner trades differently
 when there is a commission and the impact is not zero. The RT-based
 trader does not consider the commission value, but the Q-learning-based
 trader does.
 However, it seems like a commission smaller than $10 does not affect
 the number of trades significantly. Only when the commission is around
 $50, or with a slippage of 1%, do we see considerably fewer transactions.
 | commission | n_orders |
 |------------|----------|
 | 9.95       | 79       |
 | 20         | 83       |
 | 50         | 63       |
 | 100        | 37       |
 # Closing Remarks
 Machine Learning for Trading is a great course. It gives an excellent
 introduction to finance, trading, and machine learning without getting lost in
 technical or mathematical details. I have enjoyed building decision tree
 learners and a Q learner from first principles. At the same time, the course
 accurately teaches powerful libraries such as NumPy and Pandas.
 ![First strategy based on MACD. Better than just holding](figure_1.png)
--- a/strategy_evaluation/testproject.py
+++ b/strategy_evaluation/testproject.py
@ -1,6 +1,8 @@
 from experiment1 import experiment1
 from experiment2 import experiment2
 if __name__ == "__main__":
    experiment1(create_report=True)
    experiment2()