
Finish project 8 and course!

master
Felix Martin 2020-11-10 12:33:42 -05:00
parent 6e1f70bcba
commit 063d9a75ae
7 changed files with 147 additions and 19 deletions


@@ -14,13 +14,13 @@ class Holding:
 class QLearner(object):
-    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
+    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False, n_bins=5):
         self.verbose = verbose
         self.impact = impact
         self.commission = commission
         self.testing = testing  # Decides which type of order df to return.
         self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
-        self.n_bins = 5
+        self.n_bins = n_bins
         self.bins = {}
         self.num_states = self.get_num_states()
         self.num_actions = 3  # buy, sell, hold
@@ -134,7 +134,7 @@ class QLearner(object):
         self.add_indicators(df, symbol)
         self.bin_indicators(df)
-        for _ in range(10):
+        for _ in range(15):
             self.train(df, symbol, sv)

     def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
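
For readers without the full file: `get_num_states` is not part of this diff. A minimal sketch of how it could derive the state count from `n_bins`, consistent with the report's figure of 375 = 3x5x5x5 states (three holding positions times five bins for each of the three indicators), might look like this:

```python
# Hypothetical sketch only; the actual get_num_states is not shown in
# this commit. Three holding states (short, flat, long) and one bin
# index per indicator give 3 * 5**3 = 375 states for n_bins=5.
def get_num_states(self):
    n_holdings = 3  # short, flat, long
    return n_holdings * self.n_bins ** len(self.indicators)
```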


@@ -128,20 +128,63 @@ def compare_all_strategies(symbol, sv, sd, ed):
     plt.savefig('figure_2.png', dpi=fig.dpi)


-def compare_number_trades(symbol, sv, sd, ed):
+def compare_number_trades():
+    symbol = "JPM"
+    sv = 10000
+    sd = dt.datetime(2008, 1, 1)  # in-sample
+    ed = dt.datetime(2009, 12, 31)  # in-sample
+    df = util.get_data([symbol], pd.date_range(sd, ed))
+    df.drop(columns=["SPY"], inplace=True)
+    print("| commission | n_orders |")
+    print("-------------------------")
+    for commission in [0, 9.95, 20, 50, 100]:
+        ql = QLearner(testing=True, commission=commission, impact=0.005)
+        ql.addEvidence(symbol, sd, ed, sv)
+        orders = ql.testPolicy(symbol, sd, ed, sv)
+        n_orders = orders[orders["Shares"] != 0].shape[0]
+        print(f"| {commission} | {n_orders} |")
+
+
+def compare_q_learners():
+    symbol = "JPM"
+    sv = 10000
+    sd = dt.datetime(2008, 1, 1)  # in-sample
+    ed = dt.datetime(2009, 12, 31)  # in-sample
+    sd_out = dt.datetime(2010, 1, 1)  # out-sample
+    ed_out = dt.datetime(2011, 12, 31)  # out-sample
+    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
+    df.drop(columns=["SPY"], inplace=True)
     bs = BenchmarkStrategy()
     orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
     df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders Benchmark"] = orders["Shares"]
     ql = QLearner(testing=True, verbose=False)
     ql.addEvidence(symbol, sd, ed, sv)
     orders = ql.testPolicy(symbol, sd, ed, sv)
     n_orders_no_commission = orders[orders["Shares"] != 0].shape[0]
     orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
     df["QL 5"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders QL 5"] = orders["Shares"]
-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False, n_bins=4)
     ql.addEvidence(symbol, sd, ed, sv)
     orders = ql.testPolicy(symbol, sd, ed, sv)
     n_orders_commission = orders[orders["Shares"] != 0].shape[0]
     print(f"{n_orders_no_commission=} {n_orders_commission=}")
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
+    df["QL 4"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
+    df["Orders QL 4"] = orders["Shares"]
+    fig, ax = plt.subplots(3, sharex=True)
+    df[[symbol]].plot(ax=ax[0])
+    df[["Benchmark", "QL 5", "QL 4"]].plot(ax=ax[1])
+    df[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=ax[2])
+    for a in ax:
+        a.grid()
+    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
+    fig.set_size_inches(10, 8, forward=True)
+    plt.savefig('figure_4.png', dpi=fig.dpi)
     sys.exit(0)


 def experiment1(create_report=False):
@@ -152,27 +195,28 @@ def experiment1(create_report=False):
     sd_out = dt.datetime(2010, 1, 1)  # out-sample
     ed_out = dt.datetime(2011, 12, 31)  # out-sample
-    df = util.get_data([symbol], pd.date_range(sd, ed))
+    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
     df.drop(columns=["SPY"], inplace=True)

     if create_report:
         compare_manual_strategies(symbol, sv, sd, ed)
         compare_all_strategies(symbol, sv, sd, ed)
-        return
+        sys.exit(0)

     # visualize_correlations(symbol, df)
     # plot_indicators(symbol, df)
-    # compare_number_trades(symbol, sv, sd, ed)
+    # compare_q_learners()

     bs = BenchmarkStrategy()
-    orders = bs.testPolicy(symbol, sd, ed, sv)
-    df["Benchmark"] = marketsim.compute_portvals(orders, sv)
+    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
+    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders Benchmark"] = orders["Shares"]

-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False)
     ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
-    df["QL"] = marketsim.compute_portvals(orders, sv)
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
+    df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders QL"] = orders["Shares"]

     fig, ax = plt.subplots(3, sharex=True)
@@ -184,6 +228,8 @@ def experiment1(create_report=False):
         a.grid()
     m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
     plt.show()
+    # fig.set_size_inches(10, 8, forward=True)
+    # plt.savefig('figure_4.png', dpi=fig.dpi)


 if __name__ == "__main__":


@@ -0,0 +1,8 @@
+import experiment1
+
+
+def experiment2():
+    experiment1.compare_number_trades()
+
+if __name__ == "__main__":
+    experiment2()

Binary file not shown (new image, 85 KiB).

Binary file not shown (new image, 130 KiB).


@@ -1,3 +1,75 @@
# Report

This document is the final report for the Machine Learning for Trading
course. I have implemented two manual strategies, a random tree
learner-based strategy, and one based on Q-learning.

# Experiment 1

I have implemented two manual strategies. The first strategy buys on a
bullish MACD cross while the MACD is below zero and sells on a bearish
MACD cross while the MACD is above one.

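A minimal sketch of this rule, assuming hypothetical `macd` and
`macd_signal` columns (the actual indicator code is not part of this
commit):

```python
import pandas as pd

def macd_cross_signals(df: pd.DataFrame) -> pd.Series:
    # Cross detection: 'above' flips from False to True (bullish) or
    # from True to False (bearish) between consecutive days.
    above = df["macd"] > df["macd_signal"]
    bullish_cross = above & ~above.shift(1, fill_value=False)
    bearish_cross = ~above & above.shift(1, fill_value=False)
    signal = pd.Series(0, index=df.index)
    signal[bullish_cross & (df["macd"] < 0)] = 1   # buy
    signal[bearish_cross & (df["macd"] > 1)] = -1  # sell
    return signal
```
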
The second strategy uses the MACD diff (the difference between the MACD
and the MACD signal), RSI, and price SMA with a period of eight. I have
plotted each indicator against the subsequent one-, three-, and five-day
returns to find reasonable thresholds for the strategy.

![Scatter plot to find reasonable thresholds.](figure_3.png)

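The scatter data can be produced along these lines (a sketch; the column
names and plotting layout are my assumptions, not the commit's code):

```python
import matplotlib.pyplot as plt
import pandas as pd

def plot_indicator_vs_return(df: pd.DataFrame, price_col: str,
                             indicator_cols, horizons=(1, 3, 5)):
    # Scatter each indicator against the forward n-day return; the
    # buy/sell thresholds are then read off these plots by eye.
    fig, axes = plt.subplots(len(indicator_cols), len(horizons),
                             squeeze=False)
    for i, col in enumerate(indicator_cols):
        for j, n in enumerate(horizons):
            fwd = df[price_col].shift(-n) / df[price_col] - 1.0
            axes[i][j].scatter(df[col], fwd, s=2)
            axes[i][j].set_title(f"{col} vs {n}-day return")
    return fig
```
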
Based on the scatter plots, I have created a list of buy and sell
signals. Each signal uses the current number of shares owned and one of
the three indicators; one example is sketched below. The following
figure shows the result for both manual strategies compared to the
benchmark. Both approaches do well in the in-sample period but worse
afterward, which I expected because I cherry-picked the thresholds
based on the in-sample period's scatter plots.

![First strategy based on MACD. Better than just holding.](figure_1.png)

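For concreteness, one signal of the kind just described might look like
this (the thresholds are made up for illustration; the real ones come
from the scatter plots above):

```python
def second_strategy_signal(holding: int, rsi: float) -> int:
    # Combine the current position with one indicator threshold.
    if holding <= 0 and rsi < 30:
        return 1   # buy when not long and RSI looks oversold
    if holding >= 0 and rsi > 70:
        return -1  # sell when not short and RSI looks overbought
    return 0       # otherwise hold
```
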
Next, I have implemented a random tree-based strategy learner. The
learner uses a leaf size of five and no bagging. A smaller leaf size
would result in overfitting to the in-sample data, but as the following
figure shows, a leaf size of five works well, and the RT learner also
performs well on the out-of-sample data.

![Manual strategy compared to RT learner.](figure_2.png)

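The usual way to cast this as a learning problem, sketched here under my
own assumptions about labels and thresholds (the commit's training code
is not shown), is to turn forward returns into buy/hold/sell classes and
train the tree on the indicator values:

```python
import numpy as np

def make_labels(prices: np.ndarray, horizon: int = 5,
                buy_thr: float = 0.02, sell_thr: float = -0.02):
    # Label each day by its forward return: 1 = buy, -1 = sell, 0 = hold.
    # Horizon and thresholds are illustrative values only.
    fwd = np.zeros_like(prices, dtype=float)
    fwd[:-horizon] = prices[horizon:] / prices[:-horizon] - 1.0
    return np.where(fwd > buy_thr, 1, np.where(fwd < sell_thr, -1, 0))
```
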
I have also implemented a strategy learner based on Q-learning. The
Q-learner uses fifteen training runs on the in-sample data. It mostly
does well on the out-of-sample data, but the RT-based strategy learner
appears to be better.

I am using five bins for each of the three indicators mentioned above.
That results in 375 (3x5x5x5) states with only about 500 in-sample data
points, so the Q-learner is probably overfitting to the in-sample data.
Indeed, with four bins, the Q-learner performs better on the
out-of-sample data.

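The discretization could look like this (a sketch; the commit's
bin_indicators is not shown, and quantile binning via pd.qcut is my
assumption):

```python
import pandas as pd

def bin_indicator(values: pd.Series, n_bins: int = 5):
    # Split the indicator into n_bins quantile buckets; keep the edges
    # so later values can be mapped to a bin index with pd.cut.
    binned, edges = pd.qcut(values, q=n_bins, labels=False, retbins=True)
    return binned, edges
```
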
![Strategy learner based on Q-learning, using five and four bins for
discretization, out of sample.](figure_4.png)

# Experiment 2

Experiment 2 aims to show that the strategy learner trades differently
when commission and impact are not zero. The RT-based trader does not
consider the commission value, but the Q-learning-based trader does.

However, a commission smaller than $10 does not affect the number of
trades significantly. Only when the commission reaches about $50, or
with a slippage of 1%, do we see considerably fewer transactions.

| commission | n_orders |
|------------|----------|
| 9.95       | 79       |
| 20         | 83       |
| 50         | 63       |
| 100        | 37       |

# Closing Remarks

Machine Learning for Trading is a great course. It gives an excellent
introduction to finance, trading, and machine learning without getting
lost in technical or mathematical details. I have enjoyed building
decision tree learners and a Q-learner from first principles. At the
same time, the course teaches powerful libraries such as NumPy and
Pandas well.


@@ -1,6 +1,8 @@
 from experiment1 import experiment1
+from experiment2 import experiment2

 if __name__ == "__main__":
     experiment1(create_report=True)
+    experiment2()