diff --git a/strategy_evaluation/QLearner.py b/strategy_evaluation/QLearner.py
index f164169..4f3afdd 100644
--- a/strategy_evaluation/QLearner.py
+++ b/strategy_evaluation/QLearner.py
@@ -14,13 +14,13 @@ class Holding:
 
 
 class QLearner(object):
-    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
+    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False, n_bins=5):
         self.verbose = verbose
         self.impact = impact
         self.commission = commission
         self.testing = testing  # Decides which type of order df to return.
         self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
-        self.n_bins = 5
+        self.n_bins = n_bins
         self.bins = {}
         self.num_states = self.get_num_states()
         self.num_actions = 3  # buy, sell, hold
@@ -134,7 +134,7 @@ class QLearner(object):
         self.add_indicators(df, symbol)
         self.bin_indicators(df)
 
-        for _ in range(10):
+        for _ in range(15):
             self.train(df, symbol, sv)
 
     def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
diff --git a/strategy_evaluation/experiment1.py b/strategy_evaluation/experiment1.py
index 124166d..3499de4 100644
--- a/strategy_evaluation/experiment1.py
+++ b/strategy_evaluation/experiment1.py
@@ -128,20 +128,63 @@ def compare_all_strategies(symbol, sv, sd, ed):
     plt.savefig('figure_2.png', dpi=fig.dpi)
 
 
-def compare_number_trades(symbol, sv, sd, ed):
+def compare_number_trades():
+    symbol = "JPM"
+    sv = 10000
+    sd = dt.datetime(2008, 1, 1)  # in-sample
+    ed = dt.datetime(2009, 12, 31)  # in-sample
+
     df = util.get_data([symbol], pd.date_range(sd, ed))
     df.drop(columns=["SPY"], inplace=True)
 
+    print(f"| commission | n_orders |")
+    print(f"-------------------------")
+    for commission in [0, 9.95, 20, 50, 100]:
+        ql = QLearner(testing=True, commission=commission, impact=0.005)
+        ql.addEvidence(symbol, sd, ed, sv)
+        orders = ql.testPolicy(symbol, sd, ed, sv)
+        n_orders = orders[orders["Shares"] != 0].shape[0]
+        print(f"| {commission} | {n_orders} |")
+
 
+def compare_q_learners():
+    symbol = "JPM"
+    sv = 10000
+    sd = dt.datetime(2008, 1, 1)  # in-sample
+    ed = dt.datetime(2009, 12, 31)  # in-sample
+    sd_out = dt.datetime(2010, 1, 1)  # out-sample
+    ed_out = dt.datetime(2011, 12, 31)  # out-sample
+
+    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
+    df.drop(columns=["SPY"], inplace=True)
+
+    bs = BenchmarkStrategy()
+    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
+    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
+    df["Orders Benchmark"] = orders["Shares"]
+
     ql = QLearner(testing=True, verbose=False)
     ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
-    n_orders_no_commission = orders[orders["Shares"] != 0].shape[0]
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
+    df["QL 5"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
+    df["Orders QL 5"] = orders["Shares"]
 
-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False, n_bins=4)
     ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
-    n_orders_commision = orders[orders["Shares"] != 0].shape[0]
-    print(f"{n_orders_no_commission=} {n_orders_commision=}")
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
+    df["QL 4"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
+    df["Orders QL 4"] = orders["Shares"]
+
+    fig, ax = plt.subplots(3, sharex=True)
+    df[[symbol]].plot(ax=ax[0])
+    df[["Benchmark", "QL 5", "QL 4"]].plot(ax=ax[1])
+    df[["Orders Benchmark", "Orders QL 5", "Orders QL 4"]].plot(ax=ax[2])
+
+    for a in ax:
+        a.grid()
+    m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
+    fig.set_size_inches(10, 8, forward=True)
+    plt.savefig('figure_4.png', dpi=fig.dpi)
+    sys.exit(0)
 
 
 def experiment1(create_report=False):
@@ -152,27 +195,28 @@
     sd_out = dt.datetime(2010, 1, 1)  # out-sample
     ed_out = dt.datetime(2011, 12, 31)  # out-sample
 
-    df = util.get_data([symbol], pd.date_range(sd, ed))
+    df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
     df.drop(columns=["SPY"], inplace=True)
 
     if create_report:
         compare_manual_strategies(symbol, sv, sd, ed)
         compare_all_strategies(symbol, sv, sd, ed)
-        return
+        sys.exit(0)
 
     # visualize_correlations(symbol, df)
     # plot_indicators(symbol, df)
     # compare_number_trades(symbol, sv, sd, ed)
+    # compare_q_learners()
 
     bs = BenchmarkStrategy()
-    orders = bs.testPolicy(symbol, sd, ed, sv)
-    df["Benchmark"] = marketsim.compute_portvals(orders, sv)
+    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
+    df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders Benchmark"] = orders["Shares"]
 
-    ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005)
+    ql = QLearner(testing=True, verbose=False)
     ql.addEvidence(symbol, sd, ed, sv)
-    orders = ql.testPolicy(symbol, sd, ed, sv)
-    df["QL"] = marketsim.compute_portvals(orders, sv)
+    orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
+    df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv))
     df["Orders QL"] = orders["Shares"]
 
     fig, ax = plt.subplots(3, sharex=True)
@@ -184,6 +228,8 @@
         a.grid()
     m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
     plt.show()
+    # fig.set_size_inches(10, 8, forward=True)
+    # plt.savefig('figure_4.png', dpi=fig.dpi)
 
 
 if __name__ == "__main__":
diff --git a/strategy_evaluation/experiment2.py b/strategy_evaluation/experiment2.py
index e69de29..9b12f55 100644
--- a/strategy_evaluation/experiment2.py
+++ b/strategy_evaluation/experiment2.py
@@ -0,0 +1,8 @@
+import experiment1
+
+def experiment2():
+    experiment1.compare_number_trades()
+
+
+if __name__ == "__main__":
+    experiment2()
diff --git a/strategy_evaluation/figure_3.png b/strategy_evaluation/figure_3.png
new file mode 100644
index 0000000..de7a0bf
Binary files /dev/null and b/strategy_evaluation/figure_3.png differ
diff --git a/strategy_evaluation/figure_4.png b/strategy_evaluation/figure_4.png
new file mode 100644
index 0000000..7b3dc3b
Binary files /dev/null and b/strategy_evaluation/figure_4.png differ
diff --git a/strategy_evaluation/strategy_evaluation.md b/strategy_evaluation/strategy_evaluation.md
index 660ea89..71c5c32 100644
--- a/strategy_evaluation/strategy_evaluation.md
+++ b/strategy_evaluation/strategy_evaluation.md
@@ -1,3 +1,75 @@
-# Report
+This document is the final report for the Machine Learning for Trading
+course. I have implemented two manual strategies, a random tree
+learner-based strategy, and a strategy based on Q-learning.
+
+# Experiment 1
+
+I have implemented two manual strategies. The first strategy buys on a
+bullish MACD cross while the MACD is below zero and sells on a bearish
+MACD cross while the MACD is above one.
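+
+As a rough illustration of this rule (a minimal sketch rather than the
+project's actual code; the helper name and the 12/26/9 EMA periods are
+assumptions made here for illustration):
+
+```python
+import pandas as pd
+
+
+def macd_cross_signals(price: pd.Series) -> pd.Series:
+    """Return +1 (buy), -1 (sell) or 0 per day for the first manual strategy."""
+    # 12/26/9 are the common MACD defaults; they are an assumption here.
+    ema_fast = price.ewm(span=12, adjust=False).mean()
+    ema_slow = price.ewm(span=26, adjust=False).mean()
+    macd = ema_fast - ema_slow
+    signal = macd.ewm(span=9, adjust=False).mean()
+
+    # A cross happens when the MACD moves to the other side of its signal line.
+    cross_up = (macd > signal) & (macd.shift(1) <= signal.shift(1))
+    cross_down = (macd < signal) & (macd.shift(1) >= signal.shift(1))
+
+    signals = pd.Series(0, index=price.index)
+    signals[cross_up & (macd < 0)] = 1     # buy: bullish cross while MACD < 0
+    signals[cross_down & (macd > 1)] = -1  # sell: bearish cross while MACD > 1
+    return signals
+```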
5", "Orders QL 4"]].plot(ax=ax[2]) + + for a in ax: + a.grid() + m = MultiCursor(fig.canvas, ax, color='r', lw=0.5) + fig.set_size_inches(10, 8, forward=True) + plt.savefig('figure_4.png', dpi=fig.dpi) + sys.exit(0) def experiment1(create_report=False): @@ -152,27 +195,28 @@ def experiment1(create_report=False): sd_out = dt.datetime(2010, 1, 1) # out-sample ed_out = dt.datetime(2011, 12, 31) # out-sample - df = util.get_data([symbol], pd.date_range(sd, ed)) + df = util.get_data([symbol], pd.date_range(sd_out, ed_out)) df.drop(columns=["SPY"], inplace=True) if create_report: compare_manual_strategies(symbol, sv, sd, ed) compare_all_strategies(symbol, sv, sd, ed) - return + sys.exit(0) # visualize_correlations(symbol, df) # plot_indicators(symbol, df) # compare_number_trades(symbol, sv, sd, ed) + # compare_q_learners() bs = BenchmarkStrategy() - orders = bs.testPolicy(symbol, sd, ed, sv) - df["Benchmark"] = marketsim.compute_portvals(orders, sv) + orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + df["Benchmark"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) df["Orders Benchmark"] = orders["Shares"] - ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005) + ql = QLearner(testing=True, verbose=False) ql.addEvidence(symbol, sd, ed, sv) - orders = ql.testPolicy(symbol, sd, ed, sv) - df["QL"] = marketsim.compute_portvals(orders, sv) + orders = ql.testPolicy(symbol, sd_out, ed_out, sv) + df["QL"] = indicators.normalize(marketsim.compute_portvals(orders, sv)) df["Orders QL"] = orders["Shares"] fig, ax = plt.subplots(3, sharex=True) @@ -184,6 +228,8 @@ def experiment1(create_report=False): a.grid() m = MultiCursor(fig.canvas, ax, color='r', lw=0.5) plt.show() + # fig.set_size_inches(10, 8, forward=True) + # plt.savefig('figure_4.png', dpi=fig.dpi) if __name__ == "__main__": diff --git a/strategy_evaluation/experiment2.py b/strategy_evaluation/experiment2.py index e69de29..9b12f55 100644 --- a/strategy_evaluation/experiment2.py +++ b/strategy_evaluation/experiment2.py @@ -0,0 +1,8 @@ +import experiment1 + +def experiment2(): + experiment1.compare_number_trades() + + +if __name__ == "__main__": + experiment2() diff --git a/strategy_evaluation/figure_3.png b/strategy_evaluation/figure_3.png new file mode 100644 index 0000000..de7a0bf Binary files /dev/null and b/strategy_evaluation/figure_3.png differ diff --git a/strategy_evaluation/figure_4.png b/strategy_evaluation/figure_4.png new file mode 100644 index 0000000..7b3dc3b Binary files /dev/null and b/strategy_evaluation/figure_4.png differ diff --git a/strategy_evaluation/strategy_evaluation.md b/strategy_evaluation/strategy_evaluation.md index 660ea89..71c5c32 100644 --- a/strategy_evaluation/strategy_evaluation.md +++ b/strategy_evaluation/strategy_evaluation.md @@ -1,3 +1,75 @@ -# Report +This document is the final report for the machine learning for trading +course. I have implemented two manual strategies, a random tree +learner-based strategy and one based on Q-learning. + +# Experiment 1 + +I have implemented two manual strategies. The first strategy buys on a +bullish MACD cross with a MACD smaller than zero and sells on a bearish +MACD cross with a MACD greater than one. + +The second strategy uses MACD diff (the difference between the MACD and +the MACD signal), RSI, and price SMA with a period of eight. I have +plotted the metrics over their one, three, and five days return to find +reasonable thresholds for the strategy. 
+
+Based on the scatter plots, I have created a list of buy and sell
+signals. Each signal uses the current number of shares held and one of
+the three indicators. The following figure shows the results for both
+manual strategies compared to the benchmark. Both approaches do well in
+the in-sample period but worse afterward, which I expected because I
+cherry-picked the thresholds from the in-sample period's scatter plots.
+
+![First strategy based on MACD. Better than just holding.](figure_1.png)
+
+Next, I have implemented a random tree-based strategy learner. The
+learner uses a leaf size of five and no bagging. A smaller leaf size
+would overfit the in-sample data, but as the following figure shows, a
+leaf size of five works well, and the RT learner also does well on the
+out-of-sample data.
+
+![Manual strategy compared to the RT learner.](figure_2.png)
+
+I have also implemented a strategy learner based on Q-learning. The
+Q-learner uses fifteen training passes over the in-sample data. It
+mostly does well on the out-of-sample data, but the RT-based strategy
+learner looks better.
+
+I am using five bins for each of the three indicators mentioned above.
+Together with the three possible holdings, that results in 375
+(3 x 5 x 5 x 5) states, with only about 500 in-sample data points, so
+the Q-learner is probably overfitting to the in-sample data. Indeed,
+with four bins per indicator, the Q-learner performs better on the
+out-of-sample data.
+
+![Strategy learner based on Q-learning, using four and five bins for
+discretization, out of sample.](figure_4.png)
+
+# Experiment 2
+
+Experiment 2 aims to show that the strategy learner trades differently
+when the commission and the market impact are not zero. The RT-based
+trader does not consider the commission value, but the Q-learning-based
+trader does.
+
+However, it seems that a commission smaller than $10 does not affect
+the number of trades significantly. Only when the commission is around
+$50, or with a slippage of 1%, do we see considerably fewer
+transactions.
+
+| commission | n_orders |
+|------------|----------|
+| 9.95       | 79       |
+| 20         | 83       |
+| 50         | 63       |
+| 100        | 37       |
+
+# Closing Remarks
+
+Machine Learning for Trading is a great course. It gives an excellent
+introduction to finance, trading, and machine learning without getting
+lost in technical or mathematical details. I have enjoyed building
+decision tree learners and a Q-learner from first principles. At the
+same time, the course also teaches powerful libraries such as NumPy and
+Pandas.
 
-![First strategy based on MACD. Better than just holding](figure_1.png)
diff --git a/strategy_evaluation/testproject.py b/strategy_evaluation/testproject.py
index 866ad19..f37954e 100644
--- a/strategy_evaluation/testproject.py
+++ b/strategy_evaluation/testproject.py
@@ -1,6 +1,8 @@
 from experiment1 import experiment1
+from experiment2 import experiment2
 
 
 if __name__ == "__main__":
     experiment1(create_report=True)
+    experiment2()