diff --git a/strategy_evaluation/QLearner.py b/strategy_evaluation/QLearner.py index 984ca8e..f164169 100644 --- a/strategy_evaluation/QLearner.py +++ b/strategy_evaluation/QLearner.py @@ -28,18 +28,25 @@ class QLearner(object): def row_to_state(self, holding, df_row): """Transforms a row into a state value.""" - assert(holding in [-1000, 0, 1000]) holding = (holding + 1000) // 1000 - remaining_states = self.num_states - state = holding * (remaining_states // 3) - remaining_states //= 3 + assert(holding in [0, 1, 2]) + + # For each indicator that goes into the state the interval becomes + # smaller based on how many bins the indicator has. The first + # 'indicator' is the information about how many shares we are currently + # holding. So for example, if I have 450 states then the interval (aka + # remaining_states) is 150 because there are three values for holding: + # holding = 0 -> state = 0 * 150 = 0 + # holding = 1 -> state = 1 * 150 = 150 + # holding = 2 -> state = 2 * 150 = 300 + remaining_states = self.num_states // 3 + state = holding * remaining_states for indicator in self.indicators: value = df_row[indicator] bin_n = self.indicator_value_to_bin(indicator, value) - interval = remaining_states // self.n_bins - state += bin_n * interval remaining_states //= self.n_bins + state += bin_n * remaining_states return state def indicator_value_to_bin(self, indicator, value): @@ -103,18 +110,21 @@ class QLearner(object): def train(self, df, symbol, sv): holding = Holding(sv, 0, sv) + row = df.iloc[0] state = self.row_to_state(holding.shares, row) action = self.learner.querysetstate(state) adj_closing_price = row[symbol] + equity = holding.equity self.handle_order(action, holding, adj_closing_price) for index, row in df.iloc[1:].iterrows(): adj_closing_price = row[symbol] new_equity = holding.cash + holding.shares * adj_closing_price - r = self.get_reward(holding.equity, new_equity) + r = self.get_reward(equity, new_equity) s_prime = 
self.row_to_state(holding.shares, row) a = self.learner.query(s_prime, r) + equity = new_equity self.handle_order(a, holding, adj_closing_price) if self.verbose: print(f"{holding=} {s_prime=} {r=} {a=}") diff --git a/strategy_evaluation/experiment1.py b/strategy_evaluation/experiment1.py index 758ea99..124166d 100644 --- a/strategy_evaluation/experiment1.py +++ b/strategy_evaluation/experiment1.py @@ -128,6 +128,22 @@ def compare_all_strategies(symbol, sv, sd, ed): plt.savefig('figure_2.png', dpi=fig.dpi) +def compare_number_trades(symbol, sv, sd, ed): + df = util.get_data([symbol], pd.date_range(sd, ed)) + df.drop(columns=["SPY"], inplace=True) + + ql = QLearner(testing=True, verbose=False) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd, ed, sv) + n_orders_no_commission = orders[orders["Shares"] != 0].shape[0] + + ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005) + ql.addEvidence(symbol, sd, ed, sv) + orders = ql.testPolicy(symbol, sd, ed, sv) + n_orders_commision = orders[orders["Shares"] != 0].shape[0] + print(f"{n_orders_no_commission=} {n_orders_commision=}") + + def experiment1(create_report=False): symbol = "JPM" sv = 10000 @@ -136,7 +152,7 @@ def experiment1(create_report=False): sd_out = dt.datetime(2010, 1, 1) # out-sample ed_out = dt.datetime(2011, 12, 31) # out-sample - df = util.get_data([symbol], pd.date_range(sd_out, ed_out)) + df = util.get_data([symbol], pd.date_range(sd, ed)) df.drop(columns=["SPY"], inplace=True) if create_report: @@ -146,16 +162,16 @@ def experiment1(create_report=False): # visualize_correlations(symbol, df) # plot_indicators(symbol, df) + # compare_number_trades(symbol, sv, sd, ed) bs = BenchmarkStrategy() - orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + orders = bs.testPolicy(symbol, sd, ed, sv) df["Benchmark"] = marketsim.compute_portvals(orders, sv) df["Orders Benchmark"] = orders["Shares"] - # ql = QLearner(testing=True, verbose=False, commission=10, impact=0.005) - ql 
= QLearner(testing=True, verbose=False) + ql = QLearner(testing=True, verbose=False, commission=9.95, impact=0.005) ql.addEvidence(symbol, sd, ed, sv) - orders = ql.testPolicy(symbol, sd_out, ed_out, sv) + orders = ql.testPolicy(symbol, sd, ed, sv) df["QL"] = marketsim.compute_portvals(orders, sv) df["Orders QL"] = orders["Shares"]