From 169dd8278d77cd1fcb075a28052f70728e1ae5a1 Mon Sep 17 00:00:00 2001
From: Felix Martin
Date: Sat, 7 Nov 2020 12:39:37 -0500
Subject: [PATCH] Implement binning and state calculation

---
Review notes (below the "---" marker, so not part of the commit message):
- addEvidence: the first state is bound to `state` and the first action to
  `action`; the previous revision read both names without defining them.
- addEvidence: update_holding is now called through self.
- testPolicy: calls add_indicators, because _add_indicators is removed by
  this very patch.
- indicator_value_to_bin: returns len(bounds), which equals the old "i + 1"
  after a full loop but is also defined when there are no bounds.
- update_holding: dropped the holding check that followed `return 1000`
  (assumed to be nested in the buy branch and therefore unreachable --
  please confirm); unknown actions raise ValueError, not a bare Exception.
- bin_indicators: the unused `ser` binding is replaced by `_`.
- Indentation and blank lines were reconstructed, and the hunk and diffstat
  counts recomputed from this text; the post-image blob id on the index
  line is stale. Verify against the tree before applying.

 strategy_evaluation/QLearner.py    | 147 +++++++++++++++++++++++----------
 strategy_evaluation/experiment1.py |  17 ++--
 2 files changed, 109 insertions(+), 55 deletions(-)

diff --git a/strategy_evaluation/QLearner.py b/strategy_evaluation/QLearner.py
index eecc6ed..6ff514c 100644
--- a/strategy_evaluation/QLearner.py
+++ b/strategy_evaluation/QLearner.py
@@ -11,60 +11,118 @@ class QLearner(object):
         self.verbose = verbose
         self.impact = impact
         self.commission = commission
-        self.testing = testing
+        self.testing = testing  # Decides which type of order df to return.
+        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
+        self.n_bins = 5
+        self.bins = {}
+        self.num_states = self.get_num_states()
+        self.num_actions = 3  # buy, sell, hold
+        if verbose:
+            print(f"{self.num_states=}")
 
-    def _get_volume(self):
-        """For reference."""
-        volume_all = ut.get_data(syms, dates, colname="Volume")
-        volume = volume_all[syms]  # only portfolio symbols
-        # volume_SPY = volume_all['SPY']  # only SPY, for comparison later
+    def row_to_state(self, holding, df_row):
+        """Transform a holding and a row of indicator values into a state.
+
+        The holding picks one of three equal ranges of self.num_states;
+        each indicator's bin then picks one of self.n_bins equal
+        sub-ranges of the range that remains.
+        """
+        assert(holding in [-1000, 0, 1000])
+        holding = (holding + 1000) // 1000
         if self.verbose:
-            print(volume)
+            print(f"{holding=}")
+        remaining_states = self.num_states
+        state = holding * (remaining_states // 3)
+        remaining_states //= 3
 
-    def _add_indicators(self, df, symbol):
+        for indicator in self.indicators:
+            value = df_row[indicator]
+            bin_n = self.indicator_value_to_bin(indicator, value)
+            interval = remaining_states // self.n_bins
+            state += bin_n * interval
+            if self.verbose:
+                print(f"{value=} {bin_n=} {interval=} {state=}")
+            remaining_states //= self.n_bins
+        return state
+
+    def indicator_value_to_bin(self, indicator, value):
+        """Return the index of the bin that value falls into.
+
+        That is the position of the first upper bound in
+        self.bins[indicator] that is greater than value, or the number
+        of bounds when no bound is greater.
+        """
+        bounds = self.bins[indicator]
+        for i, upper_bound in enumerate(bounds):
+            if value < upper_bound:
+                return i
+        # len(bounds) equals "i + 1" after a full loop, but is also
+        # defined when there are no bounds at all.
+        return len(bounds)
+
+    def add_indicators(self, df, symbol):
         """Add indicators for learning to DataFrame."""
-        df.drop(columns=["SPY"], inplace=True)
-        indicators.macd(df, symbol)
-        indicators.rsi(df, symbol)
-        indicators.price_sma(df, symbol, [8])
-        indicators.price_delta(df, symbol, 3)
+        for indicator in self.indicators:
+            if indicator == "macd_diff":
+                indicators.macd(df, symbol)
+                df.drop(columns=["macd", "macd_signal"], inplace=True)
+            elif indicator == "rsi":
+                indicators.rsi(df, symbol)
+            elif indicator.startswith("price_sma_"):
+                period = int(indicator.replace("price_sma_", ""))
+                indicators.price_sma(df, symbol, [period])
+        df.drop(columns=["SPY", symbol], inplace=True)
         df.dropna(inplace=True)
 
-    def addEvidence(self, symbol="IBM",
-                    sd=dt.datetime(2008, 1, 1),
-                    ed=dt.datetime(2009, 1, 1),
-                    sv=10000):
+    def bin_indicators(self, df):
+        """Create bins for indicators."""
+        for indicator in self.indicators:
+            _, bins = pd.qcut(df[indicator], self.n_bins, retbins=True)
+            # Drop the outermost edges; the rest are the upper bounds
+            # that indicator_value_to_bin compares against.
+            self.bins[indicator] = bins[1:self.n_bins]
 
-        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
+    def get_num_states(self):
+        """Return the total num of states."""
+        num_states = 3  # Three states holding (1000, 0, -1000)
+        for _ in self.indicators:
+            num_states *= self.n_bins
+        return num_states
+
+    def update_holding(self, action, holding):
+        """Return the holding that results from taking action.
+
+        Action 0 (buy) gives 1000, action 1 (sell) gives -1000 and
+        action 2 (hold) gives 0.
+        """
+        if action == 0:  # buy
+            return 1000
+        elif action == 1:  # sell
+            return -1000
+        elif action == 2:  # hold
+            return 0
+        raise ValueError(f"unknown action: {action}")
+
+    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
         df = util.get_data([symbol], pd.date_range(sd, ed))
-        self._add_indicators(df, symbol)
+        self.add_indicators(df, symbol)
+        self.bin_indicators(df)
+
+        self.learner = Learner(self.num_states, self.num_actions)
+
+        holding = 0
+        state = self.row_to_state(holding, df.iloc[0])
+        action = self.learner.querysetstate(state)
+        print(f"{action=}")
+        for row in df.iloc[1:].itertuples(index=False):
+            holding = self.update_holding(action, holding)
+
+            print(row)
+
-        self.learner = Learner()
         # self.learner.query(data_x, y.to_numpy())
         # data_x = df[self.indicators].to_numpy()
 
-    def strat(self, data_y, orders):
-        self.holding = 0
-
-        def strat(row):
-            y = int(data_y.loc[row.name][0])
-            shares = 0
-            if self.holding == 0 and y == 1:
-                shares = 1000
-            elif self.holding == -1000 and y == 1:
-                shares = 2000
-            elif self.holding == 0 and y == -1:
-                shares = -1000
-            elif self.holding == 1000 and y == -1:
-                shares = -2000
-            self.holding += shares
-            return shares
-
-        orders["Shares"] = orders.apply(strat, axis=1)
-
-    def testPolicy(self, symbol="IBM",
-                   sd=dt.datetime(2009, 1, 1),
-                   ed=dt.datetime(2010, 1, 1),
-                   sv=10000):
+    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
         df = util.get_data([symbol], pd.date_range(sd, ed))
-        self._add_indicators(df, symbol)
+        self.add_indicators(df, symbol)
         # data_x = df[self.indicators].to_numpy()
@@ -81,4 +139,3 @@ class QLearner(object):
             return orders
         else:
             return orders[["Shares"]]
-
diff --git a/strategy_evaluation/experiment1.py b/strategy_evaluation/experiment1.py
index 53909d3..b3e0c9e 100644
--- a/strategy_evaluation/experiment1.py
+++ b/strategy_evaluation/experiment1.py
@@ -147,13 +147,15 @@ def experiment1(create_report=False):
     # visualize_correlations(symbol, df)
     # plot_indicators(symbol, df)
 
-    bs = BenchmarkStrategy()
-    orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
-    df["Benchmark"] = marketsim.compute_portvals(orders, sv)
-    df["Orders Benchmark"] = orders["Shares"]
+    # bs = BenchmarkStrategy()
+    # orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
+    # df["Benchmark"] = marketsim.compute_portvals(orders, sv)
+    # df["Orders Benchmark"] = orders["Shares"]
 
-    ql = QLearner(testing=True)
+    ql = QLearner(testing=True, verbose=True)
     ql.addEvidence(symbol, sd, ed, sv)
+    return
+
     orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
     df["QL"] = marketsim.compute_portvals(orders, sv)
     df["Orders QL"] = orders["Shares"]
@@ -168,11 +170,6 @@ def experiment1(create_report=False):
     m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
     plt.show()
 
-    # For debugging the classification learner:
-    # df["y_train"] = sl.addEvidence(symbol, sd, ed, sv)
-    # df["y_query"] = sl.testPolicy(symbol, sd, ed, sv)
-    # df[["y_train", "y_query"]].plot(ax=ax[1])
-
 
 if __name__ == "__main__":
     experiment1()