diff --git a/strategy_evaluation/QLearner.py b/strategy_evaluation/QLearner.py index 6ff514c..984ca8e 100644 --- a/strategy_evaluation/QLearner.py +++ b/strategy_evaluation/QLearner.py @@ -3,6 +3,13 @@ import pandas as pd import util import indicators from qlearning_robot.QLearner import QLearner as Learner +from dataclasses import dataclass + +@dataclass +class Holding: + cash: int + shares: int + equity: int class QLearner(object): @@ -17,15 +24,12 @@ class QLearner(object): self.bins = {} self.num_states = self.get_num_states() self.num_actions = 3 # buy, sell, hold - if verbose: - print(f"{self.num_states=}") + self.learner = Learner(self.num_states, self.num_actions) def row_to_state(self, holding, df_row): """Transforms a row into a state value.""" assert(holding in [-1000, 0, 1000]) holding = (holding + 1000) // 1000 - if self.verbose: - print(f"{holding=}") remaining_states = self.num_states state = holding * (remaining_states // 3) remaining_states //= 3 @@ -35,8 +39,6 @@ class QLearner(object): bin_n = self.indicator_value_to_bin(indicator, value) interval = remaining_states // self.n_bins state += bin_n * interval - if self.verbose: - print(f"{value=} {bin_n=} {interval=} {state=}") remaining_states //= self.n_bins return state @@ -57,7 +59,7 @@ class QLearner(object): elif indicator.startswith("price_sma_"): period = int(indicator.replace("price_sma_", "")) indicators.price_sma(df, symbol, [period]) - df.drop(columns=["SPY", symbol], inplace=True) + df.drop(columns=["SPY"], inplace=True) df.dropna(inplace=True) def bin_indicators(self, df): @@ -73,50 +75,85 @@ class QLearner(object): num_states *= self.n_bins return num_states - def update_holding(self, action, holding): + def handle_order(self, action, holding, adj_closing_price): + shares = 0 if action == 0: # buy - return 1000 - if holding == 0 or holding == -1000: - return 1000 + if holding.shares == 0 or holding.shares == -1000: + shares = 1000 elif action == 1: # sell - return -1000 + if holding.shares== 0 or holding.shares == 1000: + shares = -1000 elif action == 2: # hold - return 0 - raise Exception() + shares = 0 + + cost = shares * adj_closing_price + if shares != 0: + # Charge commission and deduct impact penalty + holding.cash -= self.commission + holding.cash -= (self.impact * adj_closing_price * abs(shares)) + holding.cash -= cost + holding.shares += shares + + holding.equity = holding.cash + holding.shares * adj_closing_price + + def get_reward(self, equity, new_equity): + if new_equity > equity: + return 1 + return -1 + + def train(self, df, symbol, sv): + holding = Holding(sv, 0, sv) + row = df.iloc[0] + state = self.row_to_state(holding.shares, row) + action = self.learner.querysetstate(state) + adj_closing_price = row[symbol] + self.handle_order(action, holding, adj_closing_price) + + for index, row in df.iloc[1:].iterrows(): + adj_closing_price = row[symbol] + new_equity = holding.cash + holding.shares * adj_closing_price + r = self.get_reward(holding.equity, new_equity) + s_prime = self.row_to_state(holding.shares, row) + a = self.learner.query(s_prime, r) + self.handle_order(a, holding, adj_closing_price) + if self.verbose: + print(f"{holding=} {s_prime=} {r=} {a=}") def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000): df = util.get_data([symbol], pd.date_range(sd, ed)) self.add_indicators(df, symbol) self.bin_indicators(df) - self.learner = Learner(self.num_states, self.num_actions) - - holding = 0 - s = self.row_to_state(holding, df.iloc[0]) - a = self.learner.querysetstate(state) - print(f"{action=}") - for row in df.iloc[1:].itertuples(index=False): - holding = update_holding(a, holding) - - print(row) - - - # self.learner.query(data_x, y.to_numpy()) - # data_x = df[self.indicators].to_numpy() + for _ in range(10): + self.train(df, symbol, sv) def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000): df = util.get_data([symbol], pd.date_range(sd, ed)) - self._add_indicators(df, symbol) - # data_x = df[self.indicators].to_numpy() - # data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x)) - orders = pd.DataFrame(index=df.index) orders["Symbol"] = symbol orders["Order"] = "" orders["Shares"] = 0 - return orders + shares = orders["Shares"] + self.add_indicators(df, symbol) + holding = 0 + + for index, row in df.iterrows(): + state = self.row_to_state(holding, row) + action = self.learner.querysetstate(state) + + if action == 0: # buy + if holding == 0 or holding == -1000: + holding += 1000 + orders.loc[index, "Shares"] = 1000 + elif action == 1: # sell + if holding == 0 or holding == 1000: + holding -= 1000 + orders.loc[index, "Shares"] = -1000 + elif action == 2: # hold + pass if self.testing: return orders else: return orders[["Shares"]] + diff --git a/strategy_evaluation/experiment1.py b/strategy_evaluation/experiment1.py index b3e0c9e..758ea99 100644 --- a/strategy_evaluation/experiment1.py +++ b/strategy_evaluation/experiment1.py @@ -136,7 +136,7 @@ def experiment1(create_report=False): sd_out = dt.datetime(2010, 1, 1) # out-sample ed_out = dt.datetime(2011, 12, 31) # out-sample - df = util.get_data([symbol], pd.date_range(sd, ed_out)) + df = util.get_data([symbol], pd.date_range(sd_out, ed_out)) df.drop(columns=["SPY"], inplace=True) if create_report: @@ -147,15 +147,14 @@ def experiment1(create_report=False): # visualize_correlations(symbol, df) # plot_indicators(symbol, df) - # bs = BenchmarkStrategy() - # orders = bs.testPolicy(symbol, sd_out, ed_out, sv) - # df["Benchmark"] = marketsim.compute_portvals(orders, sv) - # df["Orders Benchmark"] = orders["Shares"] + bs = BenchmarkStrategy() + orders = bs.testPolicy(symbol, sd_out, ed_out, sv) + df["Benchmark"] = marketsim.compute_portvals(orders, sv) + df["Orders Benchmark"] = orders["Shares"] - ql = QLearner(testing=True, verbose=True) + # ql = QLearner(testing=True, verbose=False, commission=10, impact=0.005) + ql = QLearner(testing=True, verbose=False) ql.addEvidence(symbol, sd, ed, sv) - return - orders = ql.testPolicy(symbol, sd_out, ed_out, sv) df["QL"] = marketsim.compute_portvals(orders, sv) df["Orders QL"] = orders["Shares"]