Implement binning and state calculation
parent
889bcf68ca
commit
169dd8278d
|
@ -11,61 +11,100 @@ class QLearner(object):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.impact = impact
|
self.impact = impact
|
||||||
self.commission = commission
|
self.commission = commission
|
||||||
self.testing = testing
|
self.testing = testing # Decides which type of order df to return.
|
||||||
|
self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
|
||||||
|
self.n_bins = 5
|
||||||
|
self.bins = {}
|
||||||
|
self.num_states = self.get_num_states()
|
||||||
|
self.num_actions = 3 # buy, sell, hold
|
||||||
|
if verbose:
|
||||||
|
print(f"{self.num_states=}")
|
||||||
|
|
||||||
def _get_volume(self):
|
def row_to_state(self, holding, df_row):
|
||||||
"""For reference."""
|
"""Transforms a row into a state value."""
|
||||||
volume_all = ut.get_data(syms, dates, colname="Volume")
|
assert(holding in [-1000, 0, 1000])
|
||||||
volume = volume_all[syms] # only portfolio symbols
|
holding = (holding + 1000) // 1000
|
||||||
# volume_SPY = volume_all['SPY'] # only SPY, for comparison later
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print(volume)
|
print(f"{holding=}")
|
||||||
|
remaining_states = self.num_states
|
||||||
|
state = holding * (remaining_states // 3)
|
||||||
|
remaining_states //= 3
|
||||||
|
|
||||||
def _add_indicators(self, df, symbol):
|
for indicator in self.indicators:
|
||||||
|
value = df_row[indicator]
|
||||||
|
bin_n = self.indicator_value_to_bin(indicator, value)
|
||||||
|
interval = remaining_states // self.n_bins
|
||||||
|
state += bin_n * interval
|
||||||
|
if self.verbose:
|
||||||
|
print(f"{value=} {bin_n=} {interval=} {state=}")
|
||||||
|
remaining_states //= self.n_bins
|
||||||
|
return state
|
||||||
|
|
||||||
|
def indicator_value_to_bin(self, indicator, value):
    """Map an indicator value to the index of the bin it falls into.

    ``self.bins[indicator]`` is read as a sequence of upper bounds. The
    result is the index of the first bound that is strictly greater than
    ``value``; if no bound is greater, the result is ``len(bounds)``,
    i.e. the last bin.

    Args:
        indicator: Key into ``self.bins`` selecting the bounds to use.
        value: The indicator value to classify.

    Returns:
        An int in ``range(len(bounds) + 1)``.

    Raises:
        KeyError: If ``indicator`` has no entry in ``self.bins``.
    """
    # NOTE(review): assumes the bounds are sorted ascending (presumably the
    # inner edges produced by the quantile binning) -- confirm at the caller.
    upper_bounds = self.bins[indicator]
    for bin_n, upper_bound in enumerate(upper_bounds):
        if value < upper_bound:
            return bin_n
    # Not below any bound: the value belongs to the last bin. Using len()
    # instead of the loop variable keeps this defined when there are no
    # bounds at all (a single bin), where the loop body never runs.
    return len(upper_bounds)
|
||||||
|
|
||||||
|
def add_indicators(self, df, symbol):
|
||||||
"""Add indicators for learning to DataFrame."""
|
"""Add indicators for learning to DataFrame."""
|
||||||
df.drop(columns=["SPY"], inplace=True)
|
for indicator in self.indicators:
|
||||||
indicators.macd(df, symbol)
|
if indicator == "macd_diff":
|
||||||
indicators.rsi(df, symbol)
|
indicators.macd(df, symbol)
|
||||||
indicators.price_sma(df, symbol, [8])
|
df.drop(columns=["macd", "macd_signal"], inplace=True)
|
||||||
indicators.price_delta(df, symbol, 3)
|
elif indicator == "rsi":
|
||||||
|
indicators.rsi(df, symbol)
|
||||||
|
elif indicator.startswith("price_sma_"):
|
||||||
|
period = int(indicator.replace("price_sma_", ""))
|
||||||
|
indicators.price_sma(df, symbol, [period])
|
||||||
|
df.drop(columns=["SPY", symbol], inplace=True)
|
||||||
df.dropna(inplace=True)
|
df.dropna(inplace=True)
|
||||||
|
|
||||||
def addEvidence(self, symbol="IBM",
|
def bin_indicators(self, df):
|
||||||
sd=dt.datetime(2008, 1, 1),
|
"""Create bins for indicators."""
|
||||||
ed=dt.datetime(2009, 1, 1),
|
for indicator in self.indicators:
|
||||||
sv=10000):
|
ser, bins = pd.qcut(df[indicator], self.n_bins, retbins=True)
|
||||||
|
self.bins[indicator] = bins[1:self.n_bins]
|
||||||
|
|
||||||
self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
|
def get_num_states(self):
    """Return the size of the discrete state space.

    The count is the number of holding positions multiplied by
    ``self.n_bins`` once for every entry in ``self.indicators``.
    """
    holding_positions = 3  # Three states holding (1000, 0, -1000)
    return holding_positions * self.n_bins ** len(self.indicators)
|
||||||
|
|
||||||
|
def update_holding(self, action, holding):
    """Return the position that results from taking ``action``.

    Args:
        action: 0 (buy), 1 (sell) or 2 (hold).
        holding: The current position. It is accepted for interface
            compatibility but does not influence the result.

    Returns:
        1000 for buy, -1000 for sell, 0 for hold.

    Raises:
        ValueError: If ``action`` is not 0, 1 or 2.
    """
    # The former check on `holding` inside the buy branch sat after an
    # unconditional return and could never run, so it has been dropped.
    if action == 0:  # buy
        return 1000
    if action == 1:  # sell
        return -1000
    if action == 2:  # hold
        # NOTE(review): "hold" yields a flat position (0) rather than
        # keeping `holding` -- confirm this is the intended meaning.
        return 0
    raise ValueError(f"unknown action: {action!r}")
|
||||||
|
|
||||||
|
def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
|
||||||
df = util.get_data([symbol], pd.date_range(sd, ed))
|
df = util.get_data([symbol], pd.date_range(sd, ed))
|
||||||
self._add_indicators(df, symbol)
|
self.add_indicators(df, symbol)
|
||||||
|
self.bin_indicators(df)
|
||||||
|
|
||||||
|
self.learner = Learner(self.num_states, self.num_actions)
|
||||||
|
|
||||||
|
holding = 0
|
||||||
|
s = self.row_to_state(holding, df.iloc[0])
|
||||||
|
a = self.learner.querysetstate(state)
|
||||||
|
print(f"{action=}")
|
||||||
|
for row in df.iloc[1:].itertuples(index=False):
|
||||||
|
holding = update_holding(a, holding)
|
||||||
|
|
||||||
|
print(row)
|
||||||
|
|
||||||
|
|
||||||
self.learner = Learner()
|
|
||||||
# self.learner.query(data_x, y.to_numpy())
|
# self.learner.query(data_x, y.to_numpy())
|
||||||
# data_x = df[self.indicators].to_numpy()
|
# data_x = df[self.indicators].to_numpy()
|
||||||
|
|
||||||
def strat(self, data_y, orders):
|
def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
|
||||||
self.holding = 0
|
|
||||||
|
|
||||||
def strat(row):
|
|
||||||
y = int(data_y.loc[row.name][0])
|
|
||||||
shares = 0
|
|
||||||
if self.holding == 0 and y == 1:
|
|
||||||
shares = 1000
|
|
||||||
elif self.holding == -1000 and y == 1:
|
|
||||||
shares = 2000
|
|
||||||
elif self.holding == 0 and y == -1:
|
|
||||||
shares = -1000
|
|
||||||
elif self.holding == 1000 and y == -1:
|
|
||||||
shares = -2000
|
|
||||||
self.holding += shares
|
|
||||||
return shares
|
|
||||||
|
|
||||||
orders["Shares"] = orders.apply(strat, axis=1)
|
|
||||||
|
|
||||||
def testPolicy(self, symbol="IBM",
|
|
||||||
sd=dt.datetime(2009, 1, 1),
|
|
||||||
ed=dt.datetime(2010, 1, 1),
|
|
||||||
sv=10000):
|
|
||||||
df = util.get_data([symbol], pd.date_range(sd, ed))
|
df = util.get_data([symbol], pd.date_range(sd, ed))
|
||||||
self._add_indicators(df, symbol)
|
self._add_indicators(df, symbol)
|
||||||
# data_x = df[self.indicators].to_numpy()
|
# data_x = df[self.indicators].to_numpy()
|
||||||
|
@ -81,4 +120,3 @@ class QLearner(object):
|
||||||
return orders
|
return orders
|
||||||
else:
|
else:
|
||||||
return orders[["Shares"]]
|
return orders[["Shares"]]
|
||||||
|
|
||||||
|
|
|
@ -147,13 +147,15 @@ def experiment1(create_report=False):
|
||||||
# visualize_correlations(symbol, df)
|
# visualize_correlations(symbol, df)
|
||||||
# plot_indicators(symbol, df)
|
# plot_indicators(symbol, df)
|
||||||
|
|
||||||
bs = BenchmarkStrategy()
|
# bs = BenchmarkStrategy()
|
||||||
orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
|
# orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
|
||||||
df["Benchmark"] = marketsim.compute_portvals(orders, sv)
|
# df["Benchmark"] = marketsim.compute_portvals(orders, sv)
|
||||||
df["Orders Benchmark"] = orders["Shares"]
|
# df["Orders Benchmark"] = orders["Shares"]
|
||||||
|
|
||||||
ql = QLearner(testing=True)
|
ql = QLearner(testing=True, verbose=True)
|
||||||
ql.addEvidence(symbol, sd, ed, sv)
|
ql.addEvidence(symbol, sd, ed, sv)
|
||||||
|
return
|
||||||
|
|
||||||
orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
|
orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
|
||||||
df["QL"] = marketsim.compute_portvals(orders, sv)
|
df["QL"] = marketsim.compute_portvals(orders, sv)
|
||||||
df["Orders QL"] = orders["Shares"]
|
df["Orders QL"] = orders["Shares"]
|
||||||
|
@ -168,11 +170,6 @@ def experiment1(create_report=False):
|
||||||
m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
|
m = MultiCursor(fig.canvas, ax, color='r', lw=0.5)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
# For debugging the classification learner:
|
|
||||||
# df["y_train"] = sl.addEvidence(symbol, sd, ed, sv)
|
|
||||||
# df["y_query"] = sl.testPolicy(symbol, sd, ed, sv)
|
|
||||||
# df[["y_train", "y_query"]].plot(ax=ax[1])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
experiment1()
|
experiment1()
|
||||||
|
|
Loading…
Reference in New Issue