Finish first version of Q trader
parent
169dd8278d
commit
761a0366e4
|
@ -3,6 +3,13 @@ import pandas as pd
|
||||||
import util
|
import util
|
||||||
import indicators
|
import indicators
|
||||||
from qlearning_robot.QLearner import QLearner as Learner
|
from qlearning_robot.QLearner import QLearner as Learner
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Holding:
|
||||||
|
cash: int
|
||||||
|
shares: int
|
||||||
|
equity: int
|
||||||
|
|
||||||
|
|
||||||
class QLearner(object):
|
class QLearner(object):
|
||||||
|
@ -17,15 +24,12 @@ class QLearner(object):
|
||||||
self.bins = {}
|
self.bins = {}
|
||||||
self.num_states = self.get_num_states()
|
self.num_states = self.get_num_states()
|
||||||
self.num_actions = 3 # buy, sell, hold
|
self.num_actions = 3 # buy, sell, hold
|
||||||
if verbose:
|
self.learner = Learner(self.num_states, self.num_actions)
|
||||||
print(f"{self.num_states=}")
|
|
||||||
|
|
||||||
def row_to_state(self, holding, df_row):
|
def row_to_state(self, holding, df_row):
|
||||||
"""Transforms a row into a state value."""
|
"""Transforms a row into a state value."""
|
||||||
assert(holding in [-1000, 0, 1000])
|
assert(holding in [-1000, 0, 1000])
|
||||||
holding = (holding + 1000) // 1000
|
holding = (holding + 1000) // 1000
|
||||||
if self.verbose:
|
|
||||||
print(f"{holding=}")
|
|
||||||
remaining_states = self.num_states
|
remaining_states = self.num_states
|
||||||
state = holding * (remaining_states // 3)
|
state = holding * (remaining_states // 3)
|
||||||
remaining_states //= 3
|
remaining_states //= 3
|
||||||
|
@ -35,8 +39,6 @@ class QLearner(object):
|
||||||
bin_n = self.indicator_value_to_bin(indicator, value)
|
bin_n = self.indicator_value_to_bin(indicator, value)
|
||||||
interval = remaining_states // self.n_bins
|
interval = remaining_states // self.n_bins
|
||||||
state += bin_n * interval
|
state += bin_n * interval
|
||||||
if self.verbose:
|
|
||||||
print(f"{value=} {bin_n=} {interval=} {state=}")
|
|
||||||
remaining_states //= self.n_bins
|
remaining_states //= self.n_bins
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
@ -57,7 +59,7 @@ class QLearner(object):
|
||||||
elif indicator.startswith("price_sma_"):
|
elif indicator.startswith("price_sma_"):
|
||||||
period = int(indicator.replace("price_sma_", ""))
|
period = int(indicator.replace("price_sma_", ""))
|
||||||
indicators.price_sma(df, symbol, [period])
|
indicators.price_sma(df, symbol, [period])
|
||||||
df.drop(columns=["SPY", symbol], inplace=True)
|
df.drop(columns=["SPY"], inplace=True)
|
||||||
df.dropna(inplace=True)
|
df.dropna(inplace=True)
|
||||||
|
|
||||||
def bin_indicators(self, df):
|
def bin_indicators(self, df):
|
||||||
|
@ -73,50 +75,85 @@ class QLearner(object):
|
||||||
num_states *= self.n_bins
|
num_states *= self.n_bins
|
||||||
return num_states
|
return num_states
|
||||||
|
|
||||||
def update_holding(self, action, holding):
|
def handle_order(self, action, holding, adj_closing_price):
|
||||||
|
shares = 0
|
||||||
if action == 0: # buy
|
if action == 0: # buy
|
||||||
return 1000
|
if holding.shares == 0 or holding.shares == -1000:
|
||||||
if holding == 0 or holding == -1000:
|
shares = 1000
|
||||||
return 1000
|
|
||||||
elif action == 1: # sell
|
elif action == 1: # sell
|
||||||
return -1000
|
if holding.shares== 0 or holding.shares == 1000:
|
||||||
|
shares = -1000
|
||||||
elif action == 2: # hold
|
elif action == 2: # hold
|
||||||
return 0
|
shares = 0
|
||||||
raise Exception()
|
|
||||||
|
cost = shares * adj_closing_price
|
||||||
|
if shares != 0:
|
||||||
|
# Charge commission and deduct impact penalty
|
||||||
|
holding.cash -= self.commission
|
||||||
|
holding.cash -= (self.impact * adj_closing_price * abs(shares))
|
||||||
|
holding.cash -= cost
|
||||||
|
holding.shares += shares
|
||||||
|
|
||||||
|
holding.equity = holding.cash + holding.shares * adj_closing_price
|
||||||
|
|
||||||
|
def get_reward(self, equity, new_equity):
|
||||||
|
if new_equity > equity:
|
||||||
|
return 1
|
||||||
|
return -1
|
||||||
|
|
||||||
|
def train(self, df, symbol, sv):
|
||||||
|
holding = Holding(sv, 0, sv)
|
||||||
|
row = df.iloc[0]
|
||||||
|
state = self.row_to_state(holding.shares, row)
|
||||||
|
action = self.learner.querysetstate(state)
|
||||||
|
adj_closing_price = row[symbol]
|
||||||
|
self.handle_order(action, holding, adj_closing_price)
|
||||||
|
|
||||||
|
for index, row in df.iloc[1:].iterrows():
|
||||||
|
adj_closing_price = row[symbol]
|
||||||
|
new_equity = holding.cash + holding.shares * adj_closing_price
|
||||||
|
r = self.get_reward(holding.equity, new_equity)
|
||||||
|
s_prime = self.row_to_state(holding.shares, row)
|
||||||
|
a = self.learner.query(s_prime, r)
|
||||||
|
self.handle_order(a, holding, adj_closing_price)
|
||||||
|
if self.verbose:
|
||||||
|
print(f"{holding=} {s_prime=} {r=} {a=}")
|
||||||
|
|
||||||
def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
|
def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
|
||||||
df = util.get_data([symbol], pd.date_range(sd, ed))
|
df = util.get_data([symbol], pd.date_range(sd, ed))
|
||||||
self.add_indicators(df, symbol)
|
self.add_indicators(df, symbol)
|
||||||
self.bin_indicators(df)
|
self.bin_indicators(df)
|
||||||
|
|
||||||
self.learner = Learner(self.num_states, self.num_actions)
|
for _ in range(10):
|
||||||
|
self.train(df, symbol, sv)
|
||||||
holding = 0
|
|
||||||
s = self.row_to_state(holding, df.iloc[0])
|
|
||||||
a = self.learner.querysetstate(state)
|
|
||||||
print(f"{action=}")
|
|
||||||
for row in df.iloc[1:].itertuples(index=False):
|
|
||||||
holding = update_holding(a, holding)
|
|
||||||
|
|
||||||
print(row)
|
|
||||||
|
|
||||||
|
|
||||||
# self.learner.query(data_x, y.to_numpy())
|
|
||||||
# data_x = df[self.indicators].to_numpy()
|
|
||||||
|
|
||||||
def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
|
def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
|
||||||
df = util.get_data([symbol], pd.date_range(sd, ed))
|
df = util.get_data([symbol], pd.date_range(sd, ed))
|
||||||
self._add_indicators(df, symbol)
|
|
||||||
# data_x = df[self.indicators].to_numpy()
|
|
||||||
# data_y = pd.DataFrame(index=df.index, data=self.learner.query(data_x))
|
|
||||||
|
|
||||||
orders = pd.DataFrame(index=df.index)
|
orders = pd.DataFrame(index=df.index)
|
||||||
orders["Symbol"] = symbol
|
orders["Symbol"] = symbol
|
||||||
orders["Order"] = ""
|
orders["Order"] = ""
|
||||||
orders["Shares"] = 0
|
orders["Shares"] = 0
|
||||||
return orders
|
shares = orders["Shares"]
|
||||||
|
self.add_indicators(df, symbol)
|
||||||
|
holding = 0
|
||||||
|
|
||||||
|
for index, row in df.iterrows():
|
||||||
|
state = self.row_to_state(holding, row)
|
||||||
|
action = self.learner.querysetstate(state)
|
||||||
|
|
||||||
|
if action == 0: # buy
|
||||||
|
if holding == 0 or holding == -1000:
|
||||||
|
holding += 1000
|
||||||
|
orders.loc[index, "Shares"] = 1000
|
||||||
|
elif action == 1: # sell
|
||||||
|
if holding == 0 or holding == 1000:
|
||||||
|
holding -= 1000
|
||||||
|
orders.loc[index, "Shares"] = -1000
|
||||||
|
elif action == 2: # hold
|
||||||
|
pass
|
||||||
|
|
||||||
if self.testing:
|
if self.testing:
|
||||||
return orders
|
return orders
|
||||||
else:
|
else:
|
||||||
return orders[["Shares"]]
|
return orders[["Shares"]]
|
||||||
|
|
||||||
|
|
|
@ -136,7 +136,7 @@ def experiment1(create_report=False):
|
||||||
sd_out = dt.datetime(2010, 1, 1) # out-sample
|
sd_out = dt.datetime(2010, 1, 1) # out-sample
|
||||||
ed_out = dt.datetime(2011, 12, 31) # out-sample
|
ed_out = dt.datetime(2011, 12, 31) # out-sample
|
||||||
|
|
||||||
df = util.get_data([symbol], pd.date_range(sd, ed_out))
|
df = util.get_data([symbol], pd.date_range(sd_out, ed_out))
|
||||||
df.drop(columns=["SPY"], inplace=True)
|
df.drop(columns=["SPY"], inplace=True)
|
||||||
|
|
||||||
if create_report:
|
if create_report:
|
||||||
|
@ -147,15 +147,14 @@ def experiment1(create_report=False):
|
||||||
# visualize_correlations(symbol, df)
|
# visualize_correlations(symbol, df)
|
||||||
# plot_indicators(symbol, df)
|
# plot_indicators(symbol, df)
|
||||||
|
|
||||||
# bs = BenchmarkStrategy()
|
bs = BenchmarkStrategy()
|
||||||
# orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
|
orders = bs.testPolicy(symbol, sd_out, ed_out, sv)
|
||||||
# df["Benchmark"] = marketsim.compute_portvals(orders, sv)
|
df["Benchmark"] = marketsim.compute_portvals(orders, sv)
|
||||||
# df["Orders Benchmark"] = orders["Shares"]
|
df["Orders Benchmark"] = orders["Shares"]
|
||||||
|
|
||||||
ql = QLearner(testing=True, verbose=True)
|
# ql = QLearner(testing=True, verbose=False, commission=10, impact=0.005)
|
||||||
|
ql = QLearner(testing=True, verbose=False)
|
||||||
ql.addEvidence(symbol, sd, ed, sv)
|
ql.addEvidence(symbol, sd, ed, sv)
|
||||||
return
|
|
||||||
|
|
||||||
orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
|
orders = ql.testPolicy(symbol, sd_out, ed_out, sv)
|
||||||
df["QL"] = marketsim.compute_portvals(orders, sv)
|
df["QL"] = marketsim.compute_portvals(orders, sv)
|
||||||
df["Orders QL"] = orders["Shares"]
|
df["Orders QL"] = orders["Shares"]
|
||||||
|
|
Loading…
Reference in New Issue