Improve QLearner so that commission is considered
This commit is contained in:
@@ -28,18 +28,25 @@ class QLearner(object):
|
||||
|
||||
def row_to_state(self, holding, df_row):
    """Transforms a row into a state value.

    Parameters
    ----------
    holding : int
        Current share position; must be one of -1000, 0, 1000.
    df_row : pandas.Series
        A row of the indicator DataFrame; indexed by the names in
        ``self.indicators``.

    Returns
    -------
    int
        A state index in ``range(self.num_states)`` that uniquely encodes
        the holding plus the bin of every indicator.
    """
    # Map the three legal positions onto 0, 1, 2.
    # NOTE(review): assert is stripped under `python -O`; if holding can
    # come from untrusted input this should raise ValueError instead.
    assert(holding in [-1000, 0, 1000])
    holding = (holding + 1000) // 1000
    assert(holding in [0, 1, 2])

    # For each indicator that goes into the state the interval becomes
    # smaller based on how many bins the indicator has. The first
    # 'indicator' is the information about how many shares we are currently
    # holding. So for example, if I have 450 states then the interval (aka
    # remaining_states) is 150 because there are three values for holding:
    # holding = 0 -> state = 0 * 150 = 0
    # holding = 1 -> state = 1 * 150 = 150
    # holding = 2 -> state = 2 * 150 = 300
    remaining_states = self.num_states // 3
    state = holding * remaining_states

    for indicator in self.indicators:
        value = df_row[indicator]
        bin_n = self.indicator_value_to_bin(indicator, value)
        # Shrink the interval for the next indicator, then offset the
        # state by this indicator's bin within the current interval.
        remaining_states //= self.n_bins
        state += bin_n * remaining_states
    return state
def indicator_value_to_bin(self, indicator, value):
|
||||
@@ -103,18 +110,21 @@ class QLearner(object):
|
||||
|
||||
def train(self, df, symbol, sv):
    """Run one training pass of the Q-learner over a price series.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator data plus a ``symbol`` column holding the adjusted
        closing price; iterated in chronological order.
    symbol : str
        Column name of the traded symbol in ``df``.
    sv : float
        Starting portfolio value (initial cash).
    """
    holding = Holding(sv, 0, sv)

    # Seed the learner with the state of the first row and execute the
    # initial action.
    row = df.iloc[0]
    state = self.row_to_state(holding.shares, row)
    action = self.learner.querysetstate(state)
    adj_closing_price = row[symbol]
    # Snapshot equity BEFORE the order so the first reward reflects any
    # commission/impact that handle_order charges against cash.
    equity = holding.equity
    self.handle_order(action, holding, adj_closing_price)

    for index, row in df.iloc[1:].iterrows():
        adj_closing_price = row[symbol]
        new_equity = holding.cash + holding.shares * adj_closing_price
        # Reward is the change relative to the pre-order equity, so
        # trading costs lower the reward of the action that incurred them.
        r = self.get_reward(equity, new_equity)
        s_prime = self.row_to_state(holding.shares, row)
        a = self.learner.query(s_prime, r)
        equity = new_equity
        self.handle_order(a, holding, adj_closing_price)
        if self.verbose:
            print(f"{holding=} {s_prime=} {r=} {a=}")
Reference in New Issue
Block a user