Improve QLearner so that commission is considered
This commit is contained in:
@@ -28,18 +28,25 @@ class QLearner(object):
|
||||
|
||||
def row_to_state(self, holding, df_row):
    """Transforms a row into a state value.

    Parameters
    ----------
    holding : int
        Current share position; must be one of -1000, 0, 1000.
    df_row : pandas.Series
        A row of the indicator DataFrame; indexed by the names in
        ``self.indicators``.

    Returns
    -------
    int
        A state index in ``range(self.num_states)`` that uniquely encodes
        the holding plus the bin of every indicator.
    """
    # Map the three legal positions onto 0, 1, 2.
    # NOTE(review): assert is stripped under `python -O`; if holding can
    # come from untrusted input this should raise ValueError instead.
    assert(holding in [-1000, 0, 1000])
    holding = (holding + 1000) // 1000
    assert(holding in [0, 1, 2])

    # For each indicator that goes into the state the interval becomes
    # smaller based on how many bins the indicator has. The first
    # 'indicator' is the information about how many shares we are currently
    # holding. So for example, if I have 450 states then the interval (aka
    # remaining_states) is 150 because there are three values for holding:
    # holding = 0 -> state = 0 * 150 = 0
    # holding = 1 -> state = 1 * 150 = 150
    # holding = 2 -> state = 2 * 150 = 300
    remaining_states = self.num_states // 3
    state = holding * remaining_states

    for indicator in self.indicators:
        value = df_row[indicator]
        bin_n = self.indicator_value_to_bin(indicator, value)
        # Shrink the interval for the next indicator, then offset the
        # state by this indicator's bin within the current interval.
        remaining_states //= self.n_bins
        state += bin_n * remaining_states
    return state
def indicator_value_to_bin(self, indicator, value):
|
||||
@@ -103,18 +110,21 @@ class QLearner(object):
|
||||
|
||||
def train(self, df, symbol, sv):
    """Run one training pass of the Q-learner over a price series.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator data plus a ``symbol`` column holding the adjusted
        closing price; iterated in chronological order.
    symbol : str
        Column name of the traded symbol in ``df``.
    sv : float
        Starting portfolio value (initial cash).
    """
    holding = Holding(sv, 0, sv)

    # Seed the learner with the state of the first row and execute the
    # initial action.
    row = df.iloc[0]
    state = self.row_to_state(holding.shares, row)
    action = self.learner.querysetstate(state)
    adj_closing_price = row[symbol]
    # Snapshot equity BEFORE the order so the first reward reflects any
    # commission/impact that handle_order charges against cash.
    equity = holding.equity
    self.handle_order(action, holding, adj_closing_price)

    for index, row in df.iloc[1:].iterrows():
        adj_closing_price = row[symbol]
        new_equity = holding.cash + holding.shares * adj_closing_price
        # Reward is the change relative to the pre-order equity, so
        # trading costs lower the reward of the action that incurred them.
        r = self.get_reward(equity, new_equity)
        s_prime = self.row_to_state(holding.shares, row)
        a = self.learner.query(s_prime, r)
        equity = new_equity
        self.handle_order(a, holding, adj_closing_price)
        if self.verbose:
            print(f"{holding=} {s_prime=} {r=} {a=}")
Reference in New Issue
Block a user