ML4T/strategy_evaluation/QLearner.py

import datetime as dt
import pandas as pd
import util
import indicators
from qlearning_robot.QLearner import QLearner as Learner


class QLearner(object):
    """Q-learning based trading strategy learner."""

    def __init__(self, verbose=False, impact=0.0, commission=0.0, testing=False):
        self.verbose = verbose
        self.impact = impact
        self.commission = commission
        self.testing = testing  # Decides which type of order df to return.
        self.indicators = ['macd_diff', 'rsi', 'price_sma_8']
        self.n_bins = 5
        self.bins = {}
        self.num_states = self.get_num_states()
        self.num_actions = 3  # buy, sell, hold
        if verbose:
            print(f"{self.num_states=}")

    def row_to_state(self, holding, df_row):
        """Transforms a row into a state value."""
        assert holding in [-1000, 0, 1000]
        holding = (holding + 1000) // 1000
        if self.verbose:
            print(f"{holding=}")
        remaining_states = self.num_states
        state = holding * (remaining_states // 3)
        remaining_states //= 3
        for indicator in self.indicators:
            value = df_row[indicator]
            bin_n = self.indicator_value_to_bin(indicator, value)
            interval = remaining_states // self.n_bins
            state += bin_n * interval
            if self.verbose:
                print(f"{value=} {bin_n=} {interval=} {state=}")
            remaining_states //= self.n_bins
        return state
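
    # The encoding above is a mixed-radix number with the holding index as the
    # most significant digit: with n_bins = 5 and three indicators there are
    # 3 * 5 ** 3 = 375 states, so e.g. a long position (holding index 2) with
    # indicator bins (3, 1, 4) maps to 2 * 125 + 3 * 25 + 1 * 5 + 4 = 334.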

    def indicator_value_to_bin(self, indicator, value):
        """Map an indicator value to its bin index."""
        for i, upper_bound in enumerate(self.bins[indicator]):
            if value < upper_bound:
                return i
        return i + 1
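
    # self.bins[indicator] holds the n_bins - 1 interior quantile edges produced
    # by bin_indicators, so values below the first edge fall in bin 0 and values
    # at or above the last edge fall in bin n_bins - 1.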

    def add_indicators(self, df, symbol):
        """Add indicator columns for learning to the DataFrame (in place)."""
        for indicator in self.indicators:
            if indicator == "macd_diff":
                indicators.macd(df, symbol)
                df.drop(columns=["macd", "macd_signal"], inplace=True)
            elif indicator == "rsi":
                indicators.rsi(df, symbol)
            elif indicator.startswith("price_sma_"):
                period = int(indicator.replace("price_sma_", ""))
                indicators.price_sma(df, symbol, [period])
        df.drop(columns=["SPY", symbol], inplace=True)
        df.dropna(inplace=True)
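
    # The indicators module is expected to append the 'macd_diff', 'rsi' and
    # 'price_sma_8' columns to df in place; the raw price columns (SPY and the
    # traded symbol) are then dropped so only indicator columns remain.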

    def bin_indicators(self, df):
        """Create quantile bins for each indicator and store the interior cut points."""
        for indicator in self.indicators:
            _, bins = pd.qcut(df[indicator], self.n_bins, retbins=True)
            self.bins[indicator] = bins[1:self.n_bins]

    def get_num_states(self):
        """Return the total number of states."""
        num_states = 3  # Three holding positions (1000, 0, -1000).
        for _ in self.indicators:
            num_states *= self.n_bins
        return num_states
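
    # num_states must agree with the radices used in row_to_state (a factor of 3
    # for the holding and a factor of n_bins per indicator), since that method
    # decomposes this same product with repeated integer division.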

    def update_holding(self, action, holding):
        """Return the new holding after applying an action."""
        if action == 0:  # buy: take a long position
            return 1000
        elif action == 1:  # sell: take a short position
            return -1000
        elif action == 2:  # hold: stay out of the market
            return 0
        raise ValueError(f"unknown action {action}")

    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000):
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        self.bin_indicators(df)
        self.learner = Learner(self.num_states, self.num_actions)
        holding = 0
        s = self.row_to_state(holding, df.iloc[0])
        a = self.learner.querysetstate(s)
        if self.verbose:
            print(f"{a=}")
        for row in df.iloc[1:].itertuples(index=False):
            holding = self.update_holding(a, holding)
            if self.verbose:
                print(row)
            # TODO: reward the learner for this step and pick the next action
            # (see the sketch below).
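
    # A possible shape for the missing training update (a sketch, assuming the
    # course QLearner exposes query(s_prime, r)): reward the current holding
    # with the day's return net of self.impact, then advance the state, e.g.
    #
    #     s = self.row_to_state(holding, df.iloc[i])
    #     a = self.learner.query(s, r)
    #
    # Note that add_indicators drops the price columns, so daily returns would
    # need to be captured before that call.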

    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000):
        df = util.get_data([symbol], pd.date_range(sd, ed))
        self.add_indicators(df, symbol)
        # TODO: step through df, query the trained learner for an action in each
        # state and fill in the orders; for now an empty order book is returned.
        orders = pd.DataFrame(index=df.index)
        orders["Symbol"] = symbol
        orders["Order"] = ""
        orders["Shares"] = 0
        if self.testing:
            return orders
        else:
            return orders[["Shares"]]
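

# Minimal usage sketch, assuming the ML4T data directory used by util.get_data
# is set up and the qlearning_robot package is importable.
if __name__ == "__main__":
    learner = QLearner(verbose=True, testing=True)
    learner.addEvidence(symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000)
    orders = learner.testPolicy(symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000)
    print(orders.head())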