Start with optimize something exercise. Also add a playground for testing candlestick plotting via mplfinance.

This commit is contained in:
Felix Martin 2020-08-28 22:36:43 -04:00
parent a11cc99a88
commit a7e17c6059
6 changed files with 381 additions and 6 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
__pycache__
data
grading
util.py

View File

@ -15,20 +15,26 @@ sudo pacman -S python-pandas --asdeps python-pandas-datareader python-numexpr \
python-numpy
```
I am also using the wonderful
[mplfinance](https://github.com/matplotlib/mplfinance). You can install
mplfinance via pip and find the tutorial
[here](https://github.com/matplotlib/mplfinance#tutorials).
```
pip install mplfinance --user
```
Use unzip with the `-n` flag to extract the archives for the different
exercises. This makes sure that you do not overwrite any of the existing files. I
might add a makefile to automate this later.
```
unzip -n zips/20Spring_martingale.zip -d ./
unzip -n zips/19fall_optimize_something.zip -d ./
```
[Here](https://pythonprogramming.net/candlestick-ohlc-graph-matplotlib-tutorial/)
is a tutorial for how to plot candlestick data. Will come in handy later.
# Reports
Let's test if I can reference the reports from here:
[Report 1](./martingale/martingale.md)
- [Report 1](./martingale/martingale.md)
- [Report 2](#)

View File

@ -0,0 +1,237 @@
"""MC1-P2: Optimize a portfolio - grading script.
Usage:
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc1_p2_grading/grade_optimization.py
"""
import pytest
from grading.grading import grader, GradeResult, time_limit, IncorrectOutput
import os
import sys
import traceback as tb
import numpy as np
import pandas as pd
import datetime
from collections import namedtuple
from util import get_data
#from portfolio.analysis import get_portfolio_value, get_portfolio_stats
# Student code: name of the module that test_optimization() loads with
# importlib.import_module() and caches in this module's globals.
# main_code = "portfolio.optimization" # module name to import
main_code = "optimization"
def str2dt(strng):
    """Convert a dash-separated 'YEAR-MONTH-DAY' string to a datetime.

    The string is split on '-' and must yield exactly three integer
    fields; the result is a ``datetime.datetime`` at midnight of that day.
    """
    fields = strng.split('-')
    year, month, day = (int(field) for field in fields)
    return datetime.datetime(year=year, month=month, day=day)
# Test cases
# Each case bundles:
#   inputs      - keyword data for the student function: start_date, end_date
#                 (datetime objects built by str2dt) and the list of symbols
#   outputs     - expected allocations, one per symbol, in the same order
#   description - label included in the failure message
OptimizationTestCase = namedtuple('OptimizationTestCase', ['inputs', 'outputs', 'description'])
# The list is fed to pytest.mark.parametrize on test_optimization(); each
# namedtuple unpacks into the (inputs, outputs, description) arguments.
optimization_test_cases = [
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2010-01-01'),
            end_date=str2dt('2010-12-31'),
            symbols=['GOOG', 'AAPL', 'GLD', 'XOM']
        ),
        outputs=dict(
            allocs=[0.0, 0.4, 0.6, 0.0]
        ),
        description="Wiki example 1"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2004-01-01'),
            end_date=str2dt('2006-01-01'),
            symbols=['AXP', 'HPQ', 'IBM', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.78, 0.22, 0.0, 0.0]
        ),
        description="Wiki example 2"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2004-12-01'),
            end_date=str2dt('2006-05-31'),
            symbols=['YHOO', 'XOM', 'GLD', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.0, 0.07, 0.59, 0.34]
        ),
        description="Wiki example 3"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2005-12-01'),
            end_date=str2dt('2006-05-31'),
            symbols=['YHOO', 'HPQ', 'GLD', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.0, 0.1, 0.25, 0.65]
        ),
        description="Wiki example 4"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2005-12-01'),
            end_date=str2dt('2007-05-31'),
            symbols=['MSFT', 'HPQ', 'GLD', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.0, 0.27, 0.11, 0.62]
        ),
        description="MSFT vs HPQ"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2006-05-31'),
            end_date=str2dt('2007-05-31'),
            symbols=['MSFT', 'AAPL', 'GLD', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.42, 0.32, 0.0, 0.26]
        ),
        description="MSFT vs AAPL"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2011-01-01'),
            end_date=str2dt('2011-12-31'),
            symbols=['AAPL', 'GLD', 'GOOG', 'XOM']
        ),
        outputs=dict(
            allocs=[0.46, 0.37, 0.0, 0.17]
        ),
        description="Wiki example 1 in 2011"
    ),
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt('2010-01-01'),
            end_date=str2dt('2010-12-31'),
            symbols=['AXP', 'HPQ', 'IBM', 'HNZ']
        ),
        outputs=dict(
            allocs=[0.0, 0.0, 0.0, 1.0]
        ),
        description="Year of the HNZ"
    )
]
# Tolerances and weights used by test_optimization().
#   sum_to_one  - allowed deviation of the allocation sum from 1.0
#   alloc_range - allowed overshoot outside the [0.0, 1.0] range per allocation
#   alloc_match - allowed distance of each allocation from its expected value
abs_margins = dict(sum_to_one=0.02, alloc_range=0.02, alloc_match=0.1) # absolute margin of error for each component
points_per_component = dict(sum_to_one=2.0, alloc_range=2.0, alloc_match=4.0) # points for each component, for partial credit
# Total points one test case can earn: the sum of all component points.
points_per_test_case = sum(points_per_component.values())
seconds_per_test_case = 10 # execution time limit
# Grading parameters (picked up by module-level grading fixtures)
# Maximum achievable score: number of test cases times points per test case.
max_points = float(len(optimization_test_cases) * points_per_test_case)
html_pre_block = True # surround comments with HTML <pre> tag (for T-Square comments field)
# Test function(s)
@pytest.mark.parametrize("inputs,outputs,description", optimization_test_cases)
def test_optimization(inputs, outputs, description, grader):
    """Grade one optimize_portfolio() test case and report it to the grader.

    Parameters:
        inputs: dict with 'start_date', 'end_date' and 'symbols'.
        outputs: dict with 'allocs', the expected allocation per symbol.
        description: label of the test case, used in failure messages.
        grader: fixture whose add_result() receives a GradeResult.

    Scoring:
        * sum_to_one: allocations sum to 1.0 within abs_margins['sum_to_one'];
          otherwise they are normalized before the remaining checks.
        * alloc_range: each allocation lies in [0.0, 1.0] within the margin.
        * alloc_match: each in-range allocation is within the margin of the
          expected value.
        If any criterion fails, the points for this test case are reset to 0.

    Raises:
        IncorrectOutput: when one or more criteria fail.
        Exception: anything raised while importing or running the student
            code is reported as a failed result and then re-raised.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Import the student module only once and cache it in this module's
        # globals under the configured name.
        if main_code not in globals():
            import importlib
            globals()[main_code] = importlib.import_module(main_code)
        # Look the module up by its configured name instead of relying on a
        # hard-coded global called `optimization`.
        student_module = globals()[main_code]

        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        # Run student code with time limit (in seconds, per test case)
        with time_limit(seconds_per_test_case):
            # * Find optimal allocations (the statistics are unpacked so that
            #   a wrongly shaped return value fails here, but are not graded)
            (student_allocs, student_cr, student_adr,
             student_sddr, student_sr) = student_module.optimize_portfolio(
                sd=start_date, ed=end_date, syms=symbols, gen_plot=False)
            # make sure it's a NumPy array, for easier computation
            student_allocs = np.float32(student_allocs)

        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']

        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append(" sum of allocations: {} (expected: 1.0)".format(sum_allocs))
            # normalize allocations, if they don't sum to 1.0
            student_allocs = student_allocs / sum_allocs
        else:
            points_earned += points_per_component['sum_to_one']

        # * Check alloc_range: Each allocation is within [0.0, 1.0] +/- margin
        # * Check alloc_match: Each allocation matches expected value +/- margin
        points_per_alloc_range = points_per_component['alloc_range'] / len(correct_allocs)
        points_per_alloc_match = points_per_component['alloc_match'] / len(correct_allocs)
        range_margin = abs_margins['alloc_range']
        for symbol, alloc, correct_alloc in zip(symbols, student_allocs, correct_allocs):
            if alloc < -range_margin or alloc > (1.0 + range_margin):
                incorrect = True
                msgs.append(" {} - allocation out of range: {} (expected: [0.0, 1.0])".format(symbol, alloc))
            else:
                points_earned += points_per_alloc_range
                if abs(alloc - correct_alloc) > abs_margins['alloc_match']:
                    incorrect = True
                    msgs.append(" {} - incorrect allocation: {} (expected: {})".format(symbol, alloc, correct_alloc))
                else:
                    points_earned += points_per_alloc_match

        if incorrect:
            inputs_str = " start_date: {}\n" \
                         " end_date: {}\n" \
                         " symbols: {}\n".format(start_date, end_date, symbols)
            # NOTE: the template check that used to guard this reset is
            # disabled, so any failed criterion forfeits all partial credit
            # for this test case.
            points_earned = 0
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in the
        # student file; show only the filename instead of the absolute path.
        student_file = main_code.rsplit('.', 1)[-1] + '.py'
        tb_list = [
            (os.path.basename(row[0]), row[1], row[2], row[3])
            for row in tb.extract_tb(sys.exc_info()[2])
        ]
        tb_list = [row for row in tb_list if row[0] == student_file]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
# When executed as a script, run pytest on this file with the "-s" flag.
if __name__ == "__main__":
    pytest.main(["-s", __file__])

View File

@ -0,0 +1,95 @@
"""MC1-P2: Optimize a portfolio.
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved
Template code for CS 4646/7646
Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab. This copyright statement should not be removed
or edited.
We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.
-----do not edit anything above this line---
Student Name: Tucker Balch (replace with your name)
GT User ID: tb34 (replace with your User ID)
GT ID: 900897987 (replace with your GT ID)
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
from util import get_data, plot_data
# This is the function that will be tested by the autograder
# The student must update this code to properly implement the functionality
def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \
    syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False):
    """Return allocations and statistics for a portfolio of ``syms``.

    This is still the template placeholder: the allocations and statistics
    returned are hard-coded and are not meant to be correct for any test case.

    Parameters:
        sd: start of the date range (datetime).
        ed: end of the date range (datetime).
        syms: list of ticker symbols in the portfolio. The default list is
            never mutated here.
        gen_plot: when True, produce plots; when False, no plotting is done,
            so callers such as the time-limited grader are not held up.

    Returns:
        Tuple ``(allocs, cr, adr, sddr, sr)``: allocations as a NumPy array,
        cumulative return, average daily return, standard deviation of daily
        returns and Sharpe ratio.
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # find the allocations for the optimal portfolio
    # note that the values here ARE NOT meant to be correct for a test case
    allocs = np.asarray([0.2, 0.2, 0.3, 0.3])  # add code here to find the allocations
    cr, adr, sddr, sr = [0.25, 0.001, 0.0005, 2.1]  # add code here to compute stats

    # Get daily portfolio value
    port_val = prices_SPY  # add code here to compute daily portfolio values

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        # Plot only on request; previously the price plot was drawn on every
        # call, regardless of gen_plot.
        plot_data(prices_all)
        # add code to plot here
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)

    return allocs, cr, adr, sddr, sr
def test_code():
    """Run optimize_portfolio() on sample inputs and print the results.

    This helper WILL NOT be called by the auto grader; it only exists to try
    the code out locally. Nothing defined here is visible to
    optimize_portfolio(), and the autograder uses different input values.
    """
    # Sample input parameters
    start_date = dt.datetime(2009, 1, 1)
    end_date = dt.datetime(2010, 1, 1)
    symbols = ['GOOG', 'AAPL', 'GLD', 'XOM', 'IBM']

    # Assess the portfolio
    allocations, cr, adr, sddr, sr = optimize_portfolio(
        sd=start_date,
        ed=end_date,
        syms=symbols,
        gen_plot=False,
    )

    # Print statistics
    report = (
        f"Start Date: {start_date}",
        f"End Date: {end_date}",
        f"Symbols: {symbols}",
        f"Allocations:{allocations}",
        f"Sharpe Ratio: {sr}",
        f"Volatility (stdev of daily returns): {sddr}",
        f"Average Daily Return: {adr}",
        f"Cumulative Return: {cr}",
    )
    for line in report:
        print(line)
# Run the local try-out helper when this file is executed as a script.
if __name__ == "__main__":
    # This code WILL NOT be called by the auto grader
    # Do not assume that it will be called
    test_code()

36
playground/play.py Normal file
View File

@ -0,0 +1,36 @@
import os
import pandas as pd
import datetime as dt
def symbol_to_path(symbol, base_dir=None):
    """Build the path of the CSV file that belongs to a ticker symbol.

    When ``base_dir`` is None the directory comes from the MARKET_DATA_DIR
    environment variable, falling back to '../data/'.
    """
    directory = base_dir
    if directory is None:
        directory = os.environ.get("MARKET_DATA_DIR", '../data/')
    filename = "{}.csv".format(str(symbol))
    return os.path.join(directory, filename)
def get_data(path, dates):
    """Load open/high/low/close data from one CSV file, aligned to ``dates``.

    The CSV at ``path`` is read with its 'time' column parsed as the date
    index; only the 'open', 'high', 'low' and 'close' columns are kept. The
    result is indexed by ``dates`` and joined with the CSV rows, so dates
    missing from the file hold NaN.
    """
    ohlc = pd.read_csv(
        path,
        index_col='time',
        parse_dates=True,
        usecols=['time', 'open', 'high', 'low', 'close'],
        na_values=['nan'],
    )
    return pd.DataFrame(index=dates).join(ohlc)
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Draw ``df`` as a candlestick chart with 9- and 24-period moving averages.

    ``title``, ``xlabel`` and ``ylabel`` are accepted but currently not
    passed on to the plot.
    """
    # Imported lazily so mplfinance is only needed when plotting.
    import mplfinance

    mplfinance.plot(df, type='candle', mav=(9, 24))
def test_code():
    """Plot the BTCUSD_Coinbase candles from 2020-01-01 through 2020-08-30."""
    date_range = pd.date_range(dt.datetime(2020, 1, 1), dt.datetime(2020, 8, 30))
    csv_path = symbol_to_path('BTCUSD_Coinbase')
    plot_data(get_data(csv_path, date_range))
# Run the candlestick plotting demo when this file is executed as a script.
if __name__ == "__main__":
    test_code()

Binary file not shown.