Start with optimize something exercise. Also add a playground for testing candlestick plotting via mplfinance.
This commit is contained in:
parent
a11cc99a88
commit
a7e17c6059
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
__pycache__
|
||||
data
|
||||
grading
|
||||
util.py
|
||||
|
18
README.md
18
README.md
@ -15,20 +15,26 @@ sudo pacman -S python-pandas --asdeps python-pandas-datareader python-numexpr \
|
||||
python-numpy
|
||||
```
|
||||
|
||||
I am also using the wonderful
|
||||
[mplfinance](https://github.com/matplotlib/mplfinance). You can install
|
||||
mplfinance via pip and find the tutorial
|
||||
[here](https://github.com/matplotlib/mplfinance#tutorials).
|
||||
|
||||
```
|
||||
pip install mplfinance --user
|
||||
```
|
||||
|
||||
Use unzip with the `-n` flag to extract the archives for the different
|
||||
exercises. This makes sure that you do not overwrite any of the existing files. I
|
||||
might add a makefile to automate this later.
|
||||
|
||||
```
|
||||
unzip -n zips/20Spring_martingale.zip -d ./
|
||||
unzip -n zips/19fall_optimize_something.zip -d ./
|
||||
```
|
||||
|
||||
[Here](https://pythonprogramming.net/candlestick-ohlc-graph-matplotlib-tutorial/)
|
||||
is a tutorial for how to plot candlestick data. Will come in handy later.
|
||||
|
||||
# Reports
|
||||
|
||||
Let's test if I can reference the reports from here:
|
||||
|
||||
[Report 1](./martingale/martingale.md)
|
||||
- [Report 1](./martingale/martingale.md)
|
||||
- [Report 2](#)
|
||||
|
||||
|
237
optimize_something/grade_optimization.py
Normal file
237
optimize_something/grade_optimization.py
Normal file
@ -0,0 +1,237 @@
|
||||
"""MC1-P2: Optimize a portfolio - grading script.
|
||||
|
||||
Usage:
|
||||
- Switch to a student feedback directory first (will write "points.txt" and "comments.txt" in pwd).
|
||||
- Run this script with both ml4t/ and student solution in PYTHONPATH, e.g.:
|
||||
PYTHONPATH=ml4t:MC1-P2/jdoe7 python ml4t/mc1_p2_grading/grade_optimization.py
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from grading.grading import grader, GradeResult, time_limit, IncorrectOutput
|
||||
|
||||
import os
|
||||
import sys
|
||||
import traceback as tb
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import datetime
|
||||
from collections import namedtuple
|
||||
|
||||
from util import get_data
|
||||
#from portfolio.analysis import get_portfolio_value, get_portfolio_stats
|
||||
|
||||
# Student code
|
||||
# main_code = "portfolio.optimization" # module name to import
|
||||
main_code = "optimization"
|
||||
|
||||
def str2dt(strng):
    """Convert a 'YYYY-MM-DD' string into a datetime.datetime (time 00:00).

    The string is split on '-' and must yield exactly three integer fields
    (year, month, day); a wrong field count, a non-integer field or an
    invalid calendar date raises ValueError.
    """
    fields = [int(piece) for piece in strng.split('-')]
    year, month, day = fields
    return datetime.datetime(year, month, day)
|
||||
|
||||
# Test cases
|
||||
# One grading scenario: keyword inputs for the student function, the expected
# outputs, and a short human-readable label used in failure messages.
OptimizationTestCase = namedtuple('OptimizationTestCase', ['inputs', 'outputs', 'description'])

# Compact table of scenarios, one row per test case:
# (start date, end date, portfolio symbols, expected allocations, description)
_optimization_case_rows = [
    ('2010-01-01', '2010-12-31', ['GOOG', 'AAPL', 'GLD', 'XOM'], [0.0, 0.4, 0.6, 0.0], "Wiki example 1"),
    ('2004-01-01', '2006-01-01', ['AXP', 'HPQ', 'IBM', 'HNZ'], [0.78, 0.22, 0.0, 0.0], "Wiki example 2"),
    ('2004-12-01', '2006-05-31', ['YHOO', 'XOM', 'GLD', 'HNZ'], [0.0, 0.07, 0.59, 0.34], "Wiki example 3"),
    ('2005-12-01', '2006-05-31', ['YHOO', 'HPQ', 'GLD', 'HNZ'], [0.0, 0.1, 0.25, 0.65], "Wiki example 4"),
    ('2005-12-01', '2007-05-31', ['MSFT', 'HPQ', 'GLD', 'HNZ'], [0.0, 0.27, 0.11, 0.62], "MSFT vs HPQ"),
    ('2006-05-31', '2007-05-31', ['MSFT', 'AAPL', 'GLD', 'HNZ'], [0.42, 0.32, 0.0, 0.26], "MSFT vs AAPL"),
    ('2011-01-01', '2011-12-31', ['AAPL', 'GLD', 'GOOG', 'XOM'], [0.46, 0.37, 0.0, 0.17], "Wiki example 1 in 2011"),
    ('2010-01-01', '2010-12-31', ['AXP', 'HPQ', 'IBM', 'HNZ'], [0.0, 0.0, 0.0, 1.0], "Year of the HNZ"),
]

# Expand each row into the namedtuple shape that pytest parametrizes over.
optimization_test_cases = [
    OptimizationTestCase(
        inputs=dict(
            start_date=str2dt(first_day),
            end_date=str2dt(last_day),
            symbols=tickers,
        ),
        outputs=dict(allocs=expected_allocs),
        description=label,
    )
    for first_day, last_day, tickers, expected_allocs, label in _optimization_case_rows
]
|
||||
# Per-component tolerances and point values used by test_optimization().
# Both dicts share the same keys: sum_to_one, alloc_range, alloc_match.
abs_margins = dict(sum_to_one=0.02, alloc_range=0.02, alloc_match=0.1)  # absolute margin of error for each component
points_per_component = dict(sum_to_one=2.0, alloc_range=2.0, alloc_match=4.0)  # points for each component, for partial credit
points_per_test_case = sum(points_per_component.values())  # total available for one test case
seconds_per_test_case = 10  # execution time limit

# Grading parameters (picked up by module-level grading fixtures)
max_points = float(len(optimization_test_cases) * points_per_test_case)  # total over all test cases
html_pre_block = True  # surround comments with HTML <pre> tag (for T-Square comments field)
|
||||
|
||||
# Test function(s)
|
||||
@pytest.mark.parametrize("inputs,outputs,description", optimization_test_cases)
def test_optimization(inputs, outputs, description, grader):
    """Grade one optimize_portfolio() call against the expected allocations.

    Parameters:
        inputs: dict with 'start_date', 'end_date' and 'symbols'; forwarded
            to the student function as sd, ed and syms.
        outputs: dict whose 'allocs' entry lists the expected allocations.
        description: label included in the failure message.
        grader: grading fixture; receives exactly one GradeResult per call.

    Checks (margins from abs_margins, points from points_per_component):
        sum_to_one:  the allocations sum to 1.0 within the margin.
        alloc_range: each allocation lies in [0.0, 1.0] within the margin.
        alloc_match: each allocation matches its expected value within the
            margin.

    If any check fails, the points for this test case are reset to 0 and
    IncorrectOutput is raised. Every exception (including that one) is
    reported to the grader as a 'failed' result and then re-raised.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Import the student module once and cache it in this module's
        # globals under its own name, so later test cases reuse it.
        if main_code not in globals():
            import importlib
            globals()[main_code] = importlib.import_module(main_code)
        # Resolve the module through main_code rather than a hard-coded name,
        # so changing main_code (e.g. to a dotted path) keeps working.
        student_module = globals()[main_code]

        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        # Run student code with time limit (in seconds, per test case)
        with time_limit(seconds_per_test_case):
            # The 5-way unpack also enforces the expected return shape; only
            # the allocations are graded here.
            student_allocs, _cr, _adr, _sddr, _sr = student_module.optimize_portfolio(
                sd=start_date, ed=end_date, syms=symbols, gen_plot=False)
            student_allocs = np.float32(student_allocs)  # make sure it's a NumPy array, for easier computation

        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']

        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append(" sum of allocations: {} (expected: 1.0)".format(sum_allocs))
            student_allocs = student_allocs / sum_allocs  # normalize allocations, if they don't sum to 1.0
        else:
            points_earned += points_per_component['sum_to_one']

        # * Check alloc_range: Each allocation is within [0.0, 1.0] +/- margin
        # * Check alloc_match: Each allocation matches expected value +/- margin
        points_per_alloc_range = points_per_component['alloc_range'] / len(correct_allocs)
        points_per_alloc_match = points_per_component['alloc_match'] / len(correct_allocs)
        for symbol, alloc, correct_alloc in zip(symbols, student_allocs, correct_allocs):
            if alloc < -abs_margins['alloc_range'] or alloc > (1.0 + abs_margins['alloc_range']):
                incorrect = True
                msgs.append(" {} - allocation out of range: {} (expected: [0.0, 1.0])".format(symbol, alloc))
            else:
                points_earned += points_per_alloc_range
            if abs(alloc - correct_alloc) > abs_margins['alloc_match']:
                incorrect = True
                msgs.append(" {} - incorrect allocation: {} (expected: {})".format(symbol, alloc, correct_alloc))
            else:
                points_earned += points_per_alloc_match

        if incorrect:
            inputs_str = " start_date: {}\n" \
                         " end_date: {}\n" \
                         " symbols: {}\n".format(start_date, end_date, symbols)
            # Any failed criterion forfeits the partial credit collected
            # above (the template-match check that once guarded this reset
            # is disabled, so the reset is unconditional).
            points_earned = 0
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        student_file = main_code.rsplit('.', 1)[-1] + '.py'
        tb_list = [
            # show only filename instead of long absolute path
            (os.path.basename(row[0]), row[1], row[2], row[3])
            for row in tb.extract_tb(sys.exc_info()[2])
        ]
        tb_list = [row for row in tb_list if row[0] == student_file]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
|
||||
|
||||
|
||||
# Allow running the grader directly: hand this file to pytest, with "-s" so
# output is not captured.
if __name__ == "__main__":
    pytest.main(["-s", __file__])
|
95
optimize_something/optimization.py
Normal file
95
optimize_something/optimization.py
Normal file
@ -0,0 +1,95 @@
|
||||
"""MC1-P2: Optimize a portfolio.
|
||||
|
||||
Copyright 2018, Georgia Institute of Technology (Georgia Tech)
|
||||
Atlanta, Georgia 30332
|
||||
All Rights Reserved
|
||||
|
||||
Template code for CS 4646/7646
|
||||
|
||||
Georgia Tech asserts copyright ownership of this template and all derivative
|
||||
works, including solutions to the projects assigned in this course. Students
|
||||
and other users of this template code are advised not to share it with others
|
||||
or to make it available on publicly viewable websites including repositories
|
||||
such as github and gitlab. This copyright statement should not be removed
|
||||
or edited.
|
||||
|
||||
We do grant permission to share solutions privately with non-students such
|
||||
as potential employers. However, sharing with other current or future
|
||||
students of CS 7646 is prohibited and subject to being investigated as a
|
||||
GT honor code violation.
|
||||
|
||||
-----do not edit anything above this line---
|
||||
|
||||
Student Name: Tucker Balch (replace with your name)
|
||||
GT User ID: tb34 (replace with your User ID)
|
||||
GT ID: 900897987 (replace with your GT ID)
|
||||
"""
|
||||
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import datetime as dt
|
||||
from util import get_data, plot_data
|
||||
|
||||
# This is the function that will be tested by the autograder
|
||||
# The student must update this code to properly implement the functionality
|
||||
def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \
    syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False):
    """Return allocations and statistics for a portfolio (template stub).

    This is the function that will be tested by the autograder. The
    allocations and statistics returned here are still hard-coded
    placeholders and are NOT meant to be correct for any test case; the
    student must replace them with a real optimization.

    Parameters:
        sd: start of the date range (datetime).
        ed: end of the date range (datetime).
        syms: list of portfolio symbols; the default list is only read,
            never mutated.
        gen_plot: when True, plot the loaded prices. Nothing is plotted
            otherwise, so timed autograder runs are not held up by plotting.

    Returns:
        Tuple (allocs, cr, adr, sddr, sr): a NumPy array of allocations
        followed by cumulative return, average daily return, standard
        deviation of daily returns and Sharpe ratio.
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # find the allocations for the optimal portfolio
    # note that the values here ARE NOT meant to be correct for a test case
    allocs = np.asarray([0.2, 0.2, 0.3, 0.3])  # add code here to find the allocations
    cr, adr, sddr, sr = [0.25, 0.001, 0.0005, 2.1]  # add code here to compute stats

    # Get daily portfolio value
    port_val = prices_SPY  # add code here to compute daily portfolio values

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        # Plot only on request: this call used to run unconditionally, even
        # when the caller passed gen_plot=False.
        plot_data(prices_all)
        # TODO: add code to plot here; df_temp is prepared for the
        # portfolio-vs-SPY comparison but is not plotted yet.
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)

    return allocs, cr, adr, sddr, sr
|
||||
|
||||
def test_code():
    """Run optimize_portfolio() on sample inputs and print the results.

    This helper WILL NOT be called by the auto grader; it only exists to
    try the code locally. Do not assume that any variable defined here is
    available to optimize_portfolio(), and note that the autograder will
    use different values for ALL of these inputs.
    """
    # Sample inputs
    window_start = dt.datetime(2009, 1, 1)
    window_end = dt.datetime(2010, 1, 1)
    tickers = ['GOOG', 'AAPL', 'GLD', 'XOM', 'IBM']

    # Assess the portfolio
    allocations, cr, adr, sddr, sr = optimize_portfolio(
        sd=window_start, ed=window_end, syms=tickers, gen_plot=False)

    # Print statistics, one line per item
    report_lines = (
        f"Start Date: {window_start}",
        f"End Date: {window_end}",
        f"Symbols: {tickers}",
        f"Allocations:{allocations}",
        f"Sharpe Ratio: {sr}",
        f"Volatility (stdev of daily returns): {sddr}",
        f"Average Daily Return: {adr}",
        f"Cumulative Return: {cr}",
    )
    for line in report_lines:
        print(line)
|
||||
|
||||
# Script entry point: run the local smoke test when this file is executed
# directly.
if __name__ == "__main__":
    # This code WILL NOT be called by the auto grader
    # Do not assume that it will be called
    test_code()
|
36
playground/play.py
Normal file
36
playground/play.py
Normal file
@ -0,0 +1,36 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import datetime as dt
|
||||
|
||||
|
||||
def symbol_to_path(symbol, base_dir=None):
    """Build the path of the CSV file that belongs to a ticker symbol.

    When base_dir is None the directory comes from the MARKET_DATA_DIR
    environment variable, falling back to '../data/'. The file name is
    str(symbol) followed by '.csv'.
    """
    if base_dir is not None:
        data_dir = base_dir
    else:
        data_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
    file_name = str(symbol) + ".csv"
    return os.path.join(data_dir, file_name)
|
||||
|
||||
def get_data(path, dates):
    """Load open/high/low/close columns from one CSV, aligned to dates.

    The CSV at path is read with its 'time' column parsed as the index and
    the literal string 'nan' treated as missing. The result is a DataFrame
    indexed by dates; entries of dates without a matching CSV row hold NaN
    (left join on the index).
    """
    candles = pd.read_csv(
        path,
        index_col='time',
        parse_dates=True,
        usecols=['time', 'open', 'high', 'low', 'close'],
        na_values=['nan'],
    )
    # Start from an empty frame that only carries the requested index, then
    # attach the price columns to it.
    return pd.DataFrame(index=dates).join(candles)
|
||||
|
||||
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Draw a candlestick chart of df with 9- and 24-period moving averages.

    Parameters:
        df: price data handed straight to mplfinance (presumably needs
            open/high/low/close columns on a DatetimeIndex; confirm against
            the mplfinance docs).
        title: chart title, forwarded to mplfinance.
        xlabel: accepted for signature compatibility only; it is not
            forwarded, because not every mplfinance release accepts an
            x-axis label argument.
        ylabel: y-axis label, forwarded to mplfinance.
    """
    # Imported lazily so the rest of the module works without mplfinance.
    import mplfinance as mpf
    # title and ylabel used to be silently ignored; pass them through.
    mpf.plot(df, type='candle', mav=(9, 24), title=title, ylabel=ylabel)
|
||||
|
||||
def test_code():
    """Plot BTCUSD_Coinbase candles for 2020-01-01 through 2020-08-30."""
    date_index = pd.date_range(dt.datetime(2020, 1, 1), dt.datetime(2020, 8, 30))
    csv_path = symbol_to_path('BTCUSD_Coinbase')
    plot_data(get_data(csv_path, date_index))
|
||||
|
||||
# Script entry point: run the candlestick demo when executed directly.
if __name__ == "__main__":
    test_code()
|
||||
|
BIN
zips/19fall_optimize_something.zip
Normal file
BIN
zips/19fall_optimize_something.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user