-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Submission #1
base: master
Are you sure you want to change the base?
Submission #1
Changes from all commits
ff11651
fe4dfcc
98cc6f5
ac7325b
3b386f6
28cb449
9b3d080
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<!-- ABOUT THE PROJECT --> | ||
## About The Project | ||
|
||
There are six main files in this project: | ||
* data.py: Contains functions for downloading the universe of stocks (S&P 500 constituents). | ||
* series_generator.py: Contains functions for generating the return series described in the paper. | ||
* stop_loss.py: Contains a class that provides a framework for determining whether adding a stop-loss policy would improve the results of a strategy. | ||
* Demo.ipynb: Jupyter Notebook demo. | ||
* Demo.html: HTML page demonstrating the visualizations built with Bokeh. | ||
* unit_test.py: Unit tests. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#For crawling S&P500 Tickers | ||
import os | ||
import requests | ||
from io import StringIO | ||
|
||
#For getting and modifying data | ||
import numpy as np | ||
import pandas as pd | ||
import datetime | ||
import yfinance as yf | ||
|
||
|
||
def get_sp500_tickers():
    """Crawl the S&P 500 tickers listed on slickcharts.com.

    Returns
    -------
    list
        Values of the "Symbol" column of the first HTML table on the page,
        with "." replaced by "-" so they fit Yahoo Finance's ticker format.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    # The request is sent with a browser-like user agent.
    headers = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
        )
    }
    link = "https://www.slickcharts.com/sp500"

    # A timeout keeps the call from hanging forever on an unresponsive server,
    # and an HTTP error is reported as such instead of as a parsing failure.
    res = requests.get(link, headers=headers, timeout=30)
    res.raise_for_status()

    # Drop carriage returns, then parse the first table on the page.
    html = res.text.replace("\r", "")
    df = pd.read_html(StringIO(html), header=None)[0]

    # Modify tickers, so they can fit Yahoo Finance's ticker format.
    tickers = df["Symbol"].str.replace(".", "-", regex=False)

    return tickers.tolist()
|
||
def get_adj_close(tickers, start_y, start_m, start_d, end_y, end_m, end_d):
    """Get adjusted close data from Yahoo Finance.

    Parameters
    ----------
    tickers : str, list of str
        Ticker or list of tickers to download.
    start_y, start_m, start_d : int
        Start year, start month, start day.
    end_y, end_m, end_d : int
        End year, end month, end day.

    Returns
    -------
    The 'Adj Close' selection of the downloaded data, rounded to 2 decimals.
    """
    # Reviewer note on the parameter section of this docstring: "It's good to
    # see docstrings. They can be adjusted a bit to fit our code style."

    # Modify date format
    start = datetime.date(start_y, start_m, start_d).strftime("%Y-%m-%d")
    end = datetime.date(end_y, end_m, end_d).strftime("%Y-%m-%d")

    # Get data using yfinance module. auto_adjust is passed explicitly so the
    # 'Adj Close' column is present regardless of the library's default.
    data = yf.download(tickers, start=start, end=end, auto_adjust=False)

    return data["Adj Close"].round(decimals=2)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
import pandas as pd | ||
import numpy as np | ||
import datetime | ||
|
||
import matplotlib.pyplot as plt | ||
from bokeh.io import output_notebook, show | ||
from bokeh.plotting import figure | ||
from bokeh.models import CheckboxGroup, CustomJS, ColumnDataSource, Panel, Tabs, ColorBar, LinearColorMapper | ||
from bokeh.transform import transform | ||
from bokeh.layouts import row, column | ||
from bokeh.palettes import Spectral10, Reds | ||
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn | ||
|
||
|
||
def generate_date(start_date: str, end_date: str, step: int = 1, ignore_weekends: bool = True):
    """Generate a list of dates between two days (both inclusive).

    Parameters
    ----------
    start_date : str in %Y%m%d format
        Start date

    end_date : str in %Y%m%d format
        End date

    step : int
        Interval between two dates; every ``step``-th date is kept, counted
        after weekends have been removed. Must be at least 1.

    ignore_weekends : bool, default True
        Whether Saturdays and Sundays are dropped.

    Returns
    -------
    List of ``datetime.date`` objects (empty if end_date precedes start_date)

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1, or a date string does not match %Y%m%d.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Generate sequence of dates
    first_day = datetime.datetime.strptime(start_date, "%Y%m%d").date()
    last_day = datetime.datetime.strptime(end_date, "%Y%m%d").date()
    number_of_days = (last_day - first_day).days + 1
    dates = [first_day + datetime.timedelta(days=offset) for offset in range(number_of_days)]
    # Reviewer note on the four lines above: "This works. Alternatively"
    # (the rest of the comment is missing from this capture).

    if ignore_weekends:
        # isoweekday(): 6 is Saturday, 7 is Sunday.
        dates = [day for day in dates if day.isoweekday() not in (6, 7)]

    return dates[::step]
|
||
|
||
def random_walk_generater(
    mean: float,
    std: float,
    start_date: str,
    end_date: str,
    step: int = 1,
    ignore_weekends: bool = True,
):
    """Generate a Series of returns following the Random Walk Hypothesis.

    Method:
    rt = µ + et, et ∼ White Noise(0, σ^2),

    Parameters
    ----------
    mean : float
        Mean of the process

    std : float
        Standard deviation of the white noise

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    Series named 'random_walk' containing the returns, indexed by a
    DatetimeIndex named "Date"
    """
    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)

    # Reviewer note at this step: "This could be a bit simpler with" (the rest
    # of the comment is missing from this capture). Drawing directly from
    # N(mean, std) is the same as adding the mean to N(0, std) noise.
    returns = np.random.normal(mean, std, size=len(dates))

    # Generate returns Series
    rt = pd.Series(returns, index=pd.to_datetime(dates), name="random_walk")
    rt.index.name = "Date"

    return rt
|
||
|
||
def ar_1_return_generater(
    mean: float,
    std: float,
    rho: float,
    r0: float,
    start_date: str,
    end_date: str,
    step: int = 1,
    ignore_weekends: bool = True,
):
    """Generate a Series of returns following an AR(1) process.

    The process is the one described in Kaminski, Kathryn M., and
    Andrew W. Lo. (2013), equation 14.

    Method:
    rt = µ + ρ(rt-1 - µ) + et, et ∼ White Noise(0, σ^2),

    Parameters
    ----------
    mean : float
        Mean of the process

    std : float
        Standard deviation of the white noise

    rho : float
        ρ ∈ (−1, 1)

    r0 : float
        Initial value of the series; it is used unchanged as the first return.

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    Series named 'ar_1' containing the returns, indexed by a DatetimeIndex
    named "Date"
    """
    # Reviewer notes on this function: "These lines are a bit too long. Please
    # consider using 100-120 as a maximum line length for better readability."
    # and "Nice use of greek letters. I wonder if they will render properly in
    # the documentation."

    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)

    # Generate white noise (the first draw is not used: the first return is r0)
    white_noise = np.random.normal(0, std, size=len(dates))

    # Run the recursion in a float array so an integer r0 cannot force an
    # integer dtype onto the results.
    values = np.empty(len(dates), dtype=float)
    if dates:
        values[0] = r0
    for i in range(1, len(values)):
        values[i] = mean + rho * (values[i - 1] - mean) + white_noise[i]

    # Generate returns Series
    rt = pd.Series(values, index=pd.to_datetime(dates), name="ar_1")
    rt.index.name = "Date"

    return rt
|
||
|
||
def regime_switching_return_generater(
    mean_1: float,
    std_1: float,
    mean_2: float,
    std_2: float,
    I0: float,
    trans_prob_matrix: np.array,
    start_date: str,
    end_date: str,
    step: int = 1,
    ignore_weekends: bool = True,
):
    """Generate a Series of returns following a two-state regime-switching process.

    The process is the one described in Kaminski, Kathryn M., and
    Andrew W. Lo. (2013), equation 19.

    Method:
    rt = It*r1t + (1 − It)*r2t, rit ~ N(µi, σi^2)

                     It+1 = 1   It+1 = 0
    A ≡   It = 1   [   p11        p12   ]
          It = 0   [   p21        p22   ]

    where A is the Markov transition probabilities matrix that governs the
    transitions between the two states.

    Parameters
    ----------
    mean_1 : float
        Mean of r1

    std_1 : float
        Standard deviation of r1

    mean_2 : float
        Mean of r2

    std_2 : float
        Standard deviation of r2

    I0 : float
        Initial value of the state

    trans_prob_matrix : 2x2 np.array
        Markov transition probabilities matrix. Row 0 is used when the
        previous state equals 1, row 1 otherwise; each row holds the
        probabilities of the next state being 1 and 0, in that order.

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    1. Series named 'regime_switching' containing the returns, indexed by a
       DatetimeIndex named "Date"
    2. Series containing the states, indexed by the generated dates
    """
    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)

    # Generate r1, r2
    r1 = pd.Series(np.random.normal(mean_1, std_1, size=len(dates)), index=dates)
    r2 = pd.Series(np.random.normal(mean_2, std_2, size=len(dates)), index=dates)

    # Simulate the state chain in a plain array; the dtype is wide enough for
    # both the initial state and the drawn 0/1 states.
    states = np.full(len(dates), I0, dtype=np.result_type(I0, np.int64))
    for i in range(1, len(states)):
        row = trans_prob_matrix[0] if states[i - 1] == 1 else trans_prob_matrix[1]
        states[i] = np.random.choice([1, 0], 1, p=row)[0]
    It = pd.Series(states, index=dates)

    # Generate returns Series
    rt = It * r1 + (1 - It) * r2
    rt.index = pd.to_datetime(rt.index)
    rt.index.name = "Date"
    rt.name = "regime_switching"

    return rt, It
|
||
|
||
def trend_chart(returns_series, compounding: bool = False, height: int = 350, width: int = 800):
    """Show a trend chart of cumulative returns using Bokeh.

    One line is drawn per column, next to a checkbox group that toggles the
    visibility of each line.

    Parameters
    ----------
    returns_series : Pandas Series
        Series contains returns. Anything ``pd.DataFrame`` accepts after the
        cumulative operation works, one line being drawn per column.

    compounding : bool, default False
        Whether returns are reinvested back into the account. If True the
        curve is ``(1 + r).cumprod()``, otherwise ``r.cumsum() + 1``.

    height : int
        Height of the plot

    width : int
        Width of the plot

    Returns
    -------
    None
    """
    # Reviewer note on this function: "Nice to see that you're using Bokeh for
    # visualisations."

    if compounding:
        cum = (returns_series + 1).cumprod()
    else:
        cum = returns_series.cumsum() + 1

    # The glyphs below read the x values from a column called "Date", so the
    # index is given that name; labels are made strings because they are used
    # both as column names and as legend labels.
    cum = pd.DataFrame(cum).rename_axis("Date")
    cum.columns = [str(label) for label in cum.columns]
    source = ColumnDataSource(data=cum)

    # NOTE(review): plot_height/plot_width look like the pre-3.0 Bokeh keyword
    # names - confirm against the Bokeh version this project pins.
    p = figure(x_axis_type="datetime", title="Trend Line", plot_height=height, plot_width=width)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Time'
    p.yaxis.axis_label = 'Total Return'

    lines = []
    for i, label in enumerate(cum.columns):
        color = Spectral10[i % 10]  # the palette is cycled after 10 lines
        lines.append(
            p.line(
                "Date",
                label,
                source=source,
                line_width=2,
                line_alpha=0.8,
                line_color=color,
                legend_label=label,
                muted_color=color,
                muted_alpha=0.1,
            )
        )

    p.legend.location = "top_left"
    p.legend.click_policy = "mute"

    LABELS = list(cum.columns)
    checkbox_group = CheckboxGroup(labels=LABELS)
    checkbox_group.active = list(range(len(LABELS)))

    # Browser-side callback: show exactly the lines whose checkbox is active.
    code = """ for (var i = 0; i < lines.length; i++) {
                lines[i].visible = false;
                if (cb_obj.active.includes(i)){lines[i].visible = true;}
            }
    """
    callback = CustomJS(code=code, args={'lines': lines})
    checkbox_group.js_on_click(callback)

    layout = row(p, checkbox_group)
    show(layout)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A nice
writeup.pdf
addition. However, I feel sometimes that you're not fluent in your descriptions.