Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Submission #1

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16,533 changes: 16,533 additions & 0 deletions YuAn_Lin/Demo.html

Large diffs are not rendered by default.

1,905 changes: 1,905 additions & 0 deletions YuAn_Lin/Demo.ipynb

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions YuAn_Lin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!-- ABOUT THE PROJECT -->
## About The Project
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A nice writeup.pdf addition. However, I feel sometimes that you're not fluent in your descriptions.


There are six main files in this project.
* data.py: Contains functions for downloading the universe of stocks (S&P 500 constituents).
* series_generator.py: Contains functions for generating the return series described in the paper.
* stop_loss.py: Contains a class that provides a framework for determining whether adding a stop-loss policy would improve the results of a strategy.
* Demo.ipynb: Jupyter Notebook for the demo.
* Demo.html: HTML file demonstrating the visualization built with Bokeh.
* unit_test.py: Unit tests
Binary file added YuAn_Lin/S&P500_Daily_Return.pkl
Binary file not shown.
60 changes: 60 additions & 0 deletions YuAn_Lin/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#For crawling S&P500 Tickers
import os
import requests
from io import StringIO

#For getting and modifying data
import numpy as np
import pandas as pd
import datetime
import yfinance as yf


def get_sp500_tickers():
    """Crawl the S&P 500 tickers listed on slickcharts.com.

    Returns
    -------
    list of str
        Ticker symbols, with every "." replaced by "-" so that they fit
        Yahoo Finance's ticker format.

    Raises
    ------
    requests.exceptions.RequestException
        If the page cannot be fetched (timeout, connection problem or an
        HTTP error status).
    """

    # Send a request to the link using a browser-like user agent
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    link = "https://www.slickcharts.com/sp500"

    # A timeout keeps the call from blocking forever on an unresponsive server.
    res = requests.get(link, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to the HTML parser.
    res.raise_for_status()

    # Drop carriage returns, then read the first table of the page as a dataframe
    html = res.text.replace("\r", "")
    df = pd.read_html(StringIO(html), header=None)[0]

    # Modify tickers, so they can fit Yahoo Finance's ticker format
    tickers = df["Symbol"].apply(lambda s: s.replace(".", "-"))

    return list(tickers.values)

def get_adj_close(tickers, start_y, start_m, start_d, end_y, end_m, end_d):
    """Download adjusted close data from Yahoo Finance.

    Parameters
    ----------
    tickers : str, list of str
        Ticker(s) to download
    start_y, start_m, start_d : int
        Year, month and day of the start date
    end_y, end_m, end_d : int
        Year, month and day of the end date

    Returns
    -------
    The 'Adj Close' part of the downloaded data, rounded to 2 decimals
    """

    def _as_iso(year, month, day):
        # Building a datetime first validates the date; yfinance then
        # receives it as a "YYYY-MM-DD" string.
        return datetime.datetime(year, month, day).strftime("%Y-%m-%d")

    start = _as_iso(start_y, start_m, start_d)
    end = _as_iso(end_y, end_m, end_d)

    # Get data using yfinance module and keep only the adjusted close
    prices = yf.download(tickers, start, end)
    return prices['Adj Close'].round(decimals=2)
285 changes: 285 additions & 0 deletions YuAn_Lin/series_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
import pandas as pd
import numpy as np
import datetime

import matplotlib.pyplot as plt
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import CheckboxGroup, CustomJS, ColumnDataSource, Panel, Tabs, ColorBar, LinearColorMapper
from bokeh.transform import transform
from bokeh.layouts import row, column
from bokeh.palettes import Spectral10, Reds
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn


def generate_date(start_date: str, end_date: str, step: int = 1, ignore_weekends: bool = True):
    """Generate a list of dates between two dates (both included).

    Parameters
    ----------
    start_date : str in %Y%m%d format
        Start date

    end_date : str in %Y%m%d format
        End date

    step : int
        Interval between two dates. Must be at least 1. The interval is counted
        on the dates that remain after weekends have been removed.

    ignore_weekends : bool, default True
        Whether the process ignores weekends (Saturdays and Sundays).

    Returns
    -------
    List of datetime.date, empty when end_date is before start_date

    Raises
    ------
    ValueError
        If step is smaller than 1, or if a date string does not match %Y%m%d.
    """

    # A zero step used to fail with an unrelated ZeroDivisionError and a
    # negative step silently behaved like its absolute value.
    if step < 1:
        raise ValueError("step must be a positive integer, got {!r}".format(step))

    # Generate sequence of dates
    # (pd.date_range() would be an alternative; plain date objects are kept here.)
    first_day = datetime.datetime.strptime(start_date, '%Y%m%d').date()
    last_day = datetime.datetime.strptime(end_date, '%Y%m%d').date()
    number_of_days = (last_day - first_day).days + 1
    dates = [first_day + datetime.timedelta(days=offset) for offset in range(number_of_days)]

    if ignore_weekends:
        # isoweekday(): 6 is Saturday, 7 is Sunday
        dates = [d for d in dates if d.isoweekday() not in (6, 7)]

    # Keep every step-th date, starting with the first one
    return dates[::step]


def random_walk_generater(mean: float, std: float, start_date: str, end_date: str, step: int = 1,
                          ignore_weekends: bool = True):
    """Generate a Series of returns following the Random Walk Hypothesis.

    Method:
        rt = µ + et,   et ∼ White Noise(0, σ^2)

    Parameters
    ----------
    mean : float
        Mean of the process

    std : float
        Standard deviation of the white noise

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    Series named 'random_walk' containing the returns, indexed by a DatetimeIndex named "Date"
    """

    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)

    # Drawing from N(mean, std^2) directly is the same as adding the mean to
    # zero-mean white noise, without the intermediate array.
    returns = np.random.normal(mean, std, size=len(dates))

    # Generate returns Series
    index = pd.to_datetime(dates)
    index.name = "Date"

    return pd.Series(returns, index=index, name='random_walk')


def ar_1_return_generater(mean: float, std: float, rho: float, r0: float, start_date: str, end_date: str,
                          step: int = 1, ignore_weekends: bool = True):
    """Generate a Series of returns following an AR(1) process.

    The process is the one described in Kaminski, Kathryn M., and Andrew W. Lo. (2013),
    equation 14.

    Method:
        rt = µ + ρ(rt-1 - µ) + et,   et ∼ White Noise(0, σ^2)

    Parameters
    ----------
    mean : float
        Mean of the process

    std : float
        Standard deviation of the white noise

    rho : float
        ρ ∈ (−1, 1)

    r0 : float
        Initial value of the series

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    Series named 'ar_1' containing the returns, indexed by a DatetimeIndex named "Date"
    """

    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)

    # Generate white noise: one draw per date. The draw at position 0 is not
    # used because the first value of the series is fixed to r0.
    white_noise = np.random.normal(0, std, size=len(dates))

    # Build the path in a float array. A Series initialised with an integer r0
    # would be integer-typed and could not hold the fractional returns that are
    # assigned afterwards; element-wise .iloc assignment is also much slower.
    values = np.full(len(dates), r0, dtype=float)
    for i in range(1, len(values)):
        values[i] = mean + rho * (values[i - 1] - mean) + white_noise[i]

    # Generate returns Series
    rt = pd.Series(values, index=pd.to_datetime(dates), name='ar_1')
    rt.index.name = "Date"

    return rt


def regime_switching_return_generater(mean_1: float, std_1: float, mean_2: float, std_2: float, I0: float,
                                      trans_prob_matrix: np.array, start_date: str, end_date: str,
                                      step: int = 1, ignore_weekends: bool = True):
    """Generate a Series of returns following a two-state regime-switching process.

    The process is the one described in Kaminski, Kathryn M., and Andrew W. Lo. (2013),
    equation 19.

    Method:
        rt = It*r1t + (1 − It)*r2t,   rit ~ N(µi, σi^2)

                     It+1 = 1   It+1 = 0
        A ≡  It = 1 [   p11        p12   ]
             It = 0 [   p21        p22   ]

        where A is the Markov transition probabilities matrix that governs the
        transitions between the two states.

    Parameters
    ----------
    mean_1 : float
        Mean of r1

    std_1 : float
        Standard deviation of r1

    mean_2 : float
        Mean of r2

    std_2 : float
        Standard deviation of r2

    I0 : float
        Initial value of the state

    trans_prob_matrix : 2x2 np.array
        Markov transition probabilities matrix

    start_date : str in %Y%m%d format
        Start date of the process

    end_date : str in %Y%m%d format
        End date of the process

    step : int
        Interval between two dates

    ignore_weekends : bool, default True
        Whether the process ignores weekends.

    Returns
    -------
    1. Series named 'regime_switching' containing the returns (DatetimeIndex named "Date")
    2. Series containing the states, indexed by the generated dates
    """

    # Generate sequence of dates
    dates = generate_date(start_date, end_date, step, ignore_weekends)
    n_obs = len(dates)

    # Candidate returns of both regimes, drawn for every date
    r1 = pd.Series(np.random.normal(mean_1, std_1, size=n_obs), index=dates)
    r2 = pd.Series(np.random.normal(mean_2, std_2, size=n_obs), index=dates)

    # Simulate the state path: the first matrix row gives the probabilities of
    # moving to state 1 / state 0 when the previous state is 1, the second row
    # is used for any other previous state.
    It = pd.Series(I0, index=dates)
    for i in range(1, n_obs):
        row = 0 if It.iloc[i - 1] == 1 else 1
        It.iloc[i] = np.random.choice([1, 0], 1, p=trans_prob_matrix[row])[0]

    # Pick the return of the active regime on every date
    rt = It * r1 + (1 - It) * r2
    rt.index = pd.to_datetime(rt.index)
    rt.index.name = "Date"
    rt.name = 'regime_switching'

    return rt, It


def trend_chart(returns_series, compounding: bool = False, height: int = 350, width: int = 800):
    """Show a trend chart of cumulative returns using Bokeh.

    One line is drawn per column, together with a clickable legend (mutes a
    line) and a checkbox group (shows / hides a line).

    Parameters
    ----------
    returns_series : Pandas Series
        Series contains returns.
        NOTE(review): the x values are read from a field called "Date", so the
        index is presumably expected to be named "Date" - confirm with callers.

    compounding : bool, default False
        Whether returns are reinvested back into the account. If True the
        chart shows the cumulative product of (1 + r), otherwise 1 plus the
        cumulative sum of r.

    height : int
        Height of the plot

    width : int
        Width of the plot

    Returns
    -------
    None
    """

    # Cumulative performance, starting from a base of 1
    if compounding:
        cum = (returns_series + 1).cumprod()
    else:
        cum = returns_series.cumsum() + 1

    cum = pd.DataFrame(cum)
    source = ColumnDataSource(data=cum)

    p = figure(x_axis_type="datetime", title="Trend Line", plot_height=height, plot_width=width)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Time'
    p.yaxis.axis_label = 'Total Return'

    # One line per column; the 10-colour palette is cycled when there are more columns
    lines = []
    for i, col in enumerate(cum.columns):
        color = Spectral10[i % 10]
        lines.append(p.line("Date", col, source=source, line_width=2, line_alpha=0.8, line_color=color,
                            legend_label=col, muted_color=color, muted_alpha=0.1))

    p.legend.location = "top_left"
    p.legend.click_policy = "mute"

    # Checkbox group with every line ticked initially
    labels = list(cum.columns)
    checkbox_group = CheckboxGroup(labels=labels)
    checkbox_group.active = list(range(len(labels)))

    # Browser-side callback: a line is visible only while its checkbox is active
    code = """ for (var i = 0; i < lines.length; i++) {
    lines[i].visible = false;
    if (cb_obj.active.includes(i)){lines[i].visible = true;}
    }
    """
    callback = CustomJS(code=code, args={'lines': lines})
    checkbox_group.js_on_click(callback)

    layout = row(p, checkbox_group)
    show(layout)
Loading