forked from doda/advances-in-financial-ml-notes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
labeling.py
113 lines (94 loc) · 3.94 KB
/
labeling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pandas as pd
from multiprocess import mpPandasObj
# we ignore the first version in the text, and immediately grab the one with meta-labeling
def getEvents(close, tEvents, ptSl, trgt, minRet, numThreads=1, t1=False, side=None):
    """Build the events table (with optional side for meta-labeling).

    Parameters
    ----------
    close : price series, forwarded unchanged to ``applyPtSlOnT1``.
    tEvents : index of event start times; used to select rows of ``trgt``
        and as the index of the default ``t1``.
    ptSl : sequence whose first two entries are the profit-taking and
        stop-loss multipliers. When ``side`` is None only ``ptSl[0]`` is
        passed to the barrier search (for both barriers); otherwise
        ``ptSl[:2]`` is passed.
    trgt : series of targets; only entries at ``tEvents`` that are strictly
        greater than ``minRet`` are kept.
    minRet : minimum target required for an event to be kept.
    numThreads : forwarded to ``mpPandasObj``.
    t1 : series of event end times, or ``False`` for "no end time" (an
        all-NaT series is used).
    side : optional series of position sides, looked up at the kept events.

    Returns
    -------
    DataFrame indexed by event start with columns ``t1`` (earliest value
    across the columns returned by the barrier search), ``trgt``, ``side``
    (only when ``side`` was given), ``pt`` (= ``ptSl[0]``) and ``sl``
    (= ``ptSl[1]``).
    """
    # 1) targets at the event timestamps that clear the minimum
    targets = trgt.loc[tEvents]
    targets = targets[targets > minRet]

    # 2) end times (max holding period); default is "none"
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=tEvents)

    # 3) assemble the events frame and search for the first barrier touch
    has_side = side is not None
    if has_side:
        sides = side.loc[targets.index]
        barrier_widths = ptSl[:2]
    else:
        # no side supplied: treat every event as long, same width both ways
        sides = pd.Series(1.0, index=targets.index)
        barrier_widths = [ptSl[0], ptSl[0]]

    events = pd.concat({'t1': t1, 'trgt': targets, 'side': sides}, axis=1)
    events = events.dropna(subset=['trgt'])

    # presumably mpPandasObj splits events.index into 'molecule' chunks and
    # concatenates the per-chunk results -- verify against its definition
    touches = mpPandasObj(
        func=applyPtSlOnT1,
        pdObj=('molecule', events.index),
        numThreads=numThreads,
        close=close,
        events=events,
        ptSl=barrier_widths,
    )
    # row-wise minimum skips missing entries, giving the earliest touch
    events['t1'] = touches.dropna(how='all').min(axis=1)

    if not has_side:
        events = events.drop('side', axis=1)

    # keep the multipliers alongside the events for later use
    events['pt'] = ptSl[0]
    events['sl'] = ptSl[1]
    return events
def applyPtSlOnT1(close, events, ptSl, molecule):
# apply stop loss/profit taking, if it takes place before t1 (end of event)
events_ = events.loc[molecule]
out = events_[['t1']].copy(deep=True)
if ptSl[0] > 0:
pt = ptSl[0] * events_['trgt']
else:
pt = pd.Series(index=events.index) # NaNs
if ptSl[1] > 0:
sl = - ptSl[1] * events_['trgt']
else:
sl = pd.Series(index=events.index) # 'mo NaNs
for loc, t1 in events_['t1'].fillna(close.index[-1]).iteritems():
df0 = close[loc:t1] # path prices
df0 = (df0 / close[loc] - 1) * events_.at[loc, 'side'] # path returns
out.loc[loc, 'sl'] = df0[df0<sl[loc]].index.min() # earliest stop loss
out.loc[loc, 'pt'] = df0[df0>pt[loc]].index.min() # earliest profit take
return out
def getVerticalBarriers(close, tEvents, numDays):
    """Return, for each event, the first bar at or after ``numDays`` later.

    ``close.index`` is searched for ``tEvents + numDays`` days. Positions
    that fall past the end of ``close`` are discarded, and the surviving
    timestamps are paired with the leading entries of ``tEvents``, so the
    result can be shorter than ``tEvents``.
    """
    horizon = pd.Timedelta(days=numDays)
    positions = close.index.searchsorted(tEvents + horizon)
    # drop targets that land beyond the last available bar
    in_range = positions[positions < close.shape[0]]
    barrier_times = close.index[in_range]
    return pd.Series(barrier_times, index=tEvents[:in_range.shape[0]])
def barrierTouched(out_df, events):
    """Label each row of ``out_df`` by which barrier its return crossed.

    For every row, ``ret`` is compared with ``trgt`` scaled by the ``pt``
    and ``sl`` values found in ``events`` at the same index label:

    *  1 -- ``ret`` is positive and above ``trgt * pt``
    * -1 -- ``ret`` is negative and below ``-trgt * sl``
    *  0 -- neither (treated as the vertical barrier)

    The labels are written to a new ``'bin'`` column of ``out_df``, which is
    modified in place and also returned.
    """
    def classify(stamp, row):
        realised = row['ret']
        width = row['trgt']
        # both thresholds are looked up for every row
        hit_upper = realised > width * events.loc[stamp, 'pt']
        hit_lower = realised < -width * events.loc[stamp, 'sl']
        if realised > 0.0 and hit_upper:
            return 1   # top barrier
        if realised < 0.0 and hit_lower:
            return -1  # bottom barrier
        return 0       # vertical barrier

    out_df['bin'] = [classify(stamp, row) for stamp, row in out_df.iterrows()]
    return out_df
def getBins(events, close):
    '''
    Compute each event's outcome (using side information when present).

    events is a DataFrame where:
    -events.index is the event's start time
    -events['t1'] is the event's end time (rows without one are skipped)
    -events['trgt'] is the event's target
    -events['pt'] / events['sl'] are the barrier multipliers read by
     barrierTouched
    -events['side'] (optional) is the algo's position side

    Returns a DataFrame indexed by event start with columns 'ret', 'trgt',
    'bin' and, when 'side' is present, 'side'.
    Case 1: ('side' not in events): bin comes from barrierTouched
            (1 top barrier, -1 bottom barrier, 0 otherwise).
    Case 2: ('side' in events): ret is multiplied by side and any bin with
            ret <= 0 is set to 0 (meta-labeling).
    '''
    # 1) prices at every event start / end timestamp
    live = events.dropna(subset=['t1'])
    has_side = 'side' in live
    stamps = live.index.union(live['t1'].values).drop_duplicates()
    prices = close.reindex(stamps, method='bfill')

    # 2) returns between event start and event end
    exit_px = prices.loc[live['t1'].values].values
    entry_px = prices.loc[live.index]
    out = pd.DataFrame(index=live.index)
    out['ret'] = exit_px / entry_px - 1
    if has_side:
        out['ret'] *= live['side']  # meta-labeling
    out['trgt'] = live['trgt']

    # 3) label by barrier, then apply the meta-labeling overrides
    out = barrierTouched(out, events)
    if has_side:
        out.loc[out['ret'] <= 0, 'bin'] = 0
        out['side'] = events['side']
    return out
# TODO: Rewrite "barrierTouched"