Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync dev -> main #2140

Merged
merged 2 commits into from
Nov 19, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Prices dividend repair: fixes
Fix blind spot (regresssion)
Improve fixing div-too-big
ValueRaider committed Nov 19, 2024
commit 006649d80041e8f9788358c403a62eb4576e84bc
1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div.csv

Large diffs are not rendered by default.

816 changes: 428 additions & 388 deletions tests/data/NVT-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

546 changes: 293 additions & 253 deletions tests/data/NVT-L-1d-bad-div.csv

Large diffs are not rendered by default.

886 changes: 460 additions & 426 deletions tests/data/SCR-TO-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

480 changes: 257 additions & 223 deletions tests/data/SCR-TO-1d-bad-div.csv

Large diffs are not rendered by default.

1,109 changes: 586 additions & 523 deletions tests/data/SOLB-BR-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

615 changes: 339 additions & 276 deletions tests/data/SOLB-BR-1d-bad-div.csv

Large diffs are not rendered by default.

66 changes: 42 additions & 24 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
@@ -1426,7 +1426,7 @@ def _fix_bad_div_adjust(self, df, interval, currency):
typical_volatility = np.nan
else:
diffs = df2['Close'].iloc[start:end-1].to_numpy() - df2['Low'].iloc[start+1:end].to_numpy()
typical_volatility = np.median(np.abs(diffs))
typical_volatility = np.mean(np.abs(diffs))

possibilities = []
if (drops==0.0).all() and df2['Volume'].iloc[div_idx]==0:
@@ -1681,10 +1681,6 @@ def cluster_dividends(df, column='div', threshold=7):
div_status_df.loc[phantom_div_dt, c] = False
checks.append('phantom')

if not div_status_df[checks].any().any():
# Perfect
return df

# Remove phantoms early
if 'phantom' in div_status_df.columns:
f_phantom = div_status_df['phantom']
@@ -1709,6 +1705,29 @@ def cluster_dividends(df, column='div', threshold=7):
if 'phantom' in checks:
checks.remove('phantom')

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

if not div_status_df[checks].any().any():
# Perfect
if df_modified:
return df2
else:
return df

# Check if the present div-adjustment contradicts price action
for i in range(len(div_status_df)):
div_idx = div_status_df['idx'].iloc[i]
@@ -1789,7 +1808,8 @@ def cluster_dividends(df, column='div', threshold=7):
elif adjDelta_drop > 0.39*adjDiv:
# Still true that applied adjustment exceeds price action,
# just not clear what solution is (if any).
div_adj_exceeds_prices = True
if (x['Adj']<1.0).any():
div_adj_exceeds_prices = True
break

# Can prune the space:
@@ -1843,22 +1863,6 @@ def cluster_dividends(df, column='div', threshold=7):

checks += ['adj_exceeds_prices', 'div_date_wrong']

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

for c in checks:
if not div_status_df[c].any():
div_status_df = div_status_df.drop(c, axis=1)
@@ -1887,11 +1891,16 @@ def cluster_dividends(df, column='div', threshold=7):
div_pcts['avg yr yield'] = div_pcts['%'] / div_pcts['period']

for c in checks:
if not cluster[c].to_numpy().any():
cluster = cluster.drop(c, axis=1)
cluster_checks = [c for c in checks if c in cluster.columns]

for c in cluster_checks:
f_fail = cluster[c].to_numpy()
n_fail = np.sum(f_fail)
if n_fail in [0, n]:
continue
pct_fail = np.sum(f_fail) / n
pct_fail = n_fail / n
if c == 'div_too_big':
true_threshold = 1.0
fals_threshold = 0.2
@@ -1900,7 +1909,16 @@ def cluster_dividends(df, column='div', threshold=7):
continue

if 'adj_exceeds_prices' in cluster.columns and (cluster[c] == (cluster[c] & cluster['adj_exceeds_prices'])).all():
# More likely that true-positive. Maybe the div never happened
# Treat div_too_big=False as false positives IFF adj_exceeds_prices=true AND
# true ratio above (lowered) threshold.
true_threshold = 0.5
f_adj_exceeds_prices = cluster['adj_exceeds_prices'].to_numpy()
n = np.sum(f_adj_exceeds_prices)
n_fail = np.sum(f_fail[f_adj_exceeds_prices])
pct_fail = n_fail / n
if pct_fail > true_threshold:
f = fc & div_status_df['adj_exceeds_prices'].to_numpy()
div_status_df.loc[f, c] = True
continue

if 'div_exceeds_adj' in cluster.columns and cluster['div_exceeds_adj'].all():