Stop an annoying prompt, and some small fixes
Features:
- add 'yfc.options.calendar.accept_unexpected_Yahoo_intervals' option, to suppress the 'Accept into cache anyway?' prompt (usage sketch below)

Fixes:
- fix not knowing the day-before close for a newly-fetched dividend
- small bugfix in 'get_release_dates'
- fix another Pandas TypeError
- GetTimestampNextSession() now handles long Chinese holidays
- tweak date printing in verify's debug output
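
A minimal usage sketch of the new option, assuming the public options interface exposes it exactly as named above; the ticker and period are only examples:

```python
import yfinance_cache as yfc

# Accept unexpected Yahoo intervals into the cache without the interactive
# "Accept into cache anyway?" prompt.
yfc.options.calendar.accept_unexpected_Yahoo_intervals = True

dat = yfc.Ticker("INTC")
df = dat.history(period="1mo")  # runs unattended even if Yahoo returns odd intervals
```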
ValueRaider committed Oct 25, 2024
1 parent b0b4e11 commit 343f470
Showing 8 changed files with 62 additions and 106 deletions.
1 change: 0 additions & 1 deletion setup.cfg.template
@@ -25,7 +25,6 @@ install_requires =
pandas >= 1.5
exchange_calendars >= 4.5.5
scipy >= 1.6.3
click
appdirs
pulp

3 changes: 3 additions & 0 deletions yfinance_cache/__init__.py
@@ -18,3 +18,6 @@
from .yfc_upgrade import _tidy_upgrade_history, _fix_prices_inconsistencies
_tidy_upgrade_history()
_fix_prices_inconsistencies()

from .yfc_upgrade import _add_xcal_to_options
_add_xcal_to_options()
19 changes: 11 additions & 8 deletions yfinance_cache/yfc_financials_manager.py
@@ -1124,14 +1124,17 @@ def _calc_release_dates(self, period, refresh=True, check=False):
if interim_clusters is None:
interim_cluster = None
else:
longest_i = 0
longest_delay = interim_clusters[0]
for i in range(1, len(interim_clusters)):
m = mean(interim_clusters[i])
if m > longest_delay:
longest_i = i
longest_delay = m
interim_cluster = interim_clusters[longest_i]
if isinstance(interim_clusters[0], int):
interim_cluster = interim_clusters
else:
longest_i = 0
longest_delay = mean(interim_clusters[0])
for i in range(1, len(interim_clusters)):
m = mean(interim_clusters[i])
if m > longest_delay:
longest_i = i
longest_delay = m
interim_cluster = interim_clusters[longest_i]
for i in range(len(pes)):
for j in range(len(rdts)):
if pulp.value(x[i,j]) == 1:
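
A standalone sketch of the cluster-selection branch added above (the 'get_release_dates' bugfix): if the clustering already collapsed to a flat list of day-counts it is used as-is, otherwise the cluster with the longest mean release delay wins. The sample delays are made up:

```python
from statistics import mean

def pick_interim_cluster(interim_clusters):
    # Flat list of day-counts -> treat it as the single cluster.
    if isinstance(interim_clusters[0], int):
        return interim_clusters
    # Otherwise pick the cluster whose mean release delay is longest.
    longest_i = 0
    longest_delay = mean(interim_clusters[0])
    for i in range(1, len(interim_clusters)):
        m = mean(interim_clusters[i])
        if m > longest_delay:
            longest_i = i
            longest_delay = m
    return interim_clusters[longest_i]

print(pick_interim_cluster([[30, 32], [45, 47, 46]]))  # -> [45, 47, 46]
print(pick_interim_cluster([30, 32, 31]))              # -> [30, 32, 31]
```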
55 changes: 0 additions & 55 deletions yfinance_cache/yfc_options.py

This file was deleted.

48 changes: 16 additions & 32 deletions yfinance_cache/yfc_prices_manager.py
@@ -15,7 +15,6 @@
import dateutil
from zoneinfo import ZoneInfo
from pprint import pprint
import click
import logging


@@ -1655,26 +1654,23 @@ def _aggregate_yfdf_daily(df):
if self.interval == yfcd.Interval.Days1 and divs_df is not None and not divs_df.empty:
# Verify dividends
c = "Dividends"
h_divs = h.loc[h[c] != 0.0, c].copy().dropna()
divs = divs_df['Dividends']
yf_divs = df_yf['Dividends'][df_yf['Dividends']!=0.0]
f_orphan = np.full(h_divs.shape[0], False)
for i in range(len(h_divs)):
if h_divs.index[i] not in yf_divs.index:
f_orphan[i] = True
f_orphan = ~divs.index.isin(yf_divs.index)
if f_orphan.any():
if correct in ['one', 'all']:
print(f'Dropping these orphan dividends: {h_divs.index.date[f_orphan]}')
orphan_divs = h.loc[h_divs.index[f_orphan], [c, 'FetchDate']].copy().dropna()
print(f'Dropping these orphan dividends: {divs.index.date[f_orphan]}')
orphan_divs = divs_df.loc[divs.index[f_orphan], [c, 'FetchDate']].copy().dropna()
orphan_divs['Dividends'] = 0.0
orphan_divs['Close before'] = 1.0
orphan_divs['FetchDate'] = dt_now
self.manager.GetHistory("Events").UpdateDividends(orphan_divs)
else:
if not quiet:
divs_orphan = h_divs[f_orphan]
divs_orphan.index = divs_orphan.index.date
print(f'- detected orphan dividends: { {str(k):v for k,v in divs_orphan.to_dict().items()} }')
for dt in h_divs.index[f_orphan]:
orphan_divs = divs[f_orphan]
orphan_divs.index = orphan_divs.index.date
print(f'- detected orphan dividends: { {str(k):v for k,v in orphan_divs.to_dict().items()} }')
for dt in divs.index[f_orphan]:
f_diff_all[dt] = True

f_diff = yfcu.VerifyPricesDf(h, df_yf, self.interval, rtol=rtol, vol_rtol=vol_rtol, quiet=quiet, debug=debug, exit_first_error=True)
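
A self-contained sketch of the orphan-dividend check above: cached dividends whose dates Yahoo no longer reports are flagged with one vectorised `isin` call instead of a Python loop. The dates and amounts are invented:

```python
import pandas as pd

# Cached dividends vs. dividends Yahoo currently reports (hypothetical).
divs = pd.Series([0.22, 0.25], index=pd.to_datetime(["2024-03-07", "2024-06-06"]))
yf_divs = pd.Series([0.22], index=pd.to_datetime(["2024-03-07"]))

# Cached dividends absent from Yahoo's data are "orphans" and get zeroed out.
f_orphan = ~divs.index.isin(yf_divs.index)
print(divs.index.date[f_orphan])  # -> [datetime.date(2024, 6, 6)]
```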
@@ -1872,7 +1868,6 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
first_fetch_failed = False
try:
df = self._fetchYfHistory_dateRange(fetch_start_pad, fetch_end, prepost, debug, quiet=quiet)
df = df.loc[str(fetch_start):].copy()
except yfcd.NoPriceDataInRangeException as e:
first_fetch_failed = True
ex = e
@@ -1906,10 +1901,6 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
yfcm.StoreCacheDatum(self.ticker, "history_metadata", hist_md)

df = df_wider
if fetch_start is not None:
df = df.loc[fetch_start_dt:]
if fetch_end is not None:
df = df.loc[:fetch_end_dt-timedelta(milliseconds=1)]

if first_fetch_failed:
if second_fetch_failed:
Expand All @@ -1930,7 +1921,6 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
fetch_end_pad = s.iloc[s.index.get_indexer([str(fetch_end)], method="bfill")[0]+1].name.date()

df = self._fetchYfHistory_dateRange(fetch_start_pad, fetch_end_pad, prepost, debug, quiet=quiet)
df = df.loc[str(fetch_start) : str(fetch_end-td_1d)].copy()

else:
# Intraday
@@ -2179,6 +2169,7 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
# Send these out so at least can de-adjust h_pre.
self.manager.GetHistory("Events").UpdateDividends(df_divs)

# append new divs in df_divs to new_divs
divs_pretty = df_divs['Dividends'].copy()
divs_pretty.index = divs_pretty.index.date
self.manager.LogEvent("info", "DividendManager", f"detected {divs_pretty.shape[0]} new dividends: {divs_pretty} (before reversing adjust)")
@@ -2190,23 +2181,19 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
yfcm.StoreCacheDatum(self.ticker, "new_divs", cached_new_divs)
else:
yfcm.StoreCacheDatum(self.ticker, "new_divs", df_divs)
# Send these out so at least can de-adjust h_pre.
self.manager.GetHistory("Events").UpdateDividends(df_divs)

# Remove any out-of-range data:
if (n > 0):
if n > 0:
# NOTE: YF has a bug-fix pending merge: https://github.com/ranaroussi/yfinance/pull/1012
if end is not None:
if self.interday:
df = df[df.index.date < end_d]
else:
df = df[df.index < end_dt]
df = df.loc[:fetch_end_dt-timedelta(milliseconds=1)]
n = df.shape[0]
#
# And again for pre-start data:
if start is not None:
if self.interday:
df = df[df.index.date >= start_d]
else:
df = df[df.index >= start_dt]
df = df.loc[fetch_start_dt:]
n = df.shape[0]

# Verify that all datetimes match up with actual intervals:
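
A minimal sketch of the consolidated trimming above, which replaces the separate interday/intraday branches with one `.loc` slice per side; the millisecond subtraction makes the upper bound exclusive because label slicing is inclusive. Timestamps are illustrative:

```python
import pandas as pd
from datetime import timedelta

idx = pd.date_range("2024-10-21", periods=5, freq="D", tz="America/New_York")
df = pd.DataFrame({"Close": range(5)}, index=idx)

fetch_start_dt = idx[1]
fetch_end_dt = idx[4]

# Keep rows in [fetch_start_dt, fetch_end_dt).
df = df.loc[:fetch_end_dt - timedelta(milliseconds=1)]
df = df.loc[fetch_start_dt:]
print(df)  # three rows: 2024-10-22 .. 2024-10-24
```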
@@ -2417,10 +2404,7 @@ def _fetchYfHistory(self, start, end, prepost, debug, verify_intervals=True, dis
print(warning_msg)
print(df_na)
msg = "Accept into cache anyway?"
if False:
accept = True
else:
accept = click.confirm(msg, default=False)
accept = yfcm._option_manager.calendar.accept_unexpected_Yahoo_intervals
if accept:
for idx in np.where(f_na)[0]:
dt = intervals.index[idx]
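
Where the old code called `click.confirm`, the decision now comes from the persisted option, so nothing blocks on stdin. A hedged stand-in for the option manager (the real object lives in `yfcm._option_manager`):

```python
from types import SimpleNamespace

# Stand-in for yfcm._option_manager, for illustration only.
_option_manager = SimpleNamespace(
    calendar=SimpleNamespace(accept_unexpected_Yahoo_intervals=True))

# Old: accept = click.confirm("Accept into cache anyway?", default=False)
# New: read the cached option instead of prompting the user.
accept = _option_manager.calendar.accept_unexpected_Yahoo_intervals
print(accept)  # True -> unexpected Yahoo intervals are cached without asking
```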
@@ -3676,7 +3660,7 @@ def map_signals_to_ranges(f, f_up, f_down):
if correct_dividend:
df2.iloc[r[0]:r[1], df2.columns.get_loc('Dividends')] *= m
if correct_volume:
df2.iloc[r[0]:r[1], df2.columns.get_loc("Volume")] *= m_rcp
df2.iloc[r[0]:r[1], df2.columns.get_loc("Volume")] = (df2['Volume'].iloc[r[0]:r[1]]*m_rcp).round().astype('int')
df2.iloc[r[0]:r[1], df2.columns.get_loc('Repaired?')] = True
if r[0] == r[1] - 1:
if self.interday:
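
A small reproduction of the pattern behind the Pandas TypeError fix above: instead of multiplying the integer Volume column in place by a float, the scaled values are rounded and cast back to int before assignment. Values are illustrative:

```python
import pandas as pd

df2 = pd.DataFrame({"Volume": [1000, 2000, 3000]})
m_rcp = 1 / 1.5  # reciprocal of a price-repair multiplier (example value)

# In-place `*= m_rcp` on an int column can raise in recent pandas, so scale,
# round and cast back to int before writing the slice.
r = (0, 2)
col = df2.columns.get_loc("Volume")
df2.iloc[r[0]:r[1], col] = (df2["Volume"].iloc[r[0]:r[1]] * m_rcp).round().astype("int")
print(df2)
```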
4 changes: 2 additions & 2 deletions yfinance_cache/yfc_time.py
@@ -849,7 +849,7 @@ def GetTimestampNextSession(exchange, ts):
yfcu.TypeCheckStr(exchange, "exchange")
yfcu.TypeCheckDatetime(ts, "ts")

sched = GetExchangeSchedule(exchange, ts.date(), ts.date()+timedelta(days=7))
sched = GetExchangeSchedule(exchange, ts.date(), ts.date()+timedelta(days=10))
if "auction" in sched.columns:
sched = sched.copy()
f = ~(sched["auction"].isna())
@@ -862,7 +862,7 @@ def GetTimestampNextSession(exchange, ts):
if ts < sched["open"].iloc[i]:
tz = ZoneInfo(GetExchangeTzName(exchange))
return {"market_open": sched["open"].iloc[i].to_pydatetime().astimezone(tz), "market_close": sched["close"].iloc[i].to_pydatetime().astimezone(tz)}
raise Exception("Failed to find next '{0}' session for ts = {1}".format(exchange, ts))
raise Exception(f"Failed to find next '{exchange}' session for ts = {ts}")


def GetTimestampCurrentInterval(exchange, ts, interval, discardTimes=None, week7days=True, ignore_breaks=False):
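
An illustrative check of why the schedule window grew from 7 to 10 days: around China's National Day "Golden Week" the next Shanghai session can sit more than 7 calendar days after the current one. This uses `exchange_calendars` directly; the dates are examples:

```python
import datetime as dt
import exchange_calendars as xcal

cal = xcal.get_calendar("XSHG")  # Shanghai Stock Exchange
start = dt.date(2024, 9, 30)     # last session before the 2024 Golden Week break

# A 7-day window only reaches the start day itself; a 10-day window
# also reaches the first post-holiday session.
for days in (7, 10):
    sessions = cal.sessions_in_range(start, start + dt.timedelta(days=days))
    print(f"{days}-day window: {len(sessions)} session(s)")
```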
22 changes: 22 additions & 0 deletions yfinance_cache/yfc_upgrade.py
@@ -262,3 +262,25 @@ def _fix_prices_inconsistencies():
with open(state_fp, 'w'):
pass


def _add_xcal_to_options():
d = yfcm.GetCacheDirpath()
yfc_dp = os.path.join(d, "_YFC_")
state_fp = os.path.join(yfc_dp, "have-added-xcal-to-options")
if os.path.isfile(state_fp):
return
if not os.path.isdir(d):
if not os.path.isdir(yfc_dp):
os.makedirs(yfc_dp)
with open(state_fp, 'w'):
pass
return

o = yfcm._option_manager
o.calendar.accept_unexpected_Yahoo_intervals = True

if not os.path.isdir(yfc_dp):
os.makedirs(yfc_dp)
with open(state_fp, 'w'):
pass
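
The new upgrade helper above follows YFC's usual one-shot pattern: a marker file under the cache records that the default has been written, so the upgrade never reruns (and a fresh cache simply gets the marker). A generic, hypothetical sketch of that pattern:

```python
import os

def run_once(cache_dir, marker_name, action):
    # Skip entirely if the marker exists; otherwise run the action once
    # and drop the marker file so later imports do nothing.
    marker_dir = os.path.join(cache_dir, "_YFC_")
    marker_fp = os.path.join(marker_dir, marker_name)
    if os.path.isfile(marker_fp):
        return
    action()
    os.makedirs(marker_dir, exist_ok=True)
    with open(marker_fp, "w"):
        pass

run_once("/tmp/yfc-cache", "have-added-xcal-to-options",
         lambda: print("defaulting accept_unexpected_Yahoo_intervals to True"))
```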

16 changes: 8 additions & 8 deletions yfinance_cache/yfc_utils.py
@@ -436,13 +436,13 @@ def VerifyPricesDf(h, df_yf, interval, rtol=0.0001, vol_rtol=0.005, exit_first_e
n_diff = np.sum(f_diff)
if not quiet:
print(f"WARNING: {istr}: {n_diff}/{n} differences in column {c}")
df_diffs = h_divs[f_diff].join(yf_divs[f_diff], lsuffix="_cache", rsuffix="_yf")
df_diffs.index = df_diffs.index.tz_convert(h_divs.index.tz)
if interday:
df_diffs.index = df_diffs.index.date
df_diffs["error"] = df_diffs[c+"_cache"] - df_diffs[c+"_yf"]
df_diffs["error %"] = (df_diffs["error"]*100 / df_diffs[c+"_yf"]).round(1).astype(str) + '%'
if not quiet:
df_diffs = h_divs[f_diff].join(yf_divs[f_diff], lsuffix="_cache", rsuffix="_yf")
df_diffs.index = df_diffs.index.tz_convert(h_divs.index.tz)
if interday:
df_diffs.index = df_diffs.index.tz_convert(df_yf.index.tz).date
df_diffs["error"] = df_diffs[c+"_cache"] - df_diffs[c+"_yf"]
df_diffs["error %"] = (df_diffs["error"]*100 / df_diffs[c+"_yf"]).round(1).astype(str) + '%'
print(df_diffs)
f_diff_all = f_diff_all | f_diff
if 'Dividends' not in errors_str:
@@ -487,7 +487,7 @@ def VerifyPricesDf(h, df_yf, interval, rtol=0.0001, vol_rtol=0.005, exit_first_e
df_diffs = h_ss.join(yf_ss[f_diff], lsuffix="_cache", rsuffix="_yf")
df_diffs.index = df_diffs.index.tz_convert(h_ss.index.tz)
if interday:
df_diffs.index = df_diffs.index.date
df_diffs.index = df_diffs.index.tz_convert(df_yf.index.tz).date
df_diffs["error"] = df_diffs[c+"_cache"] - df_diffs[c+"_yf"]
df_diffs["error %"] = (df_diffs["error"]*100 / df_diffs[c+"_yf"]).round(2).astype(str) + '%'
if not quiet:
@@ -546,7 +546,7 @@ def VerifyPricesDf(h, df_yf, interval, rtol=0.0001, vol_rtol=0.005, exit_first_e
df_diffs = h_cg.join(yf_cg[f_diff], lsuffix="_cache", rsuffix="_yf")
df_diffs.index = df_diffs.index.tz_convert(h_cg.index.tz)
if interday:
df_diffs.index = df_diffs.index.date
df_diffs.index = df_diffs.index.tz_convert(df_yf.index.tz).date
df_diffs["error"] = df_diffs[c+"_cache"] - df_diffs[c+"_yf"]
df_diffs["error %"] = (df_diffs["error"]*100 / df_diffs[c+"_yf"]).round(2).astype(str) + '%'
if not quiet:
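
A compact illustration of the date-printing tweak: the cached index is converted into the Yahoo frame's timezone before taking `.date`, so both sides print the same calendar day. The timestamp and timezones are invented:

```python
import pandas as pd

idx = pd.DatetimeIndex(["2024-10-24 00:00"], tz="UTC")  # cached index tz
yf_tz = "America/New_York"                              # df_yf.index.tz

print(idx.date)                    # [datetime.date(2024, 10, 24)]
print(idx.tz_convert(yf_tz).date)  # [datetime.date(2024, 10, 23)] -- matches Yahoo's daily label
```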
