Skip to content

Commit

Permalink
add yahooquery datasource (#127)
Browse files Browse the repository at this point in the history
Co-authored-by: none2003 <[email protected]>
  • Loading branch information
none2003 and none2003 authored Jun 22, 2024
1 parent 932f590 commit ab4c843
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 1 deletion.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ Sphinx>=5.3.0
sphinx_rtd_theme>=2.0.0
sphinx-intl>=2.1.0
yfinance>=0.1.84
yahooquery>=2.3.7
ruff>=0.3.4
101 changes: 100 additions & 1 deletion src/pybroker/ext/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
"""

from datetime import datetime
from typing import Optional
from typing import Final, Iterable, Optional, Union

import akshare
import pandas as pd
from yahooquery import Ticker

from pybroker.common import DataCol, to_datetime
from pybroker.data import DataSource
Expand Down Expand Up @@ -91,3 +92,101 @@ def _fetch_data(
]
]
return result


class YQDataSource(DataSource):
    r"""Retrieves data from `Yahoo Finance <https://finance.yahoo.com/>`_\ .

    Data is downloaded with the ``yahooquery`` package. :meth:`query` always
    requests daily bars.

    Attributes:
        ADJ_CLOSE: Column name of adjusted close prices.

    Args:
        proxies: Optional proxy configuration that is forwarded unchanged to
            :class:`yahooquery.Ticker`.
    """

    ADJ_CLOSE: Final = "adj_close"
    __TIMEFRAME: Final = "1d"
    # Translates the timeframe key received by ``_fetch_data`` into the
    # ``interval`` string handed to yahooquery. The empty key means daily.
    # NOTE(review): ``query`` passes "1d", which is not itself a key here;
    # presumably the base class normalizes it (e.g. to "1day") before it
    # reaches ``_fetch_data`` - confirm against ``DataSource.query``.
    _tf_to_period = {
        "": "1d",
        "1min": "1m",
        "2min": "2m",
        "5min": "5m",
        "15min": "15m",
        "30min": "30m",
        "60min": "60m",
        "90min": "90m",
        "1hour": "1h",
        "1day": "1d",
        "5day": "5d",
        "1week": "1wk",
    }

    def __init__(self, proxies: Optional[dict] = None):
        super().__init__()
        self._scope.register_custom_cols(self.ADJ_CLOSE)
        self.proxies = proxies

    def query(
        self,
        symbols: Union[str, Iterable[str]],
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        _timeframe: Optional[str] = "",
        _adjust: Optional[str] = None,
    ) -> pd.DataFrame:
        r"""Queries data from `Yahoo Finance <https://finance.yahoo.com/>`_\ .
        The timeframe of the data is limited to per day only.

        Args:
            symbols: Ticker symbols of the data to query.
            start_date: Start date of the data to query (inclusive).
            end_date: End date of the data to query (inclusive).
            _timeframe: Ignored; a daily timeframe is always requested.
            _adjust: Forwarded to the base class query unchanged.

        Returns:
            :class:`pandas.DataFrame` containing the queried data.
        """
        return super().query(
            symbols,
            start_date,
            end_date,
            self.__TIMEFRAME,
            _adjust,
        )

    def _fetch_data(
        self,
        symbols: frozenset[str],
        start_date: datetime,
        end_date: datetime,
        _timeframe: Optional[str],
        _adjust: Optional[str],
    ) -> pd.DataFrame:
        """:meta private:

        Downloads price history for ``symbols`` and returns it with the
        columns ``date``, ``symbol``, ``open``, ``high``, ``low``, ``close``,
        ``volume`` and :attr:`ADJ_CLOSE`.

        Raises:
            KeyError: If ``_timeframe`` is not a key of ``_tf_to_period``.
        """
        # Resolve the interval before creating the Ticker so that an
        # unsupported timeframe fails early. ``None`` is treated like the
        # empty (daily) key because the parameter is declared Optional.
        try:
            interval = self._tf_to_period[_timeframe or ""]
        except KeyError:
            supported = sorted(key for key in self._tf_to_period if key)
            raise KeyError(
                f"Unsupported timeframe: {_timeframe!r}. "
                f"Supported timeframes: {supported}"
            ) from None

        show_progress = not (
            self._logger._disabled or self._logger._progress_bar_disabled
        )
        tickers = Ticker(
            # Pass a list rather than a frozenset; sorting keeps the request
            # order deterministic.
            sorted(symbols),
            asynchronous=True,
            progress=show_progress,
            proxies=self.proxies,
        )
        df = tickers.history(
            start=start_date,
            end=end_date,
            interval=interval,
            # yahooquery documents ``adj_ohlc`` as a boolean flag.
            adj_ohlc=bool(_adjust),
        )
        if df.columns.empty:
            # Nothing was returned: hand back an empty frame that still
            # carries the expected column names.
            return pd.DataFrame(
                columns=[
                    DataCol.SYMBOL.value,
                    DataCol.DATE.value,
                    DataCol.OPEN.value,
                    DataCol.HIGH.value,
                    DataCol.LOW.value,
                    DataCol.CLOSE.value,
                    DataCol.VOLUME.value,
                    self.ADJ_CLOSE,
                ]
            )
        if df.empty:
            return df
        # Turn the index levels into ordinary columns before selecting
        # "date" and "symbol" below.
        df = df.reset_index()
        df["date"] = pd.to_datetime(df["date"])
        df = df.rename(columns={"adjclose": self.ADJ_CLOSE})
        if self.ADJ_CLOSE not in df.columns:
            # The result has no "adjclose" column; presumably this happens
            # when yahooquery has already applied the adjustment to "close"
            # (``adj_ohlc=True``) - confirm against the yahooquery docs.
            # Reuse "close" so the column selection below cannot fail.
            df[self.ADJ_CLOSE] = df["close"]
        return df[
            [
                "date",
                "symbol",
                "open",
                "high",
                "low",
                "close",
                "volume",
                self.ADJ_CLOSE,
            ]
        ]

0 comments on commit ab4c843

Please sign in to comment.