Skip to content

Commit

Permalink
offline files improved
Browse files Browse the repository at this point in the history
  • Loading branch information
mkaanerkoc committed Feb 8, 2024
1 parent d24205b commit ada7d85
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 52 deletions.
79 changes: 78 additions & 1 deletion demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from fast_binance import (
OnlinePriceFetcher,
OfflineFileFetcher,
AggregatedTradesFile,
FuturesUsdtMetricsFile,
PremiumIndexKlinesFile,
PriceKlinesFile
Expand All @@ -26,6 +27,82 @@ def archieve_data():
res = ofl.download(files)
print(res)

def analyze_premium_index():
    """Download monthly 1m premium-index and futures klines for SANDUSDT.

    One archive descriptor is built per month-end date produced by
    ``pd.date_range('2020-01-01', '2023-11-01', freq='1M')``.  Both data
    sets are fetched through ``OfflineFileFetcher.download`` and the
    concatenated frames are written to ``SANDUSDT_premium_index.csv`` and
    ``SANDUSDT_fut_klines.csv`` in the current working directory.

    Entries of the download result that are not DataFrames are dropped
    before concatenation.
    """
    ofl = OfflineFileFetcher()
    symbol = 'SANDUSDT'
    dates = pd.date_range('2020-01-01', '2023-11-01', freq='1M')

    # Archive file names use a 'YYYY-MM' stamp for monthly files.
    months = [f'{date.year}-{date.month:02d}' for date in dates]

    # Keyword arguments are used on purpose: the positional order of
    # PremiumIndexKlinesFile.__init__ is (symbol, period, date, span), and
    # the previous positional call passed (symbol, date, span, period),
    # which produced a URL with the month in the period slot.
    files = [
        PremiumIndexKlinesFile(symbol=symbol, period='1m', date=month, span='monthly')
        for month in months
    ]

    res = ofl.download(files)
    df = pd.concat([item for item in res if isinstance(item, pd.DataFrame)])
    df.to_csv(f'{symbol}_premium_index.csv')

    files = [
        PriceKlinesFile(market='futures', symbol=symbol, period='1m', date=month, span='monthly')
        for month in months
    ]

    res = ofl.download(files)
    df = pd.concat([item for item in res if isinstance(item, pd.DataFrame)])
    df.to_csv(f'{symbol}_fut_klines.csv')


def analyze_premium_index_daily():
    """Fetch daily 1m premium-index and futures klines for LOOMUSDT.

    Covers every day from 2023-09-01 through 2023-11-01.  The premium-index
    frames are written to ``LOOMUSDT_premium_index.csv`` first, then the
    futures klines to ``LOOMUSDT_fut_klines.csv``.  Download results that
    are not DataFrames are ignored.
    """
    fetcher = OfflineFileFetcher()
    symbol = 'LOOMUSDT'
    days = pd.date_range('2023-09-01', '2023-11-01', freq='1D')

    # --- premium index klines -------------------------------------------
    premium_files = []
    for sym, day in product([symbol], days):
        premium_files.append(PremiumIndexKlinesFile(sym, '1m', str(day.date()), 'daily'))

    downloaded = fetcher.download(premium_files)
    frames = [item for item in downloaded if isinstance(item, pd.DataFrame)]
    pd.concat(frames).to_csv(f'{symbol}_premium_index.csv')

    # --- futures price klines -------------------------------------------
    kline_files = []
    for sym, day in product([symbol], days):
        kline_files.append(PriceKlinesFile('futures', sym, '1m', str(day.date()), 'daily'))

    downloaded = fetcher.download(kline_files)
    frames = [item for item in downloaded if isinstance(item, pd.DataFrame)]
    pd.concat(frames).to_csv(f'{symbol}_fut_klines.csv')

def download_agg_trades():
    """Fetch daily spot aggregated trades for LOOMUSDT and save them as CSV.

    One ``AggregatedTradesFile`` is requested per day from 2023-09-01
    through 2023-11-01.  The DataFrames found in the download result are
    concatenated and written to ``LOOMUSDT_agg_trades.csv``.
    """
    fetcher = OfflineFileFetcher()
    symbol = 'LOOMUSDT'
    days = pd.date_range('2023-09-01', '2023-11-01', freq='1D')

    trade_files = []
    for sym, day in product([symbol], days):
        trade_files.append(AggregatedTradesFile(sym, str(day.date()), 'daily', 'spot'))

    downloaded = fetcher.download(trade_files)
    # Anything that is not a DataFrame is left out of the result.
    frames = [item for item in downloaded if isinstance(item, pd.DataFrame)]
    combined = pd.concat(frames)
    combined.to_csv(f'{symbol}_agg_trades.csv')

def analyze_premium_index_daily_multiple_symbol():
    """Download daily 1m premium-index and futures klines for LOOMUSDT.

    Despite the name, only the single symbol ``LOOMUSDT`` is processed.
    One descriptor is built per day from 2023-09-01 through 2023-11-01.
    Results are written to ``LOOMUSDT_premium_index.csv`` and
    ``LOOMUSDT_fut_klines.csv``; download results that are not DataFrames
    are dropped before concatenation.
    """
    ofl = OfflineFileFetcher()
    symbol = 'LOOMUSDT'

    days = pd.date_range('2023-09-01', '2023-11-01', freq='1D')

    # Keyword arguments are used on purpose: the positional order of
    # PremiumIndexKlinesFile.__init__ is (symbol, period, date, span), and
    # the previous positional call passed (symbol, date, span, period),
    # which put the date in the period slot of the download URL.
    files = [
        PremiumIndexKlinesFile(symbol=symbol, period='1m', date=str(day.date()), span='daily')
        for day in days
    ]

    res = ofl.download(files)
    df = pd.concat([item for item in res if isinstance(item, pd.DataFrame)])
    df.to_csv(f'{symbol}_premium_index.csv')

    files = [
        PriceKlinesFile(market='futures', symbol=symbol, period='1m', date=str(day.date()), span='daily')
        for day in days
    ]

    res = ofl.download(files)
    df = pd.concat([item for item in res if isinstance(item, pd.DataFrame)])
    df.to_csv(f'{symbol}_fut_klines.csv')


# Script entry point: run the archive demo first, then the aggregated-trades
# download. Both calls only execute when this file is run directly.
if __name__ == '__main__':
    archieve_data()
    download_agg_trades()
5 changes: 3 additions & 2 deletions fast_binance/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
__version__ = "1.0.3"
__version__ = "1.1.0"

from fast_binance.online_fetcher import OnlinePriceFetcher
from fast_binance.offline_fetcher import OfflineFileFetcher
from fast_binance.archieve_files import(
PriceKlinesFile,
FuturesUsdtMetricsFile,
MarkPriceKlines,
PremiumIndexKlinesFile
PremiumIndexKlinesFile,
AggregatedTradesFile
)
128 changes: 84 additions & 44 deletions fast_binance/archieve_files.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,52 @@
import pandas as pd


class FileInfo:
class AbstractFile:
    """Common base for descriptors of downloadable archive files.

    Subclasses are expected to override ``source`` (the download URL) and
    ``prepare_df`` (post-processing of the loaded frame); the versions here
    only raise ``NotImplementedError``.
    """

    # Root URL shared by every archive path built in subclasses.
    _base_url = 'https://data.binance.vision/data'

    @property
    def source(self):
        """Download URL of the file; must be provided by the subclass."""
        message = 'source() method should be implemented in derived classes.'
        raise NotImplementedError(message)

    def prepare_df(self, df):
        """Post-process the raw frame; must be provided by the subclass."""
        message = 'prepare_df() method should be implemented in derived classes.'
        raise NotImplementedError(message)

class PriceKlinesFile(FileInfo):

class KlinesFile(AbstractFile):
    """Shared state for kline-style archive files.

    Stores the market, symbol, period, date and span of one archive and
    derives ``name`` as ``<symbol>-<period>-<date>``.
    """

    # Column labels, in order, for kline frames.
    columns = [
        'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
        'quote_volume', 'count', 'taker_buy_volume', 'taker_buy_quote_volume', 'ignore',
    ]

    def __init__(self, market, symbol, period, date, span):
        # Only the literal 'spot' selects the spot market; every other
        # value is mapped to the 'futures/um' path segment.
        if market == 'spot':
            self.market = 'spot'
        else:
            self.market = 'futures/um'
        self.symbol, self.period = symbol, period
        self.date, self.span = date, span
        self.name = f'{self.symbol}-{self.period}-{self.date}'

class MetricsFile:
    """Locator for a daily futures metrics archive of one symbol.

    Args:
        symbol: Trading pair used in the URL path and file name.
        date: Date stamp inserted into the file name.
    """

    # Constant URL root; kept at class level (as AbstractFile does with its
    # own ``_base_url``) instead of being re-assigned on every instance.
    _base_url = 'https://data.binance.vision/data/futures/um'

    def __init__(self, symbol, date):
        self.symbol = symbol
        self.date = date

    @property
    def source(self):
        """Return the download URL ``.../daily/metrics/<symbol>/<symbol>-metrics-<date>.zip``."""
        return f'{self._base_url}/daily/metrics/{self.symbol}/{self.symbol}-metrics-{self.date}.zip'


class PriceKlinesFile(KlinesFile):
type_mapping = {'open_time':int, 'open':float, 'high':float, 'low':float, 'close':float,
'volume':float, 'quote_volume':float, 'taker_buy_volume':float }

def __init__(self, market, symbol, period, date, span='daily'):
self.binance = BinanceKlinesFile(market, symbol, period, date, span)
super().__init__(market, symbol, period, date, span)

@property
def source(self):
'''
https://data.binance.vision/data/futures/um/daily/klines/ACHUSDT/15m/ACHUSDT-15m-2024-02-07.zip
'''
return f'{self._base_url}/{self.market}/{self.span}/klines/{self.symbol}/{self.period}/{self.name}.zip'

def prepare_df(self, df:pd.DataFrame):
try:
Expand All @@ -26,10 +59,9 @@ def prepare_df(self, df:pd.DataFrame):
finally:
return df


class FuturesUsdtMetricsFile(FileInfo):
class FuturesUsdtMetricsFile(AbstractFile):
def __init__(self, symbol, date):
self.binance = BinanceMetricsFile(symbol, date)
self.binance = MetricsFile(symbol, date)

def prepare_df(self, df):
try:
Expand All @@ -40,19 +72,16 @@ def prepare_df(self, df):
finally:
return df

class PremiumIndexKlinesFile(FileInfo):
columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
'quote_volume', 'count', 'taker_buy_volume', 'taker_buy_quote_volume', 'ignore']

class PremiumIndexKlinesFile(KlinesFile):
type_mapping = {'open_time':int, 'open':float, 'high':float, 'low':float, 'close':float }

def __init__(self, symbol, date, span, period):
def __init__(self, symbol, period, date, span):
# Can we find span by looking at date?
self.binance = BinancePremiumIndexFile(symbol, date, span, period)
super().__init__('futures', symbol, period, date, span)


def prepare_df(self, df:pd.DataFrame):
try:
df.columns = self.columns
df = df.astype(self.type_mapping)
df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
df.set_index('open_time', inplace=True)
Expand All @@ -63,52 +92,63 @@ def prepare_df(self, df:pd.DataFrame):
print(e)
finally:
return df

@property
def source(self):
'''
https://data.binance.vision/data/futures/um/monthly/premiumIndexKlines/TRBUSDT/1m/TRBUSDT-1m-2023-09.zip
'''
return f'{self._base_url}/futures/um/{self.span}/premiumIndexKlines/' \
f'{self.symbol}/{self.period}/{self.symbol}-{self.period}-{self.date}.zip'

class MarkPriceKlines:
# TODO
pass

class BinanceKlinesFile:
    """Locator for a klines archive on the spot or futures market."""

    def __init__(self, market, symbol, period, date, span):
        self.market, self.symbol, self.period = market, symbol, period
        self.date, self.span = date, span

    @property
    def path(self):
        """Return the archive URL, or ``None`` when ``market`` is neither
        ``'futures'`` nor ``'spot'``."""
        if self.market == 'futures':
            root = f"https://data.binance.vision/data/futures/um/"
        elif self.market == 'spot':
            root = f"https://data.binance.vision/data/spot/"
        else:
            # No URL is defined for any other market value.
            return None
        return root + f"{self.span}/klines/{self.symbol}/{self.period}/{self.symbol}-{self.period}-{self.date}.zip"

class BinanceMetricsFile:
    """Locator for a daily futures metrics archive of one symbol."""

    def __init__(self, symbol, date):
        # The URL root is stored on the instance alongside the identifiers.
        self._base_url = 'https://data.binance.vision/data/futures/um'
        self.symbol, self.date = symbol, date

    @property
    def path(self):
        """URL of the ``<symbol>-metrics-<date>.zip`` archive."""
        url = f'{self._base_url}/daily/metrics/{self.symbol}/{self.symbol}-metrics-{self.date}.zip'
        return url


class BinancePremiumIndexFile:
class PremiumIndexFile:
def __init__(self, symbol, date, span, period):
self.symbol = symbol
self.date = date
self.period = period
self._span = span
self._base_url = 'https://data.binance.vision/data/futures/um'


@property
def path(self):
def source(self):
'''
https://data.binance.vision/data/futures/um/monthly/premiumIndexKlines/TRBUSDT/1m/TRBUSDT-1m-2023-09.zip
'''
return f'{self._base_url}/{self._span}/premiumIndexKlines/' \
f'{self.symbol}/{self.period}/{self.symbol}-{self.period}-{self.date}.zip'


class AggregatedTradesFile(AbstractFile):
    """Descriptor for an aggregated-trades (aggTrades) archive file.

    Args:
        symbol: Trading pair used in the URL path and file name.
        date: Date stamp inserted into the file name.
        span: Path segment between market and ``aggTrades``
            (the demo passes ``'daily'``).
        market: ``'spot'`` selects the spot path; any other value is mapped
            to ``'futures/um'``.
    """

    # Single definition of the column labels. The original built this list in
    # a spot/futures conditional whose two branches were identical and then
    # repeated it a third time inside prepare_df().
    # NOTE(review): if futures archives use a different column layout, this
    # needs a per-market definition -- confirm against the data files.
    _COLUMNS = ('aggTradeID', 'price', 'quantity', 'ftId', 'ltId', 'time', 'is_buyer_maker', 'ig')

    def __init__(self, symbol, date, span, market):
        self.symbol = symbol
        self.date = date
        self.span = span
        self.market = 'spot' if market == 'spot' else 'futures/um'
        self.name = f'{self.symbol}-aggTrades-{self.date}'  # ACAUSDT-aggTrades-2024-02-07
        # Per-instance list, as before, so callers that read or mutate
        # ``columns`` on one instance do not affect others.
        self.columns = list(self._COLUMNS)

    @property
    def source(self):
        '''
        https://data.binance.vision/data/spot/daily/aggTrades/ACAUSDT/ACAUSDT-aggTrades-2024-02-07.zip
        https://data.binance.vision/data/futures/um/daily/aggTrades/ACAUSDT/ACAUSDT-aggTrades-2024-02-07.zip
        '''
        return f'{self._base_url}/{self.market}/{self.span}/aggTrades/{self.symbol}/{self.name}.zip'

    @property
    def local(self):
        """Placeholder for a local file location; not implemented, returns ``None``."""
        return None

    def prepare_df(self, df):
        """Label, type and index a raw aggregated-trades frame.

        The column labels are assigned on the frame that was passed in, so
        the caller's object is relabelled as well.  ``time`` is converted
        from integer milliseconds to datetimes, the ``ftId``, ``ltId`` and
        ``ig`` columns are dropped, and the result is sorted by time.

        Returns:
            The processed DataFrame, indexed by ``time``.
        """
        df.columns = list(self._COLUMNS)
        df = df.astype({"time": int})
        df['time'] = pd.to_datetime(df['time'], unit='ms')
        df = df.drop(columns=['ftId', 'ltId', 'ig'])
        # NOTE(review): passing the Series (rather than the label 'time')
        # keeps 'time' as a regular column in addition to the index;
        # preserved as-is because callers may rely on df['time'].
        df = df.set_index(df['time']).sort_index()
        return df
10 changes: 5 additions & 5 deletions fast_binance/offline_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
import zipfile

from fast_binance.archieve_files import FileInfo
from fast_binance.archieve_files import AbstractFile

from fast_binance.utils import (
chunked_iterable
Expand All @@ -15,14 +15,14 @@ class OfflineFileFetcher:
def __init__(self):
self._worker = 250

async def fetch_files(self, files:list[FileInfo]):
async def fetch_files(self, files:list[AbstractFile]):
res = []
async with aiohttp.ClientSession() as session:
for file_chunk in chunked_iterable(files, self._worker):
res.extend(await self._fetch_chunk(session, file_chunk))
return res

async def _fetch_chunk(self, session, files:list[FileInfo]):
async def _fetch_chunk(self, session, files:list[AbstractFile]):
tasks = []
for file in files:
task = asyncio.ensure_future(self._fetch_file(session, file))
Expand All @@ -34,8 +34,8 @@ async def _fetch_file(self, session, file):
downloads zip file and extract .csv file and add column information
returns pandas dataframe
'''

async with session.get(file.binance.path) as resp:
async with session.get(file.source) as resp:
assert resp.status == 200
data = await resp.read()
with zipfile.ZipFile(io.BytesIO(data)) as archive:
Expand Down

0 comments on commit ada7d85

Please sign in to comment.