From 39b4263b8bc4875ad06157ec529a71c56679c59c Mon Sep 17 00:00:00 2001 From: Meng Xiangzhuo Date: Wed, 13 Nov 2024 17:14:49 +0800 Subject: [PATCH] chore: improve comments --- freqtrade/exchange/binance.py | 14 +++++--- freqtrade/exchange/binance_public_data.py | 37 ++++++++++++++++------ tests/exchange/test_binance_public_data.py | 15 ++++++--- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/freqtrade/exchange/binance.py b/freqtrade/exchange/binance.py index d361fd3da..8104876f8 100644 --- a/freqtrade/exchange/binance.py +++ b/freqtrade/exchange/binance.py @@ -5,12 +5,13 @@ from datetime import datetime, timezone from pathlib import Path import ccxt -from pandas import DataFrame, concat +from pandas import DataFrame from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS from freqtrade.enums import CandleType, MarginMode, PriceType, TradingMode from freqtrade.exceptions import DDosProtection, OperationalException, TemporaryError from freqtrade.exchange import Exchange, binance_public_data +from freqtrade.exchange.binance_public_data import concat from freqtrade.exchange.common import retrier from freqtrade.exchange.exchange_types import FtHas, Tickers from freqtrade.exchange.exchange_utils_timeframe import timeframe_to_msecs @@ -162,10 +163,11 @@ class Binance(Exchange): candle_type: CandleType, is_new_pair: bool = False, until_ms: int | None = None, - ): + ) -> DataFrame: """ - Fetch ohlcv fast by utilizing https://data.binance.vision + Quickly fetch OHLCV data by leveraging https://data.binance.vision. 
""" + # only download timeframes with significant improvements, otherwise fall back to rest API if (candle_type == CandleType.SPOT and timeframe in ["1s", "1m", "3m", "5m"]) or ( candle_type == CandleType.FUTURES and timeframe in ["1m", "3m", "5m", "15m", "30m"] ): @@ -179,10 +181,14 @@ class Binance(Exchange): markets=self.markets, ) ) + + # download the remaining data from rest API if df.empty: rest_since_ms = since_ms else: rest_since_ms = dt_ts(df.iloc[-1].date) + timeframe_to_msecs(timeframe) + + # make sure since <= until if until_ms and rest_since_ms > until_ms: rest_df = DataFrame() else: @@ -195,6 +201,7 @@ class Binance(Exchange): until_ms=until_ms, ) all_df = concat([df, rest_df]) + return all_df else: return super().get_historic_ohlcv( pair=pair, @@ -204,7 +211,6 @@ class Binance(Exchange): is_new_pair=is_new_pair, until_ms=until_ms, ) - return all_df def funding_fee_cutoff(self, open_date: datetime): """ diff --git a/freqtrade/exchange/binance_public_data.py b/freqtrade/exchange/binance_public_data.py index 33691f962..685c2c42f 100644 --- a/freqtrade/exchange/binance_public_data.py +++ b/freqtrade/exchange/binance_public_data.py @@ -42,12 +42,22 @@ async def fetch_ohlcv( stop_on_404: bool = True, ) -> DataFrame: """ - Fetch OHLCV data from https://data.binance.vision/ + Fetch OHLCV data from https://data.binance.vision + The function makes its best effort to download data within the time range + [`since_ms`, `until_ms`) -- including `since_ms`, but excluding `until_ms`. + If `stop_on_404` is True, the returned DataFrame is guaranteed to start from `since_ms` + with no gaps in the data. 
+ :candle_type: Currently only spot and futures are supported + :pair: symbol name in CCXT convention + :since_ms: the start timestamp of data, including itself + :until_ms: the end timestamp of data, excluding itself :param until_ms: `None` indicates the timestamp of the latest available data + :markets: the CCXT markets dict, when it's None, the function will load the markets data + from a new `ccxt.binance` instance :param stop_on_404: Stop to download the following data when a 404 returned - :return: the date range is between [since_ms, until_ms), - return and empty DataFrame if no data available in the time range + :return: the date range is between [since_ms, until_ms), return an empty DataFrame if no data + available in the time range """ try: if candle_type == CandleType.SPOT: @@ -82,6 +92,7 @@ async def fetch_ohlcv( df = DataFrame() if not df.empty: + # only return the data within the requested time range return df.loc[(df["date"] >= start) & (df["date"] < end)] else: return df @@ -102,7 +113,9 @@ async def _fetch_ohlcv( end: datetime.date, stop_on_404: bool, ) -> DataFrame: + # daily dataframes dfs: list[DataFrame | None] = [] + # the current day being processed, starting at 1. current_day = 0 connector = aiohttp.TCPConnector(limit=100) @@ -116,8 +129,10 @@ async def _fetch_ohlcv( current_day += 1 if isinstance(result, Http404): if stop_on_404: + # A 404 error on the first day indicates missing data + # on https://data.binance.vision, so we log a warning with advice. + # https://github.com/freqtrade/freqtrade/blob/acc53065e5fa7ab5197073276306dc9dc3adbfa3/tests/exchange_online/test_binance_compare_ohlcv.py#L7 if current_day == 1: - # https://github.com/freqtrade/freqtrade/blob/acc53065e5fa7ab5197073276306dc9dc3adbfa3/tests/exchange_online/test_binance_compare_ohlcv.py#L7 logger.warning( "Failed to use fast download, fall back to rest API download, this " "can take more time. 
If you're downloading BTC/USDT:USDT, " @@ -131,8 +146,7 @@ async def _fetch_ohlcv( dfs.append(None) elif isinstance(result, BaseException): logger.warning(f"An exception raised: : {result}") - # Directly return the existing data, do not allow the gap - # between the data + # Directly return the existing data, do not allow the gap within the data return concat(dfs) else: dfs.append(result) @@ -175,7 +189,7 @@ async def get_daily_ohlcv( session: aiohttp.ClientSession, retry_count: int = 3, retry_delay: float = 0.0, -) -> DataFrame | None | Exception: +) -> DataFrame | Exception: """ Get daily OHLCV from https://data.binance.vision See https://github.com/binance/binance-public-data @@ -225,7 +239,10 @@ async def get_daily_ohlcv( else: raise BadHttpStatus(f"{resp.status} - {resp.reason}") except Exception as e: - retry += 1 - if retry >= retry_count: - logger.debug(f"Failed to get data from {url}: {e}") + if isinstance(e, Http404): return e + else: + if retry >= retry_count: + logger.debug(f"Failed to get data from {url}: {e}") + return e + retry += 1 diff --git a/tests/exchange/test_binance_public_data.py b/tests/exchange/test_binance_public_data.py index 49caf5f06..a87804acc 100644 --- a/tests/exchange/test_binance_public_data.py +++ b/tests/exchange/test_binance_public_data.py @@ -254,43 +254,48 @@ async def test_get_daily_ohlcv(mocker, testdatadir): async with aiohttp.ClientSession() as session: path = testdatadir / "binance/binance_public_data/spot-klines-BTCUSDT-1h-2024-10-28.zip" - mocker.patch( + get = mocker.patch( "freqtrade.exchange.binance_public_data.aiohttp.ClientSession.get", return_value=MockResponse(path.read_bytes(), 200), ) df = await get_daily_ohlcv("spot", symbol, timeframe, date, session) + assert get.call_count == 1 assert df["date"].iloc[0] == first_date assert df["date"].iloc[-1] == last_date path = ( testdatadir / "binance/binance_public_data/futures-um-klines-BTCUSDT-1h-2024-10-28.zip" ) - mocker.patch( + get = mocker.patch( 
"freqtrade.exchange.binance_public_data.aiohttp.ClientSession.get", return_value=MockResponse(path.read_bytes(), 200), ) df = await get_daily_ohlcv("futures/um", symbol, timeframe, date, session) + assert get.call_count == 1 assert df["date"].iloc[0] == first_date assert df["date"].iloc[-1] == last_date - mocker.patch( + get = mocker.patch( "freqtrade.exchange.binance_public_data.aiohttp.ClientSession.get", return_value=MockResponse(b"", 404), ) df = await get_daily_ohlcv("spot", symbol, timeframe, date, session, retry_delay=0) + assert get.call_count == 1 assert isinstance(df, Http404) - mocker.patch( + get = mocker.patch( "freqtrade.exchange.binance_public_data.aiohttp.ClientSession.get", return_value=MockResponse(b"", 500), ) mocker.patch("asyncio.sleep") df = await get_daily_ohlcv("spot", symbol, timeframe, date, session) + assert get.call_count == 4 # 1 + 3 default retries assert isinstance(df, BadHttpStatus) - mocker.patch( + get = mocker.patch( "freqtrade.exchange.binance_public_data.aiohttp.ClientSession.get", return_value=MockResponse(b"nop", 200), ) df = await get_daily_ohlcv("spot", symbol, timeframe, date, session) + assert get.call_count == 4 # 1 + 3 default retries assert isinstance(df, zipfile.BadZipFile)