From 161ab14ed0be89fd3163693162a08cd22fd56248 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Aug 2023 17:48:07 +0200 Subject: [PATCH] Avoid lookahead bias through informative pairs in callbacks --- freqtrade/data/dataprovider.py | 18 +++++++++++++++++- freqtrade/optimize/backtesting.py | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index 88cda07ab..11cbd7934 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -17,7 +17,7 @@ from freqtrade.constants import (FULL_DATAFRAME_THRESHOLD, Config, ListPairsWith from freqtrade.data.history import load_pair_history from freqtrade.enums import CandleType, RPCMessageType, RunMode from freqtrade.exceptions import ExchangeError, OperationalException -from freqtrade.exchange import Exchange, timeframe_to_seconds +from freqtrade.exchange import Exchange, timeframe_to_prev_date, timeframe_to_seconds from freqtrade.exchange.types import OrderBook from freqtrade.misc import append_candles_to_dataframe from freqtrade.rpc import RPCManager @@ -46,6 +46,8 @@ class DataProvider: self.__rpc = rpc self.__cached_pairs: Dict[PairWithTimeframe, Tuple[DataFrame, datetime]] = {} self.__slice_index: Optional[int] = None + self.__slice_date: Optional[datetime] = None + self.__cached_pairs_backtesting: Dict[PairWithTimeframe, DataFrame] = {} self.__producer_pairs_df: Dict[str, Dict[PairWithTimeframe, Tuple[DataFrame, datetime]]] = {} @@ -64,10 +66,19 @@ class DataProvider: def _set_dataframe_max_index(self, limit_index: int): """ Limit analyzed dataframe to max specified index. + Only relevant in backtesting. :param limit_index: dataframe index. """ self.__slice_index = limit_index + def _set_dataframe_max_date(self, limit_date: datetime): + """ + Limit informative dataframe to max specified date. + Only relevant in backtesting. 
+ :param limit_date: "current date" + """ + self.__slice_date = limit_date + def _set_cached_df( self, pair: str, @@ -356,6 +367,11 @@ class DataProvider: # Get historical OHLCV data (cached on disk). timeframe = timeframe or self._config['timeframe'] data = self.historic_ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type) + # Cut date to timeframe-specific date. + # This is necessary to prevent lookahead bias in callbacks through informative pairs. + if self.__slice_date: + cutoff_date = timeframe_to_prev_date(timeframe, self.__slice_date) + data = data.loc[data['date'] < cutoff_date] if len(data) == 0: logger.warning(f"No data found for ({pair}, {timeframe}, {candle_type}).") return data diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index bdd04ba7f..4c941ea3a 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -1229,12 +1229,14 @@ class Backtesting: is_first = True current_time_det = current_time for det_row in detail_data[HEADERS].values.tolist(): + self.dataprovider._set_dataframe_max_date(current_time_det) open_trade_count_start = self.backtest_loop( det_row, pair, current_time_det, end_date, open_trade_count_start, trade_dir, is_first) current_time_det += timedelta(minutes=self.timeframe_detail_min) is_first = False else: + self.dataprovider._set_dataframe_max_date(current_time) open_trade_count_start = self.backtest_loop( row, pair, current_time, end_date, open_trade_count_start, trade_dir)