From 161ab14ed0be89fd3163693162a08cd22fd56248 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Aug 2023 17:48:07 +0200 Subject: [PATCH 1/4] Avoid lookahead bias through informative pairs in callbacks --- freqtrade/data/dataprovider.py | 18 +++++++++++++++++- freqtrade/optimize/backtesting.py | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index 88cda07ab..11cbd7934 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -17,7 +17,7 @@ from freqtrade.constants import (FULL_DATAFRAME_THRESHOLD, Config, ListPairsWith from freqtrade.data.history import load_pair_history from freqtrade.enums import CandleType, RPCMessageType, RunMode from freqtrade.exceptions import ExchangeError, OperationalException -from freqtrade.exchange import Exchange, timeframe_to_seconds +from freqtrade.exchange import Exchange, timeframe_to_prev_date, timeframe_to_seconds from freqtrade.exchange.types import OrderBook from freqtrade.misc import append_candles_to_dataframe from freqtrade.rpc import RPCManager @@ -46,6 +46,8 @@ class DataProvider: self.__rpc = rpc self.__cached_pairs: Dict[PairWithTimeframe, Tuple[DataFrame, datetime]] = {} self.__slice_index: Optional[int] = None + self.__slice_date: Optional[datetime] = None + self.__cached_pairs_backtesting: Dict[PairWithTimeframe, DataFrame] = {} self.__producer_pairs_df: Dict[str, Dict[PairWithTimeframe, Tuple[DataFrame, datetime]]] = {} @@ -64,10 +66,19 @@ class DataProvider: def _set_dataframe_max_index(self, limit_index: int): """ Limit analyzed dataframe to max specified index. + Only relevant in backtesting. :param limit_index: dataframe index. """ self.__slice_index = limit_index + def _set_dataframe_max_date(self, limit_date: datetime): + """ + Limit informative dataframe to max specified date. + Only relevant in backtesting. 
+ :param limit_date: "current date" + """ + self.__slice_date = limit_date + def _set_cached_df( self, pair: str, @@ -356,6 +367,11 @@ class DataProvider: # Get historical OHLCV data (cached on disk). timeframe = timeframe or self._config['timeframe'] data = self.historic_ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type) + # Cut date to timeframe-specific date. + # This is necessary to prevent lookahead bias in callbacks through informative pairs. + if self.__slice_date: + cutoff_date = timeframe_to_prev_date(timeframe, self.__slice_date) + data = data.loc[data['date'] < cutoff_date] if len(data) == 0: logger.warning(f"No data found for ({pair}, {timeframe}, {candle_type}).") return data diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index bdd04ba7f..4c941ea3a 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -1229,12 +1229,14 @@ class Backtesting: is_first = True current_time_det = current_time for det_row in detail_data[HEADERS].values.tolist(): + self.dataprovider._set_dataframe_max_date(current_time_det) open_trade_count_start = self.backtest_loop( det_row, pair, current_time_det, end_date, open_trade_count_start, trade_dir, is_first) current_time_det += timedelta(minutes=self.timeframe_detail_min) is_first = False else: + self.dataprovider._set_dataframe_max_date(current_time) open_trade_count_start = self.backtest_loop( row, pair, current_time, end_date, open_trade_count_start, trade_dir) From 045d8c6fcaaef445d80cb6debc0593626d116a1c Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Aug 2023 17:56:40 +0200 Subject: [PATCH 2/4] Add test for informative pair filtering --- tests/data/test_dataprovider.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/data/test_dataprovider.py b/tests/data/test_dataprovider.py index 4ff4f214b..31c6763bc 100644 --- a/tests/data/test_dataprovider.py +++ b/tests/data/test_dataprovider.py @@ -129,9 +129,14 
@@ def test_get_pair_dataframe(mocker, default_conf, ohlcv_history, candle_type): default_conf["runmode"] = RunMode.BACKTEST dp = DataProvider(default_conf, exchange) assert dp.runmode == RunMode.BACKTEST - assert isinstance(dp.get_pair_dataframe( - "UNITTEST/BTC", timeframe, candle_type=candle_type), DataFrame) - # assert dp.get_pair_dataframe("NONESENSE/AAA", timeframe).empty + df = dp.get_pair_dataframe("UNITTEST/BTC", timeframe, candle_type=candle_type) + assert isinstance(df, DataFrame) + assert len(df) == 3 # ohlcv_history mock has just 3 rows + + dp._set_dataframe_max_date(ohlcv_history.iloc[-1]['date']) + df = dp.get_pair_dataframe("UNITTEST/BTC", timeframe, candle_type=candle_type) + assert isinstance(df, DataFrame) + assert len(df) == 2 # ohlcv_history is limited to 2 rows now def test_available_pairs(mocker, default_conf, ohlcv_history): From bea67822235b473537a5b82b86113845c2d2f319 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Aug 2023 19:33:01 +0200 Subject: [PATCH 3/4] Ensure cutoffs in backtesting are properly tested --- tests/optimize/test_backtesting.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/optimize/test_backtesting.py b/tests/optimize/test_backtesting.py index c6e01f0ad..46a1d5d12 100644 --- a/tests/optimize/test_backtesting.py +++ b/tests/optimize/test_backtesting.py @@ -20,7 +20,7 @@ from freqtrade.data.dataprovider import DataProvider from freqtrade.data.history import get_timerange from freqtrade.enums import CandleType, ExitType, RunMode from freqtrade.exceptions import DependencyException, OperationalException -from freqtrade.exchange.exchange import timeframe_to_next_date +from freqtrade.exchange import timeframe_to_next_date, timeframe_to_prev_date from freqtrade.optimize.backtest_caching import get_backtest_metadata_filename, get_strategy_run_id from freqtrade.optimize.backtesting import Backtesting from freqtrade.persistence import LocalTrade, Trade @@ -1135,6 +1135,12 @@ def 
test_backtest_dataprovider_analyzed_df(default_conf, fee, mocker, testdatadi assert candle_date == current_time # These asserts don't properly raise as they are nested, # therefore we increment count and assert for that. + df = dp.get_pair_dataframe(pair, backtesting.strategy.timeframe) + prior_time = timeframe_to_prev_date(backtesting.strategy.timeframe, + candle_date - timedelta(seconds=1)) + assert prior_time == df.iloc[-1].squeeze()['date'] + assert df.iloc[-1].squeeze()['date'] < current_time + count += 1 backtesting.strategy.confirm_trade_entry = tmp_confirm_entry From 452e1ab0160c8b681e62c6046b8062d35f374e7e Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Aug 2023 19:43:04 +0200 Subject: [PATCH 4/4] get_analyzed_dataframe should provide dataframe with startup candles closes #7389 --- freqtrade/optimize/backtesting.py | 10 ++++++---- tests/optimize/test_backtesting.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 4c941ea3a..21390489e 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -369,13 +369,14 @@ class Backtesting: # Cleanup from prior runs pair_data.drop(HEADERS[5:] + ['buy', 'sell'], axis=1, errors='ignore') df_analyzed = self.strategy.ft_advise_signals(pair_data, {'pair': pair}) - # Trim startup period from analyzed dataframe - df_analyzed = processed[pair] = pair_data = trim_dataframe( - df_analyzed, self.timerange, startup_candles=self.required_startup) # Update dataprovider cache self.dataprovider._set_cached_df( pair, self.timeframe, df_analyzed, self.config['candle_type_def']) + # Trim startup period from analyzed dataframe + df_analyzed = processed[pair] = pair_data = trim_dataframe( + df_analyzed, self.timerange, startup_candles=self.required_startup) + # Create a copy of the dataframe before shifting, that way the entry signal/tag # remains on the correct candle for callbacks. 
df_analyzed = df_analyzed.copy() @@ -1196,7 +1197,8 @@ class Backtesting: row_index += 1 indexes[pair] = row_index - self.dataprovider._set_dataframe_max_index(row_index) + self.dataprovider._set_dataframe_max_index(self.required_startup + row_index) + self.dataprovider._set_dataframe_max_date(current_time) current_detail_time: datetime = row[DATE_IDX].to_pydatetime() trade_dir: Optional[LongShort] = self.check_for_trade_entry(row) diff --git a/tests/optimize/test_backtesting.py b/tests/optimize/test_backtesting.py index 46a1d5d12..ac409bf71 100644 --- a/tests/optimize/test_backtesting.py +++ b/tests/optimize/test_backtesting.py @@ -1359,11 +1359,11 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir) # Cached data correctly removed amounts offset = 1 if tres == 0 else 0 - removed_candles = len(data[pair]) - offset - backtesting.strategy.startup_candle_count + removed_candles = len(data[pair]) - offset assert len(backtesting.dataprovider.get_analyzed_dataframe(pair, '5m')[0]) == removed_candles assert len( backtesting.dataprovider.get_analyzed_dataframe('NXT/BTC', '5m')[0] - ) == len(data['NXT/BTC']) - 1 - backtesting.strategy.startup_candle_count + ) == len(data['NXT/BTC']) - 1 backtesting.strategy.max_open_trades = 1 backtesting.config.update({'max_open_trades': 1})