Avoid lookahead bias through informative pairs in callbacks

This commit is contained in:
Matthias
2023-08-15 17:48:07 +02:00
parent 6f347b839a
commit 161ab14ed0
2 changed files with 19 additions and 1 deletions

View File

@@ -17,7 +17,7 @@ from freqtrade.constants import (FULL_DATAFRAME_THRESHOLD, Config, ListPairsWith
from freqtrade.data.history import load_pair_history
from freqtrade.enums import CandleType, RPCMessageType, RunMode
from freqtrade.exceptions import ExchangeError, OperationalException
from freqtrade.exchange import Exchange, timeframe_to_seconds
from freqtrade.exchange import Exchange, timeframe_to_prev_date, timeframe_to_seconds
from freqtrade.exchange.types import OrderBook
from freqtrade.misc import append_candles_to_dataframe
from freqtrade.rpc import RPCManager
@@ -46,6 +46,8 @@ class DataProvider:
self.__rpc = rpc
self.__cached_pairs: Dict[PairWithTimeframe, Tuple[DataFrame, datetime]] = {}
self.__slice_index: Optional[int] = None
self.__slice_date: Optional[datetime] = None
self.__cached_pairs_backtesting: Dict[PairWithTimeframe, DataFrame] = {}
self.__producer_pairs_df: Dict[str,
Dict[PairWithTimeframe, Tuple[DataFrame, datetime]]] = {}
@@ -64,10 +66,19 @@ class DataProvider:
def _set_dataframe_max_index(self, limit_index: int):
"""
Limit the analyzed dataframe to the given maximum index.
Only relevant in backtesting.
The value is only stored here; the code that applies the limit is not part of this view.
:param limit_index: dataframe index to use as the limit.
"""
# Kept on the instance until the next call overwrites it.
self.__slice_index = limit_index
def _set_dataframe_max_date(self, limit_date: datetime):
"""
Limit informative dataframes to the given maximum date.
Only relevant in backtesting.
While a date is set, historic OHLCV data is reduced to the rows whose 'date' is
earlier than timeframe_to_prev_date(timeframe, limit_date), which prevents
lookahead bias in callbacks through informative pairs.
:param limit_date: "current date" of the backtest; set before each backtest_loop call.
"""
# Kept on the instance until the next call overwrites it.
self.__slice_date = limit_date
def _set_cached_df(
self,
pair: str,
@@ -356,6 +367,11 @@ class DataProvider:
# Get historical OHLCV data (cached on disk).
timeframe = timeframe or self._config['timeframe']
data = self.historic_ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type)
# Cut date to timeframe-specific date.
# This is necessary to prevent lookahead bias in callbacks through informative pairs.
if self.__slice_date:
cutoff_date = timeframe_to_prev_date(timeframe, self.__slice_date)
data = data.loc[data['date'] < cutoff_date]
if len(data) == 0:
logger.warning(f"No data found for ({pair}, {timeframe}, {candle_type}).")
return data

View File

@@ -1229,12 +1229,14 @@ class Backtesting:
is_first = True
current_time_det = current_time
for det_row in detail_data[HEADERS].values.tolist():
self.dataprovider._set_dataframe_max_date(current_time_det)
open_trade_count_start = self.backtest_loop(
det_row, pair, current_time_det, end_date,
open_trade_count_start, trade_dir, is_first)
current_time_det += timedelta(minutes=self.timeframe_detail_min)
is_first = False
else:
self.dataprovider._set_dataframe_max_date(current_time)
open_trade_count_start = self.backtest_loop(
row, pair, current_time, end_date,
open_trade_count_start, trade_dir)