From fb52d322966382e09c39367c2a10ffbd8efc0aaa Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 18 Oct 2018 19:42:54 +0200 Subject: [PATCH] Add validate_backtest_data function --- freqtrade/optimize/__init__.py | 19 ++++++++++ freqtrade/optimize/backtesting.py | 4 ++- freqtrade/tests/optimize/test_optimize.py | 44 ++++++++++++++++++++++- 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/freqtrade/optimize/__init__.py b/freqtrade/optimize/__init__.py index 5367f7663..d4cb6c067 100644 --- a/freqtrade/optimize/__init__.py +++ b/freqtrade/optimize/__init__.py @@ -10,6 +10,7 @@ except ImportError: _UJSON = False import logging import os +from datetime import datetime from typing import Optional, List, Dict, Tuple, Any import operator @@ -76,6 +77,24 @@ def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow] max(timeframe, key=operator.itemgetter(1))[1] +def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime, + max_date: datetime, ticker_interval_mins: int) -> None: + """ + Validates preprocessed backtesting data for missing values and shows warnings about them. 
+ + :param data: dictionary with preprocessed backtesting data + :param min_date: start-date of the data + :param max_date: end-date of the data + :param ticker_interval_mins: ticker interval in minutes + """ + # total difference in minutes / interval-minutes + expected_frames = int((max_date - min_date).total_seconds() // 60 // ticker_interval_mins) + for pair, df in data.items(): + if len(df) < expected_frames: + logger.warning('%s has missing frames: expected %s, got %s', + pair, expected_frames, len(df)) + + def load_tickerdata_file( datadir: str, pair: str, ticker_interval: str, diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 695a52052..961cfb092 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -356,8 +356,10 @@ class Backtesting(object): # need to reprocess data every time to populate signals preprocessed = self.strategy.tickerdata_to_dataframe(data) - # Print timeframe min_date, max_date = optimize.get_timeframe(preprocessed) + # Validate dataframe for missing values + optimize.validate_backtest_data(preprocessed, min_date, max_date, + constants.TICKER_INTERVAL_MINUTES[self.ticker_interval]) logger.info( 'Measuring data from %s up to %s (%s days)..', min_date.isoformat(), diff --git a/freqtrade/tests/optimize/test_optimize.py b/freqtrade/tests/optimize/test_optimize.py index 061caf70b..7b13b498a 100644 --- a/freqtrade/tests/optimize/test_optimize.py +++ b/freqtrade/tests/optimize/test_optimize.py @@ -7,7 +7,7 @@ from shutil import copyfile import arrow -from freqtrade import optimize +from freqtrade import optimize, constants from freqtrade.arguments import TimeRange from freqtrade.misc import file_dump_json from freqtrade.optimize.__init__ import (download_backtesting_testdata, @@ -450,3 +450,45 @@ def test_get_timeframe(default_conf, mocker) -> None: min_date, max_date = optimize.get_timeframe(data) assert min_date.isoformat() == '2017-11-04T23:02:00+00:00' assert 
max_date.isoformat() == '2017-11-14T22:58:00+00:00' + + +def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None: + patch_exchange(mocker) + strategy = DefaultStrategy(default_conf) + + data = strategy.tickerdata_to_dataframe( + optimize.load_data( + None, + ticker_interval='1m', + pairs=['UNITTEST/BTC'] + ) + ) + min_date, max_date = optimize.get_timeframe(data) + caplog.clear() + optimize.validate_backtest_data(data, min_date, max_date, + constants.TICKER_INTERVAL_MINUTES["1m"]) + assert len(caplog.record_tuples) == 1 + assert log_has('UNITTEST/BTC has missing frames: expected 14396, got 13680', + caplog.record_tuples) + + +def test_validate_backtest_data(default_conf, mocker, caplog) -> None: + patch_exchange(mocker) + strategy = DefaultStrategy(default_conf) + + timerange = TimeRange('index', 'index', 200, 250) + data = strategy.tickerdata_to_dataframe( + optimize.load_data( + None, + ticker_interval='5m', + pairs=['UNITTEST/BTC'], + timerange=timerange + ) + ) + + min_date, max_date = optimize.get_timeframe(data) + caplog.clear() + optimize.validate_backtest_data(data, min_date, max_date, + constants.TICKER_INTERVAL_MINUTES["5m"]) + assert len(caplog.record_tuples) == 0 +