From 2d6a49013f3a4dbebd70035746fac086601a2fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Jou=C3=9Fen?= Date: Fri, 15 Dec 2023 10:47:56 +0000 Subject: [PATCH 1/6] Fix duplicated data loading and timerange for populate_features --- docs/freqai-running.md | 2 +- freqtrade/data/dataprovider.py | 9 ++-- freqtrade/freqai/data_kitchen.py | 3 ++ tests/freqai/test_freqai_datakitchen.py | 61 +++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 55f302d40..540aa700b 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -68,7 +68,7 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to- This way, you can return to using any model you wish by simply specifying the `identifier`. !!! Note - Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire backtest timerange. This means that you should be sure that features do look-ahead into the future. + Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire training timerange. This means that you should be sure that features do not look-ahead into the future. More details about look-ahead bias can be found in [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies). 
--- diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index 11cbd7934..dbed8b29b 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -311,11 +311,14 @@ class DataProvider: timerange = TimeRange.parse_timerange(None if self._config.get( 'timerange') is None else str(self._config.get('timerange'))) - # It is not necessary to add the training candles, as they - # were already added at the beginning of the backtest. - startup_candles = self.get_required_startup(str(timeframe), False) + startup_candles = self.get_required_startup(str(timeframe)) tf_seconds = timeframe_to_seconds(str(timeframe)) timerange.subtract_start(tf_seconds * startup_candles) + + logger.info(f"Loading data for {pair} {timeframe} " + f"from {timerange.start_fmt} " + f"to {timerange.stop_fmt}") + self.__cached_pairs_backtesting[saved_pair] = load_pair_history( pair=pair, timeframe=timeframe, diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 1bdd8b0d5..6d4d6c8dc 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -709,6 +709,8 @@ class FreqaiDataKitchen: pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs) informative_copy = informative_df.copy() + logger.debug(f"Populating features for {pair} {tf}") + for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: df_features = strategy.feature_engineering_expand_all( informative_copy.copy(), t, metadata=metadata) @@ -788,6 +790,7 @@ class FreqaiDataKitchen: if not prediction_dataframe.empty: dataframe = prediction_dataframe.copy() + base_dataframes[self.config["timeframe"]] = dataframe.copy() else: dataframe = base_dataframes[self.config["timeframe"]].copy() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index cac9d9838..ca7f8a9d4 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -3,6 +3,7 
@@ from datetime import datetime, timedelta, timezone from pathlib import Path from unittest.mock import MagicMock +import pandas as pd import pytest from freqtrade.configuration import TimeRange @@ -135,3 +136,63 @@ def test_get_full_model_path(mocker, freqai_conf, model): model_path = freqai.dk.get_full_models_path(freqai_conf) assert model_path.is_dir() is True + + +def test_get_pair_data_for_features_with_prealoaded_data(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + _, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df) + + assert df is base_df["5m"] + assert not df.empty + + +def test_get_pair_data_for_features_without_preloaded_data(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180115-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + base_df = {'5m': pd.DataFrame()} + df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df) + + assert df is not base_df["5m"] + assert not df.empty + assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-11 23:00:00" + assert df.iloc[-1]['date'].strftime("%Y-%m-%d %H:%M:%S") == 
"2018-01-30 00:00:00" + + +def test_populate_features(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180115-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk) + mocker.patch.object(strategy, 'feature_engineering_expand_all', return_value=base_df["5m"]) + df = freqai.dk.populate_features(base_df["5m"], "LTC/BTC", strategy, + base_dataframes=base_df, corr_dataframes=corr_df) + + strategy.feature_engineering_expand_all.assert_called_once() + pd.testing.assert_frame_equal(base_df["5m"], + strategy.feature_engineering_expand_all.call_args[0][0]) + + assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-15 00:00:00" From 663b1e11f8515781835b3dcd0b0c0e2c1d367bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Jou=C3=9Fen?= Date: Fri, 15 Dec 2023 11:10:44 +0000 Subject: [PATCH 2/6] Add timeframe info into 'increase startup_candle_count' log message --- freqtrade/data/dataprovider.py | 3 ++- tests/freqai/test_freqai_backtesting.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index dbed8b29b..dd9cae8b0 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -344,7 +344,8 @@ class DataProvider: if add_train_candles: train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds total_candles = int(self._config['startup_candle_count'] + train_candles) - logger.info(f'Increasing startup_candle_count for freqai to {total_candles}') + logger.info(f'Increasing startup_candle_count for freqai on {timeframe} ' + f'to 
{total_candles}') return total_candles def get_pair_dataframe( diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index 0a8059966..845e6a992 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -51,7 +51,7 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog): backtesting = Backtesting(deepcopy(freqai_conf)) backtesting.load_bt_data() - assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog) + assert log_has_re('Increasing startup_candle_count for freqai on.*to.*', caplog) Backtesting.cleanup() From 4f2d7b858f64f6aac85430fd5705cb708750cbd6 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 17 Dec 2023 20:11:20 +0100 Subject: [PATCH 3/6] Slightly enhance logging for clarity --- freqtrade/data/dataprovider.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index dd9cae8b0..3ae9f3a7f 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -316,8 +316,7 @@ class DataProvider: timerange.subtract_start(tf_seconds * startup_candles) logger.info(f"Loading data for {pair} {timeframe} " - f"from {timerange.start_fmt} " - f"to {timerange.stop_fmt}") + f"from {timerange.start_fmt} to {timerange.stop_fmt}") self.__cached_pairs_backtesting[saved_pair] = load_pair_history( pair=pair, @@ -344,9 +343,9 @@ class DataProvider: if add_train_candles: train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds total_candles = int(self._config['startup_candle_count'] + train_candles) - logger.info(f'Increasing startup_candle_count for freqai on {timeframe} ' - f'to {total_candles}') - return total_candles + logger.info( + f'Increasing startup_candle_count for freqai on {timeframe} to {total_candles}') + return total_candles def get_pair_dataframe( self, From 94363061ae54980c1b49bf8b7d221061392cf18d Mon Sep 17 00:00:00 2001 From: 
Matthias Date: Sun, 17 Dec 2023 20:11:10 +0100 Subject: [PATCH 4/6] Attempt fix timerange problem --- freqtrade/optimize/backtesting.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index b8145b6c8..242323bec 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -145,13 +145,14 @@ class Backtesting: self.required_startup = max([strat.startup_candle_count for strat in self.strategylist]) self.exchange.validate_required_startup_candles(self.required_startup, self.timeframe) - if self.config.get('freqai', {}).get('enabled', False): - # For FreqAI, increase the required_startup to includes the training data - self.required_startup = self.dataprovider.get_required_startup(self.timeframe) - # Add maximum startup candle count to configuration for informative pairs support self.config['startup_candle_count'] = self.required_startup + if self.config.get('freqai', {}).get('enabled', False): + # For FreqAI, increase the required_startup to include the training data + # This value should NOT be written to startup_candle_count + self.required_startup = self.dataprovider.get_required_startup(self.timeframe) + self.trading_mode: TradingMode = config.get('trading_mode', TradingMode.SPOT) # strategies which define "can_short=True" will fail to load in Spot mode. 
self._can_short = self.trading_mode != TradingMode.SPOT @@ -239,7 +240,7 @@ class Backtesting: pairs=self.pairlists.whitelist, timeframe=self.timeframe, timerange=self.timerange, - startup_candles=self.config['startup_candle_count'], + startup_candles=self.required_startup, fail_without_data=True, data_format=self.config['dataformat_ohlcv'], candle_type=self.config.get('candle_type_def', CandleType.SPOT) From cd28244cf1c4415450779ffadefa1ccfa4cb71cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Jou=C3=9Fen?= Date: Tue, 19 Dec 2023 17:14:02 +0000 Subject: [PATCH 5/6] Add and optimize test cases to verify consistent timerange behaviour within freqai. --- tests/freqai/test_freqai_backtesting.py | 57 +++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index 845e6a992..eb58a0d23 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -6,11 +6,17 @@ from unittest.mock import PropertyMock import pytest from freqtrade.commands.optimize_commands import setup_optimize_configuration +from freqtrade.configuration.timerange import TimeRange +from freqtrade.data import history +from freqtrade.data.dataprovider import DataProvider from freqtrade.enums import RunMode +from freqtrade.enums.candletype import CandleType from freqtrade.exceptions import OperationalException +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.optimize.backtesting import Backtesting -from tests.conftest import (CURRENT_TEST_STRATEGY, get_args, log_has_re, patch_exchange, - patched_configuration_load_config_file) +from tests.conftest import (CURRENT_TEST_STRATEGY, get_args, get_patched_exchange, log_has_re, + patch_exchange, patched_configuration_load_config_file) +from tests.freqai.conftest import get_patched_freqai_strategy def test_freqai_backtest_start_backtest_list(freqai_conf, mocker, testdatadir, caplog): @@ 
-40,7 +46,16 @@ def test_freqai_backtest_start_backtest_list(freqai_conf, mocker, testdatadir, c Backtesting.cleanup() -def test_freqai_backtest_load_data(freqai_conf, mocker, caplog): +@pytest.mark.parametrize( + "timeframe, expected_startup_candle_count", + [ + ("5m", 876), + ("15m", 492), + ("1d", 302), + ], +) +def test_freqai_backtest_load_data(freqai_conf, mocker, caplog, + timeframe, expected_startup_candle_count): patch_exchange(mocker) now = datetime.now(timezone.utc) @@ -48,10 +63,14 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog): PropertyMock(return_value=['HULUMULU/USDT', 'XRP/USDT'])) mocker.patch('freqtrade.optimize.backtesting.history.load_data') mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now)) + freqai_conf['timeframe'] = timeframe + freqai_conf.get('freqai', {}).get('feature_parameters', {}).update({'include_timeframes': []}) backtesting = Backtesting(deepcopy(freqai_conf)) backtesting.load_bt_data() - assert log_has_re('Increasing startup_candle_count for freqai on.*to.*', caplog) + assert log_has_re(f'Increasing startup_candle_count for freqai on {timeframe} ' + f'to {expected_startup_candle_count}', caplog) + assert history.load_data.call_args[1]['startup_candles'] == expected_startup_candle_count Backtesting.cleanup() @@ -85,3 +104,33 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda Backtesting(bt_config) Backtesting.cleanup() + + +def test_freqai_backtest_consistent_timerange(mocker, freqai_conf): + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['XRP/USDT:USDT'])) + + gbs = mocker.patch('freqtrade.optimize.backtesting.generate_backtest_stats') + + freqai_conf['candle_type_def'] = CandleType.FUTURES + freqai_conf.get('exchange', {}).update({'pair_whitelist': ['XRP/USDT:USDT']}) + freqai_conf.get('freqai', {}).get('feature_parameters', {}).update( + {'include_timeframes': ['5m', '1h'], 
'include_corr_pairlist': []}) + freqai_conf['timerange'] = '20211120-20211121' + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + + timerange = TimeRange.parse_timerange("20211115-20211122") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + backtesting = Backtesting(deepcopy(freqai_conf)) + backtesting.start() + + assert gbs.call_args[1]['min_date'] == datetime(2021, 11, 20, 0, 0, tzinfo=timezone.utc) + assert gbs.call_args[1]['max_date'] == datetime(2021, 11, 21, 0, 0, tzinfo=timezone.utc) From 3f44f56f6fe321c600cda4b5a20a121a3e8a9120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Jou=C3=9Fen?= Date: Tue, 19 Dec 2023 17:30:15 +0000 Subject: [PATCH 6/6] Remove bool from get_required_startup because it's unused. --- freqtrade/data/dataprovider.py | 6 ++-- tests/data/test_dataprovider.py | 51 ++++++++------------------------- 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index 3ae9f3a7f..b737007c4 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -329,7 +329,7 @@ class DataProvider: ) return self.__cached_pairs_backtesting[saved_pair].copy() - def get_required_startup(self, timeframe: str, add_train_candles: bool = True) -> int: + def get_required_startup(self, timeframe: str) -> int: freqai_config = self._config.get('freqai', {}) if not freqai_config.get('enabled', False): return self._config.get('startup_candle_count', 0) @@ -339,9 +339,7 @@ class DataProvider: # make sure the startupcandles is at least the set maximum indicator periods self._config['startup_candle_count'] = max(startup_candles, max(indicator_periods)) tf_seconds = timeframe_to_seconds(timeframe) - train_candles = 0 - if 
add_train_candles: - train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds + train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds total_candles = int(self._config['startup_candle_count'] + train_candles) logger.info( f'Increasing startup_candle_count for freqai on {timeframe} to {total_candles}') diff --git a/tests/data/test_dataprovider.py b/tests/data/test_dataprovider.py index a61dd9679..e0231d892 100644 --- a/tests/data/test_dataprovider.py +++ b/tests/data/test_dataprovider.py @@ -508,16 +508,13 @@ def test_dp_get_required_startup(default_conf_usdt): dp = DataProvider(default_conf_usdt, None) # No FreqAI config - assert dp.get_required_startup('5m', False) == 0 - assert dp.get_required_startup('1h', False) == 0 - assert dp.get_required_startup('1d', False) == 0 - assert dp.get_required_startup('1d', True) == 0 + assert dp.get_required_startup('5m') == 0 + assert dp.get_required_startup('1h') == 0 assert dp.get_required_startup('1d') == 0 dp._config['startup_candle_count'] = 20 - assert dp.get_required_startup('5m', False) == 20 - assert dp.get_required_startup('5m', True) == 20 - assert dp.get_required_startup('1h', False) == 20 + assert dp.get_required_startup('5m') == 20 + assert dp.get_required_startup('1h') == 20 assert dp.get_required_startup('1h') == 20 # With freqAI config @@ -532,37 +529,19 @@ def test_dp_get_required_startup(default_conf_usdt): ] } } - assert dp.get_required_startup('5m', False) == 20 - assert dp.get_required_startup('5m', True) == 5780 - - assert dp.get_required_startup('1h', False) == 20 - assert dp.get_required_startup('1h', True) == 500 - - assert dp.get_required_startup('1d', False) == 20 - assert dp.get_required_startup('1d', True) == 40 + assert dp.get_required_startup('5m') == 5780 + assert dp.get_required_startup('1h') == 500 assert dp.get_required_startup('1d') == 40 # FreqAI kindof ignores startup_candle_count if it's below indicator_periods_candles dp._config['startup_candle_count'] = 0 
- assert dp.get_required_startup('5m', False) == 20 - assert dp.get_required_startup('5m', True) == 5780 - - assert dp.get_required_startup('1h', False) == 20 - assert dp.get_required_startup('1h', True) == 500 - - assert dp.get_required_startup('1d', False) == 20 - assert dp.get_required_startup('1d', True) == 40 + assert dp.get_required_startup('5m') == 5780 + assert dp.get_required_startup('1h') == 500 assert dp.get_required_startup('1d') == 40 dp._config['freqai']['feature_parameters']['indicator_periods_candles'][1] = 50 - assert dp.get_required_startup('5m', False) == 50 - assert dp.get_required_startup('5m', True) == 5810 - - assert dp.get_required_startup('1h', False) == 50 - assert dp.get_required_startup('1h', True) == 530 - - assert dp.get_required_startup('1d', False) == 50 - assert dp.get_required_startup('1d', True) == 70 + assert dp.get_required_startup('5m') == 5810 + assert dp.get_required_startup('1h') == 530 assert dp.get_required_startup('1d') == 70 # scenario from issue https://github.com/freqtrade/freqtrade/issues/9432 @@ -577,12 +556,6 @@ def test_dp_get_required_startup(default_conf_usdt): } } dp._config['startup_candle_count'] = 40 - assert dp.get_required_startup('5m', False) == 40 - assert dp.get_required_startup('5m', True) == 51880 - - assert dp.get_required_startup('1h', False) == 40 - assert dp.get_required_startup('1h', True) == 4360 - - assert dp.get_required_startup('1d', False) == 40 - assert dp.get_required_startup('1d', True) == 220 + assert dp.get_required_startup('5m') == 51880 + assert dp.get_required_startup('1h') == 4360 assert dp.get_required_startup('1d') == 220