diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 55f302d40..540aa700b 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -68,7 +68,7 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to- This way, you can return to using any model you wish by simply specifying the `identifier`. !!! Note - Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire backtest timerange. This means that you should be sure that features do look-ahead into the future. + Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look-ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire training timerange. This means that you should be sure that features do not look ahead into the future. More details about look-ahead bias can be found in [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies). --- diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index 11cbd7934..dbed8b29b 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -311,11 +311,14 @@ class DataProvider: timerange = TimeRange.parse_timerange(None if self._config.get( 'timerange') is None else str(self._config.get('timerange'))) - # It is not necessary to add the training candles, as they - # were already added at the beginning of the backtest. 
- startup_candles = self.get_required_startup(str(timeframe), False) + startup_candles = self.get_required_startup(str(timeframe)) tf_seconds = timeframe_to_seconds(str(timeframe)) timerange.subtract_start(tf_seconds * startup_candles) + + logger.info(f"Loading data for {pair} {timeframe} " + f"from {timerange.start_fmt} " + f"to {timerange.stop_fmt}") + self.__cached_pairs_backtesting[saved_pair] = load_pair_history( pair=pair, timeframe=timeframe, diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 1bdd8b0d5..6d4d6c8dc 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -709,6 +709,8 @@ class FreqaiDataKitchen: pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs) informative_copy = informative_df.copy() + logger.debug(f"Populating features for {pair} {tf}") + for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: df_features = strategy.feature_engineering_expand_all( informative_copy.copy(), t, metadata=metadata) @@ -788,6 +790,7 @@ class FreqaiDataKitchen: if not prediction_dataframe.empty: dataframe = prediction_dataframe.copy() + base_dataframes[self.config["timeframe"]] = dataframe.copy() else: dataframe = base_dataframes[self.config["timeframe"]].copy() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index cac9d9838..ca7f8a9d4 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta, timezone from pathlib import Path from unittest.mock import MagicMock +import pandas as pd import pytest from freqtrade.configuration import TimeRange @@ -135,3 +136,63 @@ def test_get_full_model_path(mocker, freqai_conf, model): model_path = freqai.dk.get_full_models_path(freqai_conf) assert model_path.is_dir() is True + + +def test_get_pair_data_for_features_with_prealoaded_data(mocker, freqai_conf): + strategy = 
get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + _, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df) + + assert df is base_df["5m"] + assert not df.empty + + +def test_get_pair_data_for_features_without_preloaded_data(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180115-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + base_df = {'5m': pd.DataFrame()} + df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df) + + assert df is not base_df["5m"] + assert not df.empty + assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-11 23:00:00" + assert df.iloc[-1]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-30 00:00:00" + + +def test_populate_features(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180115-20180130") + 
freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk) + mocker.patch.object(strategy, 'feature_engineering_expand_all', return_value=base_df["5m"]) + df = freqai.dk.populate_features(base_df["5m"], "LTC/BTC", strategy, + base_dataframes=base_df, corr_dataframes=corr_df) + + strategy.feature_engineering_expand_all.assert_called_once() + pd.testing.assert_frame_equal(base_df["5m"], + strategy.feature_engineering_expand_all.call_args[0][0]) + + assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-15 00:00:00"