From a9d5e04a4320f321e9f19862e2c88be3c2b05f7b Mon Sep 17 00:00:00 2001 From: th0rntwig Date: Thu, 6 Oct 2022 19:26:33 +0200 Subject: [PATCH 1/3] Remove constant labels from prediction --- freqtrade/freqai/data_kitchen.py | 12 ++++++++++++ freqtrade/freqai/freqai_interface.py | 2 ++ tests/freqai/conftest.py | 2 ++ tests/freqai/test_freqai_interface.py | 10 ++++++++++ 4 files changed, 26 insertions(+) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 7ea2daf02..23bba3f1a 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -460,6 +460,18 @@ class FreqaiDataKitchen: return df + def check_pred_labels(self, df_predictions: DataFrame) -> None: + """ + Check that prediction feature labels match training feature labels. + :params: + :df_predictions: incoming predictions + """ + train_labels = self.data_dictionary["train_features"].columns + pred_labels = df_predictions.columns + if len(train_labels.difference(pred_labels)) != 0: + self.data_dictionary["prediction_features"] = df_predictions[train_labels] + return + def principal_component_analysis(self) -> None: """ Performs Principal Component Analysis on the data for dimensionality reduction diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 5ac7bc32c..62c814c72 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -492,6 +492,8 @@ class IFreqaiModel(ABC): # ensure user is feeding the correct indicators to the model self.check_if_feature_list_matches_strategy(dk) + dk.check_pred_labels(dk.data_dictionary['prediction_features']) + if ft_params.get('inlier_metric_window', 0): dk.compute_inlier_metric(set_='predict') diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 026b45afc..df61b284a 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -107,6 +107,8 @@ def make_unfiltered_dataframe(mocker, freqai_conf): unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( strategy, corr_dataframes, base_dataframes, freqai.dk.pair ) + for i in range(5): + unfiltered_dataframe[f'constant_{i}'] = i unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index a61853c47..238c0418c 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -181,6 +181,8 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + for i in range(5): + df[f'constant_{i}'] = i metadata = {"pair": "LTC/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) @@ -208,6 +210,8 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + for i in range(5): + df[f'constant_{i}'] = i metadata = {"pair": "LTC/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) @@ -233,6 +237,8 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + for i in range(5): + df[f'constant_{i}'] = i metadata = {"pair": "ADA/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) @@ -256,6 +262,8 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + for i in range(5): + df[f'constant_{i}'] = i freqai.start_backtesting(df, metadata, freqai.dk) assert log_has_re( @@ -312,6 +320,8 @@ def test_follow_mode(mocker, freqai_conf): freqai.dd.load_all_pair_histories(timerange, freqai.dk) df = strategy.dp.get_pair_dataframe('ADA/BTC', '5m') + for i in range(5): + df[f'constant_{i}'] = i freqai.start_live(df, metadata, strategy, freqai.dk) assert len(freqai.dk.return_dataframe.index) == 5702 From 4daf0000c7f3619563dfe640ca40fd9880fe5366 Mon Sep 17 00:00:00 2001 From: th0rntwig Date: Sat, 8 Oct 2022 16:15:48 +0200 Subject: [PATCH 2/3] Move check and add log warning --- freqtrade/freqai/data_kitchen.py | 15 +++++++++++---- freqtrade/freqai/freqai_interface.py | 2 -- tests/freqai/test_freqai_interface.py | 22 ++++++++++++---------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 23bba3f1a..de4a53a50 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -241,6 +241,7 @@ class FreqaiDataKitchen: self.data["filter_drop_index_training"] = drop_index else: + filtered_df = self.check_pred_labels(filtered_df) # we are backtesting so we need to preserve row number to send back to strategy, # so now we use do_predict to avoid any prediction based on a NaN drop_index = pd.isnull(filtered_df).any(axis=1) @@ -460,7 +461,7 @@ class FreqaiDataKitchen: return df - def check_pred_labels(self, df_predictions: DataFrame) -> None: + def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame: """ Check that prediction feature labels match training feature labels. :params: @@ -468,9 +469,15 @@ class FreqaiDataKitchen: """ train_labels = self.data_dictionary["train_features"].columns pred_labels = df_predictions.columns - if len(train_labels.difference(pred_labels)) != 0: - self.data_dictionary["prediction_features"] = df_predictions[train_labels] - return + num_diffs = len(pred_labels.difference(train_labels)) + if num_diffs != 0: + df_predictions = df_predictions[train_labels] + logger.warning( + f"Removed {num_diffs} features from prediction features, " + f"these were likely considered constant values during most recent training." + ) + + return df_predictions def principal_component_analysis(self) -> None: """ diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 62c814c72..5ac7bc32c 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -492,8 +492,6 @@ class IFreqaiModel(ABC): # ensure user is feeding the correct indicators to the model self.check_if_feature_list_matches_strategy(dk) - dk.check_pred_labels(dk.data_dictionary['prediction_features']) - if ft_params.get('inlier_metric_window', 0): dk.compute_inlier_metric(set_='predict') diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 238c0418c..c8444067a 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -157,7 +157,7 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model): ("CatboostClassifier", 6, "freqai_test_classifier") ], ) -def test_start_backtesting(mocker, freqai_conf, model, num_files, strat): +def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog): freqai_conf.get("freqai", {}).update({"save_backtest_models": True}) freqai_conf['runmode'] = RunMode.BACKTEST Trade.use_db = False @@ -182,13 +182,21 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat): df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") for i in range(5): - df[f'constant_{i}'] = i + df.loc[:, f'%-constant_{i}'] = i metadata = {"pair": "LTC/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] assert len(model_folders) == num_files + assert log_has_re( + "Removed features ", + caplog, + ) + assert log_has_re( + "Removed 5 features from prediction features, ", + caplog, + ) Backtesting.cleanup() shutil.rmtree(Path(freqai.dk.full_path)) @@ -210,8 +218,6 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - for i in range(5): - df[f'constant_{i}'] = i metadata = {"pair": "LTC/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) @@ -237,8 +243,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - for i in range(5): - df[f'constant_{i}'] = i metadata = {"pair": "ADA/BTC"} freqai.start_backtesting(df, metadata, freqai.dk) @@ -262,8 +266,7 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - for i in range(5): - df[f'constant_{i}'] = i + freqai.start_backtesting(df, metadata, freqai.dk) assert log_has_re( @@ -320,8 +323,7 @@ def test_follow_mode(mocker, freqai_conf): freqai.dd.load_all_pair_histories(timerange, freqai.dk) df = strategy.dp.get_pair_dataframe('ADA/BTC', '5m') - for i in range(5): - df[f'constant_{i}'] = i + freqai.start_live(df, metadata, strategy, freqai.dk) assert len(freqai.dk.return_dataframe.index) == 5702 From c9eee2eba4b6c10c21c47fe9362f663feed1df41 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 10 Oct 2022 20:50:54 +0200 Subject: [PATCH 3/3] revert syntax --- tests/freqai/test_freqai_interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index c8444067a..445b718d2 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -182,7 +182,8 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") for i in range(5): - df.loc[:, f'%-constant_{i}'] = i + df[f'%-constant_{i}'] = i + # df.loc[:, f'%-constant_{i}'] = i metadata = {"pair": "LTC/BTC"} freqai.start_backtesting(df, metadata, freqai.dk)