From f8d7c2e21dc11c5e716a431b51fcf4094213d365 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 12:48:27 +0200 Subject: [PATCH] add migration guide, add protections and migration assistance --- docs/freqai-feature-engineering.md | 66 ++++++++++++++++++++++++- docs/strategy_migration.md | 74 ++++++++++++++++++++++++++++ freqtrade/freqai/data_kitchen.py | 64 ++++++++++++++++++++++++ freqtrade/freqai/freqai_interface.py | 23 +++++++++ 4 files changed, 225 insertions(+), 2 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 6e3e7fda6..12e01e30d 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -219,7 +219,7 @@ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. B ![weight-factor](assets/freqai_weight-factor.jpg) -# Building the data pipeline +## Building the data pipeline By default, FreqAI builds a dynamic pipeline based on user congfiguration settings. The default settings are robust and designed to work with a variety of methods. These two steps are a `MinMaxScaler(-1,1)` and a `VarianceThreshold` which removes any column that has 0 variance. Users can activate other steps with more configuration parameters. For example if users add `use_SVM_to_remove_outliers: true` to the `freqai` config, then FreqAI will automatically add the [`SVMOutlierExtractor`](#identifying-outliers-using-a-support-vector-machine-svm) to the pipeline. Likewise, users can add `principal_component_analysis: true` to the `freqai` config to activate PCA. The [DissimilarityIndex](#identifying-outliers-with-the-dissimilarity-index-di) is activated with `DI_threshold: 1`. Finally, noise can also be added to the data with `noise_standard_deviation: 0.1`. Finally, users can add [DBSCAN](#identifying-outliers-with-dbscan) outlier removal with `use_DBSCAN_to_remove_outliers: true`. 
@@ -227,7 +227,7 @@ By default, FreqAI builds a dynamic pipeline based on user congfiguration settin Please review the [parameter table](freqai-parameter-table.md) for more information on these parameters. -## Customizing the pipeline +### Customizing the pipeline Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline`/`define_label_pipeline` functions in their `IFreqaiModel`: @@ -303,6 +303,68 @@ class MyCoolTransform(BaseTransform): !!! note "Hint" You can define this custom class in the same file as your `IFreqaiModel`. +### Migrating a custom `IFreqaiModel` to the new Pipeline + +If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration. + +The conversion involves first removing the calls to `data_cleaning_train/predict()` and replacing them with calls to the `define_data_pipeline()` and `define_label_pipeline()` functions of your `IFreqaiModel` class: + +```python +class MyCoolFreqaiModel(BaseRegressionModel): + def train( + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + ) -> Any: + + # ... your custom stuff + + # Remove these lines + # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + # self.data_cleaning_train(dk) + # data_dictionary = dk.normalize_data(data_dictionary) + + # Add these lines.
Now we control the pipeline fit/transform ourselves + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: + # ... your custom stuff + + # Remove these lines: + # self.data_cleaning_predict(dk) + + # Add these lines: + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + + # Remove this line + # pred_df = dk.denormalize_labels_from_metadata(pred_df) + + # Replace with these lines + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() +``` ## Outlier detection diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md index 5ef7a5a4c..4c10fb126 100644 --- a/docs/strategy_migration.md +++ b/docs/strategy_migration.md @@ -728,3 +728,77 @@ Targets now get their own, dedicated method.
return dataframe ``` + + +### FreqAI - New data Pipeline + +If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration. That means that this migration guide is relevant for a very small percentage of power-users. If you stumbled upon this guide by mistake, feel free to inquire in depth about your problem in the Freqtrade discord server. + +The conversion involves first removing the calls to `data_cleaning_train/predict()` and replacing them with calls to the `define_data_pipeline()` and `define_label_pipeline()` functions of your `IFreqaiModel` class: + +```python linenums="1" hl_lines="11-14 42-43 49-50" +class MyCoolFreqaiModel(BaseRegressionModel): + """ + Some cool custom IFreqaiModel you made before Freqtrade version 2023.6 + """ + def train( + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + ) -> Any: + + # ... your custom stuff + + # Remove these lines + # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + # self.data_cleaning_train(dk) + # data_dictionary = dk.normalize_data(data_dictionary) + + # Add these lines.
Now we control the pipeline fit/transform ourselves + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: # 37 + + # ... your custom stuff + + # Remove these lines: + # self.data_cleaning_predict(dk) + + # Add these lines: + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + + # Remove this line + # pred_df = dk.denormalize_labels_from_metadata(pred_df) + + # Replace with these lines + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() +``` + + +1. Features - Move to `feature_engineering_expand_all` +2. Basic features, not expanded across `include_periods_candles` - move to`feature_engineering_expand_basic()`. +3. Standard features which should not be expanded - move to `feature_engineering_standard()`. +4. Targets - Move this part to `set_freqai_targets()`. 
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index de07865d3..215457992 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -12,6 +12,7 @@ import numpy.typing as npt import pandas as pd import psutil from datasieve.pipeline import Pipeline +from datasieve.transforms import SKLearnWrapper from pandas import DataFrame from sklearn.model_selection import train_test_split @@ -950,3 +951,66 @@ class FreqaiDataKitchen: timerange.startts += buffer * timeframe_to_seconds(self.config["timeframe"]) return timerange + + # deprecated functions + def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + """ + Deprecated shim: log a migration warning, then fit and apply a basic feature/label pipeline + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning("Your custom IFreqaiModel relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." + " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline " + "We added a basic pipeline for you, but this will be removed " + "in a future version.\n" + "This version does not include any outlier configurations") + + import datasieve.transforms as ds + from sklearn.preprocessing import MinMaxScaler + dd = data_dictionary + + self.feature_pipeline = Pipeline([ + ('variance_threshold', ds.VarianceThreshold()), + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) + ]) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = self.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = self.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + self.label_pipeline = Pipeline([ + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) + ]) + + dd["train_labels"], _, _ = 
self.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = self.label_pipeline.transform(dd["test_labels"]) + + return dd + + def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: + """ + Deprecated shim: log a migration warning, then inverse-transform `df` via `label_pipeline` + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning("Your custom IFreqaiModel relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." + " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline " + "We added a basic pipeline for you, but this will be removed " + "in a future version.\n" + "This version does not include any outlier configurations") + + pred_df, _, _ = self.label_pipeline.inverse_transform(df) + self.DI_values = np.zeros(len(pred_df.index)) + self.do_predict = np.ones(len(pred_df.index)) + + return pred_df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 104fcb24d..eff8d4bd5 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -968,3 +968,26 @@ class IFreqaiModel(ABC): :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index) """ + + # deprecated functions + def data_cleaning_train(self, dk: FreqaiDataKitchen, pair: str = ""): + """ + Deprecated no-op: only logs a migration warning; `pair` is optional so `(dk)` calls work + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." 
+ " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline") + return + + def data_cleaning_predict(self, dk: FreqaiDataKitchen, pair: str = ""): + """ + Deprecated no-op: only logs a migration warning; `pair` is optional so `(dk)` calls work + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." + " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline") + return