From f8d7c2e21dc11c5e716a431b51fcf4094213d365 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sat, 10 Jun 2023 12:48:27 +0200
Subject: [PATCH] add migration guide, add protections and migration assistance

---
 docs/freqai-feature-engineering.md   | 66 ++++++++++++++++++++++++-
 docs/strategy_migration.md           | 74 ++++++++++++++++++++++++++++
 freqtrade/freqai/data_kitchen.py     | 64 ++++++++++++++++++++++++
 freqtrade/freqai/freqai_interface.py | 23 +++++++++
 4 files changed, 225 insertions(+), 2 deletions(-)

diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md
index 6e3e7fda6..12e01e30d 100644
--- a/docs/freqai-feature-engineering.md
+++ b/docs/freqai-feature-engineering.md
@@ -219,7 +219,7 @@ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. B
 
 ![weight-factor](assets/freqai_weight-factor.jpg)
 
-# Building the data pipeline
+## Building the data pipeline
 
 By default, FreqAI builds a dynamic pipeline based on user congfiguration settings. The default settings are robust and designed to work with a variety of methods. These two steps are a `MinMaxScaler(-1,1)` and a `VarianceThreshold` which removes any column that has 0 variance. Users can activate other steps with more configuration parameters. For example if users add `use_SVM_to_remove_outliers: true` to the `freqai` config, then FreqAI will automatically add the [`SVMOutlierExtractor`](#identifying-outliers-using-a-support-vector-machine-svm) to the pipeline. Likewise, users can add `principal_component_analysis: true` to the `freqai` config to activate PCA. The [DissimilarityIndex](#identifying-outliers-with-the-dissimilarity-index-di) is activated with `DI_threshold: 1`. Finally, noise can also be added to the data with `noise_standard_deviation: 0.1`. Finally, users can add [DBSCAN](#identifying-outliers-with-dbscan) outlier removal with `use_DBSCAN_to_remove_outliers: true`.
 
@@ -227,7 +227,7 @@ By default, FreqAI builds a dynamic pipeline based on user congfiguration settin
     Please review the [parameter table](freqai-parameter-table.md) for more information on these parameters.
 
 
-## Customizing the pipeline
+### Customizing the pipeline
 
 Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline`/`define_label_pipeline` functions in their `IFreqaiModel`:
 
@@ -303,6 +303,68 @@ class MyCoolTransform(BaseTransform):
 !!! note "Hint"
     You can define this custom class in the same file as your `IFreqaiModel`.
 
+### Migrating a custom `IFreqaiModel` to the new Pipeline
+
+If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration.
+
+The conversion involves first removing `data_cleaning_train/predict()` and replacing them with calls to the `define_data_pipeline()` and `define_label_pipeline()` functions of your `IFreqaiModel` class:
+
+```python
+class MyCoolFreqaiModel(BaseRegressionModel):
+    def train(
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
+    ) -> Any:
+
+        # ... your custom stuff
+
+        # Remove these lines
+        # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        # self.data_cleaning_train(dk)
+        # data_dictionary = dk.normalize_data(data_dictionary)
+
+        # Add these lines. Now we control the pipeline fit/transform ourselves
+        dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
+        dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)
+
+        (dd["train_features"],
+         dd["train_labels"],
+         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
+                                                                  dd["train_labels"],
+                                                                  dd["train_weights"])
+
+        (dd["test_features"],
+         dd["test_labels"],
+         dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
+                                                             dd["test_labels"],
+                                                             dd["test_weights"])
+
+        dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
+        dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
+
+    def predict(
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+
+        # ... your custom stuff
+
+        # Remove these lines:
+        # self.data_cleaning_predict(dk)
+
+        # Add these lines:
+        dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
+            dk.data_dictionary["prediction_features"], outlier_check=True)
+
+        # Remove this line
+        # pred_df = dk.denormalize_labels_from_metadata(pred_df)
+        # Replace with these lines
+        pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df)
+        if self.freqai_info.get("DI_threshold", 0) > 0:
+            dk.DI_values = dk.feature_pipeline["di"].di_values
+        else:
+            dk.DI_values = np.zeros(len(outliers.index))
+        dk.do_predict = outliers.to_numpy()
+```
 
 ## Outlier detection
 
diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md
index 5ef7a5a4c..4c10fb126 100644
--- a/docs/strategy_migration.md
+++ b/docs/strategy_migration.md
@@ -728,3 +728,77 @@ Targets now get their own, dedicated method.
 
         return dataframe
 ```
+
+
+### FreqAI - New data Pipeline
+
+If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration. That means that this migration guide is relevant for a very small percentage of power-users. If you stumbled upon this guide by mistake, feel free to inquire in depth about your problem in the Freqtrade discord server.
+
+The conversion involves first removing `data_cleaning_train/predict()` and replacing them with calls to the `define_data_pipeline()` and `define_label_pipeline()` functions of your `IFreqaiModel` class:
+
+```python  linenums="1" hl_lines="11-14 42-43 49-50"
+class MyCoolFreqaiModel(BaseRegressionModel):
+    """
+    Some cool custom IFreqaiModel you made before Freqtrade version 2023.6
+    """
+    def train(
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
+    ) -> Any:
+
+        # ... your custom stuff
+
+        # Remove these lines
+        # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        # self.data_cleaning_train(dk)
+        # data_dictionary = dk.normalize_data(data_dictionary)
+
+        # Add these lines. Now we control the pipeline fit/transform ourselves
+        dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
+        dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)
+
+        (dd["train_features"],
+         dd["train_labels"],
+         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
+                                                                  dd["train_labels"],
+                                                                  dd["train_weights"])
+
+        (dd["test_features"],
+         dd["test_labels"],
+         dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
+                                                             dd["test_labels"],
+                                                             dd["test_weights"])
+
+        dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
+        dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
+
+    def predict(
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+
+        # ... your custom stuff
+
+        # Remove these lines:
+        # self.data_cleaning_predict(dk)
+
+        # Add these lines:
+        dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
+            dk.data_dictionary["prediction_features"], outlier_check=True)
+
+        # Remove this line
+        # pred_df = dk.denormalize_labels_from_metadata(pred_df)
+
+        # Replace with these lines
+        pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df)
+        if self.freqai_info.get("DI_threshold", 0) > 0:
+            dk.DI_values = dk.feature_pipeline["di"].di_values
+        else:
+            dk.DI_values = np.zeros(len(outliers.index))
+        dk.do_predict = outliers.to_numpy()
+```
+
+
+1. Features - Move to `feature_engineering_expand_all()`.
+2. Basic features, not expanded across `include_periods_candles` - move to `feature_engineering_expand_basic()`.
+3. Standard features which should not be expanded - move to `feature_engineering_standard()`.
+4. Targets - Move this part to `set_freqai_targets()`.
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index de07865d3..215457992 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -12,6 +12,7 @@ import numpy.typing as npt
 import pandas as pd
 import psutil
 from datasieve.pipeline import Pipeline
+from datasieve.transforms import SKLearnWrapper
 from pandas import DataFrame
 from sklearn.model_selection import train_test_split
 
@@ -950,3 +951,66 @@ class FreqaiDataKitchen:
             timerange.startts += buffer * timeframe_to_seconds(self.config["timeframe"])
 
         return timerange
+
+    # deprecated functions
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """
+        Deprecated shim kept for migration assistance. Logs a deprecation warning, then
+        fits a basic feature pipeline and a label pipeline on the train split and applies
+        them to the test split.
+        :param data_dictionary: dict holding the "train_*"/"test_*" features, labels, weights
+        :return: the same data_dictionary object, updated in place
+        """
+        docs_url = "https://www.freqtrade.io/en/latest"
+        logger.warning("Your custom IFreqaiModel relies on the deprecated"
+                       " data pipeline. Please update your model to use the new data pipeline."
+                       " This can be achieved by following the migration guide at "
+                       f"{docs_url}/strategy_migration/#freqai-new-data-pipeline "
+                       "We added a basic pipeline for you, but this will be removed "
+                       "in a future version.\n"
+                       "This version does not include any outlier configurations")
+
+        import datasieve.transforms as ds
+        from sklearn.preprocessing import MinMaxScaler
+
+        data = data_dictionary
+
+        # Features: a VarianceThreshold step followed by a MinMaxScaler to (-1, 1).
+        self.feature_pipeline = Pipeline([
+            ('variance_threshold', ds.VarianceThreshold()),
+            ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
+        ])
+
+        for split, run_step in (("train", self.feature_pipeline.fit_transform),
+                                ("test", self.feature_pipeline.transform)):
+            feat_key, label_key, weight_key = (
+                f"{split}_{part}" for part in ("features", "labels", "weights"))
+            data[feat_key], data[label_key], data[weight_key] = run_step(
+                data[feat_key], data[label_key], data[weight_key])
+
+        # Labels get their own MinMaxScaler(-1, 1) pipeline, fit on the train labels only.
+        label_scaler = SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))
+        self.label_pipeline = Pipeline([('scaler', label_scaler)])
+        data["train_labels"], _, _ = self.label_pipeline.fit_transform(data["train_labels"])
+        data["test_labels"], _, _ = self.label_pipeline.transform(data["test_labels"])
+
+        return data
+
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Deprecated shim kept for migration assistance: warns, then inverse-transforms `df`
+        with `self.label_pipeline`; DI_values is zero-filled and do_predict one-filled.
+        """
+        docs_url = "https://www.freqtrade.io/en/latest"
+        logger.warning("Your custom IFreqaiModel relies on the deprecated"
+                       " data pipeline. Please update your model to use the new data pipeline."
+                       " This can be achieved by following the migration guide at "
+                       f"{docs_url}/strategy_migration/#freqai-new-data-pipeline "
+                       "We added a basic pipeline for you, but this will be removed "
+                       "in a future version.\n"
+                       "This version does not include any outlier configurations")
+
+        denormalized, _, _ = self.label_pipeline.inverse_transform(df)
+        n_rows = len(denormalized.index)
+        self.DI_values, self.do_predict = np.zeros(n_rows), np.ones(n_rows)
+        return denormalized
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 104fcb24d..eff8d4bd5 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -968,3 +968,26 @@ class IFreqaiModel(ABC):
         :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
         data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
         """
+
+    # deprecated functions
+    def data_cleaning_train(self, dk: FreqaiDataKitchen, pair: str = "") -> None:
+        """
+        Deprecated no-op kept for migration assistance: only logs a deprecation warning.
+        `pair` is optional so that legacy one-argument calls (`data_cleaning_train(dk)`) work.
+        """
+        ft = "https://www.freqtrade.io/en/latest"
+        logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated"
+                       " data pipeline. Please update your model to use the new data pipeline."
+                       " This can be achieved by following the migration guide at "
+                       f"{ft}/strategy_migration/#freqai-new-data-pipeline")
+
+    def data_cleaning_predict(self, dk: FreqaiDataKitchen, pair: str = "") -> None:
+        """
+        Deprecated no-op kept for migration assistance: only logs a deprecation warning.
+        `pair` is optional so that legacy one-argument calls (`data_cleaning_predict(dk)`) work.
+        """
+        ft = "https://www.freqtrade.io/en/latest"
+        logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated"
+                       " data pipeline. Please update your model to use the new data pipeline."
+                       " This can be achieved by following the migration guide at "
+                       f"{ft}/strategy_migration/#freqai-new-data-pipeline")