From f6a32f4ffd91def67a98f77a2aafe513185805cd Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Mon, 29 May 2023 23:35:24 +0200
Subject: [PATCH] bump datasieve to 0.1.0, add variance threshold step, remove dead code

---
 freqtrade/freqai/data_kitchen.py     | 115 ---------------------------
 freqtrade/freqai/freqai_interface.py |   6 +-
 requirements-freqai.txt              |   2 +-
 3 files changed, 5 insertions(+), 118 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 04182dc69..127193a35 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -77,8 +77,6 @@ class FreqaiDataKitchen:
         self.backtest_predictions_folder: str = "backtesting_predictions"
         self.live = live
         self.pair = pair
-
-        # self.svm_model: linear_model.SGDOneClassSVM = None
         self.keras: bool = self.freqai_config.get("keras", False)
         self.set_all_pairs()
         self.backtest_live_models = config.get("freqai_backtest_live_models", False)
@@ -225,13 +223,6 @@ class FreqaiDataKitchen:
         drop_index = pd.isnull(filtered_df).any(axis=1)  # get the rows that have NaNs,
         drop_index = drop_index.replace(True, 1).replace(False, 0)  # pep8 requirement.
         if (training_filter):
-            # const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
-            # if const_cols:
-            #     filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
-            #     self.data['constant_features_list'] = const_cols
-            #     logger.warning(f"Removed features {const_cols} with constant values.")
-            # else:
-            #     self.data['constant_features_list'] = []
 
             # we don't care about total row number (total no. datapoints) in training, we only care
             # about removing any row with NaNs
@@ -264,9 +255,6 @@ class FreqaiDataKitchen:
 
         else:
 
-            # if 'constant_features_list' in self.data and len(self.data['constant_features_list']):
-            #     filtered_df = self.check_pred_labels(filtered_df)
-
             # we are backtesting so we need to preserve row number to send back to strategy,
             # so now we use do_predict to avoid any prediction based on a NaN
             drop_index = pd.isnull(filtered_df).any(axis=1)
@@ -308,107 +296,6 @@ class FreqaiDataKitchen:
 
         return self.data_dictionary
 
-    # def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-    #     """
-    #     Normalize all data in the data_dictionary according to the training dataset
-    #     :param data_dictionary: dictionary containing the cleaned and
-    #                             split training/test data/labels
-    #     :returns:
-    #     :data_dictionary: updated dictionary with standardized values.
-    #     """
-
-    #     # standardize the data by training stats
-    #     train_max = data_dictionary["train_features"].max()
-    #     train_min = data_dictionary["train_features"].min()
-    #     data_dictionary["train_features"] = (
-    #         2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
-    #     )
-    #     data_dictionary["test_features"] = (
-    #         2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
-    #     )
-
-    #     for item in train_max.keys():
-    #         self.data[item + "_max"] = train_max[item]
-    #         self.data[item + "_min"] = train_min[item]
-
-    #     for item in data_dictionary["train_labels"].keys():
-    #         if data_dictionary["train_labels"][item].dtype == object:
-    #             continue
-    #         train_labels_max = data_dictionary["train_labels"][item].max()
-    #         train_labels_min = data_dictionary["train_labels"][item].min()
-    #         data_dictionary["train_labels"][item] = (
-    #             2
-    #             * (data_dictionary["train_labels"][item] - train_labels_min)
-    #             / (train_labels_max - train_labels_min)
-    #             - 1
-    #         )
-    #         if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-    #             data_dictionary["test_labels"][item] = (
-    #                 2
-    #                 * (data_dictionary["test_labels"][item] - train_labels_min)
-    #                 / (train_labels_max - train_labels_min)
-    #                 - 1
-    #             )
-
-    #         self.data[f"{item}_max"] = train_labels_max
-    #         self.data[f"{item}_min"] = train_labels_min
-    #     return data_dictionary
-
-    # def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
-
-    #     train_max = df.max()
-    #     train_min = df.min()
-    #     df = (
-    #         2 * (df - train_min) / (train_max - train_min) - 1
-    #     )
-
-    #     for item in train_max.keys():
-    #         self.data[item + "_max"] = train_max[item]
-    #         self.data[item + "_min"] = train_min[item]
-
-    #     return df
-
-    # def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-    #     """
-    #     Normalize a set of data using the mean and standard deviation from
-    #     the associated training data.
-    #     :param df: Dataframe to be standardized
-    #     """
-
-    #     train_max = [None] * len(df.keys())
-    #     train_min = [None] * len(df.keys())
-
-    #     for i, item in enumerate(df.keys()):
-    #         train_max[i] = self.data[f"{item}_max"]
-    #         train_min[i] = self.data[f"{item}_min"]
-
-    #     train_max_series = pd.Series(train_max, index=df.keys())
-    #     train_min_series = pd.Series(train_min, index=df.keys())
-
-    #     df = (
-    #         2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
-    #     )
-
-    #     return df
-
-    # def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
-    #     """
-    #     Denormalize a set of data using the mean and standard deviation from
-    #     the associated training data.
-    #     :param df: Dataframe of predictions to be denormalized
-    #     """
-
-    #     for label in df.columns:
-    #         if df[label].dtype == object or label in self.unique_class_list:
-    #             continue
-    #         df[label] = (
-    #             (df[label] + 1)
-    #             * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
-    #             / 2
-    #         ) + self.data[f"{label}_min"]
-
-    #     return df
-
     def split_timerange(
         self, tr: str, train_split: int = 28, bt_split: float = 7
     ) -> Tuple[list, list]:
@@ -453,9 +340,7 @@ class FreqaiDataKitchen:
             tr_training_list_timerange.append(copy.deepcopy(timerange_train))
 
             # associated backtest period
-
             timerange_backtest.startts = timerange_train.stopts
-
             timerange_backtest.stopts = timerange_backtest.startts + int(bt_period)
 
             if timerange_backtest.stopts > config_timerange.stopts:
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 6dfa9855c..3f04b17fb 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -507,8 +507,10 @@ class IFreqaiModel(ABC):
 
     def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None:
         ft_params = self.freqai_info["feature_parameters"]
-        dk.feature_pipeline = Pipeline(
-            [('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))])
+        dk.feature_pipeline = Pipeline([
+            ('const', ds.DataSieveVarianceThreshold(threshold=0)),
+            ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
+            ])
 
         if ft_params.get("principal_component_analysis", False):
             dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())]
diff --git a/requirements-freqai.txt b/requirements-freqai.txt
index 81d49eee4..31c73b594 100644
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py
 lightgbm==3.3.5
 xgboost==1.7.5
 tensorboard==2.13.0
-datasieve==0.0.9
+datasieve==0.1.0