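bump datasieve version to 0.1.0, remove commented-out normalization code, add constant-feature (variance threshold) step to the feature pipeline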

2025-11-29 08:33:07 +00:00 · 2023-05-29 23:35:24 +02:00
parent 785f0d396f
commit f6a32f4ffd
3 changed files with 5 additions and 118 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -77,8 +77,6 @@ class FreqaiDataKitchen:
        self.backtest_predictions_folder: str = "backtesting_predictions"
        self.live = live
        self.pair = pair
-
-        # self.svm_model: linear_model.SGDOneClassSVM = None
        self.keras: bool = self.freqai_config.get("keras", False)
        self.set_all_pairs()
        self.backtest_live_models = config.get("freqai_backtest_live_models", False)
@@ -225,13 +223,6 @@ class FreqaiDataKitchen:
        drop_index = pd.isnull(filtered_df).any(axis=1)  # get the rows that have NaNs,
        drop_index = drop_index.replace(True, 1).replace(False, 0)  # pep8 requirement.
        if (training_filter):
-            # const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
-            # if const_cols:
-            #     filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
-            #     self.data['constant_features_list'] = const_cols
-            #     logger.warning(f"Removed features {const_cols} with constant values.")
-            # else:
-            #     self.data['constant_features_list'] = []

            # we don't care about total row number (total no. datapoints) in training, we only care
            # about removing any row with NaNs
@@ -264,9 +255,6 @@ class FreqaiDataKitchen:

        else:

-            # if 'constant_features_list' in self.data and len(self.data['constant_features_list']):
-            #     filtered_df = self.check_pred_labels(filtered_df)
-
            # we are backtesting so we need to preserve row number to send back to strategy,
            # so now we use do_predict to avoid any prediction based on a NaN
            drop_index = pd.isnull(filtered_df).any(axis=1)
@@ -308,107 +296,6 @@ class FreqaiDataKitchen:

        return self.data_dictionary

-    # def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-    #     """
-    #     Normalize all data in the data_dictionary according to the training dataset
-    #     :param data_dictionary: dictionary containing the cleaned and
-    #                             split training/test data/labels
-    #     :returns:
-    #     :data_dictionary: updated dictionary with standardized values.
-    #     """
-
-    #     # standardize the data by training stats
-    #     train_max = data_dictionary["train_features"].max()
-    #     train_min = data_dictionary["train_features"].min()
-    #     data_dictionary["train_features"] = (
-    #         2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
-    #     )
-    #     data_dictionary["test_features"] = (
-    #         2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
-    #     )
-
-    #     for item in train_max.keys():
-    #         self.data[item + "_max"] = train_max[item]
-    #         self.data[item + "_min"] = train_min[item]
-
-    #     for item in data_dictionary["train_labels"].keys():
-    #         if data_dictionary["train_labels"][item].dtype == object:
-    #             continue
-    #         train_labels_max = data_dictionary["train_labels"][item].max()
-    #         train_labels_min = data_dictionary["train_labels"][item].min()
-    #         data_dictionary["train_labels"][item] = (
-    #             2
-    #             * (data_dictionary["train_labels"][item] - train_labels_min)
-    #             / (train_labels_max - train_labels_min)
-    #             - 1
-    #         )
-    #         if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-    #             data_dictionary["test_labels"][item] = (
-    #                 2
-    #                 * (data_dictionary["test_labels"][item] - train_labels_min)
-    #                 / (train_labels_max - train_labels_min)
-    #                 - 1
-    #             )
-
-    #         self.data[f"{item}_max"] = train_labels_max
-    #         self.data[f"{item}_min"] = train_labels_min
-    #     return data_dictionary
-
-    # def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
-
-    #     train_max = df.max()
-    #     train_min = df.min()
-    #     df = (
-    #         2 * (df - train_min) / (train_max - train_min) - 1
-    #     )
-
-    #     for item in train_max.keys():
-    #         self.data[item + "_max"] = train_max[item]
-    #         self.data[item + "_min"] = train_min[item]
-
-    #     return df
-
-    # def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-    #     """
-    #     Normalize a set of data using the mean and standard deviation from
-    #     the associated training data.
-    #     :param df: Dataframe to be standardized
-    #     """
-
-    #     train_max = [None] * len(df.keys())
-    #     train_min = [None] * len(df.keys())
-
-    #     for i, item in enumerate(df.keys()):
-    #         train_max[i] = self.data[f"{item}_max"]
-    #         train_min[i] = self.data[f"{item}_min"]
-
-    #     train_max_series = pd.Series(train_max, index=df.keys())
-    #     train_min_series = pd.Series(train_min, index=df.keys())
-
-    #     df = (
-    #         2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
-    #     )
-
-    #     return df
-
-    # def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
-    #     """
-    #     Denormalize a set of data using the mean and standard deviation from
-    #     the associated training data.
-    #     :param df: Dataframe of predictions to be denormalized
-    #     """
-
-    #     for label in df.columns:
-    #         if df[label].dtype == object or label in self.unique_class_list:
-    #             continue
-    #         df[label] = (
-    #             (df[label] + 1)
-    #             * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
-    #             / 2
-    #         ) + self.data[f"{label}_min"]
-
-    #     return df
-
    def split_timerange(
        self, tr: str, train_split: int = 28, bt_split: float = 7
    ) -> Tuple[list, list]:
@@ -453,9 +340,7 @@ class FreqaiDataKitchen:
            tr_training_list_timerange.append(copy.deepcopy(timerange_train))

            # associated backtest period
-
            timerange_backtest.startts = timerange_train.stopts
-
            timerange_backtest.stopts = timerange_backtest.startts + int(bt_period)

            if timerange_backtest.stopts > config_timerange.stopts:
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -507,8 +507,10 @@ class IFreqaiModel(ABC):

    def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None:
        ft_params = self.freqai_info["feature_parameters"]
-        dk.feature_pipeline = Pipeline(
-            [('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))])
+        dk.feature_pipeline = Pipeline([
+            ('const', ds.DataSieveVarianceThreshold(threshold=0)),
+            ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
+            ])

        if ft_params.get("principal_component_analysis", False):
            dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())]
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py
 lightgbm==3.3.5
 xgboost==1.7.5
 tensorboard==2.13.0
-datasieve==0.0.9
+datasieve==0.1.0