From f6a32f4ffd91def67a98f77a2aafe513185805cd Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 29 May 2023 23:35:24 +0200 Subject: [PATCH] bump version --- freqtrade/freqai/data_kitchen.py | 115 --------------------------- freqtrade/freqai/freqai_interface.py | 6 +- requirements-freqai.txt | 2 +- 3 files changed, 5 insertions(+), 118 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 04182dc69..127193a35 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -77,8 +77,6 @@ class FreqaiDataKitchen: self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair - - # self.svm_model: linear_model.SGDOneClassSVM = None self.keras: bool = self.freqai_config.get("keras", False) self.set_all_pairs() self.backtest_live_models = config.get("freqai_backtest_live_models", False) @@ -225,13 +223,6 @@ class FreqaiDataKitchen: drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs, drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement. if (training_filter): - # const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index) - # if const_cols: - # filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols)) - # self.data['constant_features_list'] = const_cols - # logger.warning(f"Removed features {const_cols} with constant values.") - # else: - # self.data['constant_features_list'] = [] # we don't care about total row number (total no. datapoints) in training, we only care # about removing any row with NaNs @@ -264,9 +255,6 @@ class FreqaiDataKitchen: else: - # if 'constant_features_list' in self.data and len(self.data['constant_features_list']): - # filtered_df = self.check_pred_labels(filtered_df) - # we are backtesting so we need to preserve row number to send back to strategy, # so now we use do_predict to avoid any prediction based on a NaN drop_index = pd.isnull(filtered_df).any(axis=1) @@ -308,107 +296,6 @@ class FreqaiDataKitchen: return self.data_dictionary - # def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: - # """ - # Normalize all data in the data_dictionary according to the training dataset - # :param data_dictionary: dictionary containing the cleaned and - # split training/test data/labels - # :returns: - # :data_dictionary: updated dictionary with standardized values. - # """ - - # # standardize the data by training stats - # train_max = data_dictionary["train_features"].max() - # train_min = data_dictionary["train_features"].min() - # data_dictionary["train_features"] = ( - # 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1 - # ) - # data_dictionary["test_features"] = ( - # 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 - # ) - - # for item in train_max.keys(): - # self.data[item + "_max"] = train_max[item] - # self.data[item + "_min"] = train_min[item] - - # for item in data_dictionary["train_labels"].keys(): - # if data_dictionary["train_labels"][item].dtype == object: - # continue - # train_labels_max = data_dictionary["train_labels"][item].max() - # train_labels_min = data_dictionary["train_labels"][item].min() - # data_dictionary["train_labels"][item] = ( - # 2 - # * (data_dictionary["train_labels"][item] - train_labels_min) - # / (train_labels_max - train_labels_min) - # - 1 - # ) - # if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - # data_dictionary["test_labels"][item] = ( - # 2 - # * (data_dictionary["test_labels"][item] - train_labels_min) - # / (train_labels_max - train_labels_min) - # - 1 - # ) - - # self.data[f"{item}_max"] = train_labels_max - # self.data[f"{item}_min"] = train_labels_min - # return data_dictionary - - # def normalize_single_dataframe(self, df: DataFrame) -> DataFrame: - - # train_max = df.max() - # train_min = df.min() - # df = ( - # 2 * (df - train_min) / (train_max - train_min) - 1 - # ) - - # for item in train_max.keys(): - # self.data[item + "_max"] = train_max[item] - # self.data[item + "_min"] = train_min[item] - - # return df - - # def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: - # """ - # Normalize a set of data using the mean and standard deviation from - # the associated training data. - # :param df: Dataframe to be standardized - # """ - - # train_max = [None] * len(df.keys()) - # train_min = [None] * len(df.keys()) - - # for i, item in enumerate(df.keys()): - # train_max[i] = self.data[f"{item}_max"] - # train_min[i] = self.data[f"{item}_min"] - - # train_max_series = pd.Series(train_max, index=df.keys()) - # train_min_series = pd.Series(train_min, index=df.keys()) - - # df = ( - # 2 * (df - train_min_series) / (train_max_series - train_min_series) - 1 - # ) - - # return df - - # def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: - # """ - # Denormalize a set of data using the mean and standard deviation from - # the associated training data. - # :param df: Dataframe of predictions to be denormalized - # """ - - # for label in df.columns: - # if df[label].dtype == object or label in self.unique_class_list: - # continue - # df[label] = ( - # (df[label] + 1) - # * (self.data[f"{label}_max"] - self.data[f"{label}_min"]) - # / 2 - # ) + self.data[f"{label}_min"] - - # return df - def split_timerange( self, tr: str, train_split: int = 28, bt_split: float = 7 ) -> Tuple[list, list]: @@ -453,9 +340,7 @@ class FreqaiDataKitchen: tr_training_list_timerange.append(copy.deepcopy(timerange_train)) # associated backtest period - timerange_backtest.startts = timerange_train.stopts - timerange_backtest.stopts = timerange_backtest.startts + int(bt_period) if timerange_backtest.stopts > config_timerange.stopts: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 6dfa9855c..3f04b17fb 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -507,8 +507,10 @@ class IFreqaiModel(ABC): def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: ft_params = self.freqai_info["feature_parameters"] - dk.feature_pipeline = Pipeline( - [('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) + dk.feature_pipeline = Pipeline([ + ('const', ds.DataSieveVarianceThreshold(threshold=0)), + ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ]) if ft_params.get("principal_component_analysis", False): dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())] diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 81d49eee4..31c73b594 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.0.9 +datasieve==0.1.0