From 88337b6c5eef4c8894911a2b48a12250498d1da3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Thu, 8 Jun 2023 12:19:42 +0200 Subject: [PATCH] convert to using constants in data_drawer. Remove unneeded check_if_pred_in_spaces function --- freqtrade/freqai/data_drawer.py | 41 ++++++++++++++++++-------------- freqtrade/freqai/data_kitchen.py | 33 +------------------------ 2 files changed, 24 insertions(+), 50 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 067790b9a..2a3ec6dd2 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -27,6 +27,11 @@ from freqtrade.strategy.interface import IStrategy logger = logging.getLogger(__name__) +FEATURE_PIPELINE = "feature_pipeline" +LABEL_PIPELINE = "label_pipeline" +TRAINDF = "trained_df" +METADATA = "metadata" + class pair_info(TypedDict): model_filename: str @@ -424,7 +429,7 @@ class FreqaiDataDrawer: dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) dk.data["label_list"] = dk.label_list - with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp: + with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) return @@ -454,19 +459,19 @@ class FreqaiDataDrawer: dk.data["training_features_list"] = dk.training_features_list dk.data["label_list"] = dk.label_list # store the metadata - with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp: + with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) # save the pipelines to pickle files - with (save_path / f"{dk.model_filename}_feature_pipeline.pkl").open("wb") as fp: + with (save_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("wb") as fp: cloudpickle.dump(dk.feature_pipeline, fp) - with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp: + with (save_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("wb") as fp: cloudpickle.dump(dk.label_pipeline, fp) # save the train data to file so we can check preds for area of applicability later dk.data_dictionary["train_features"].to_pickle( - save_path / f"{dk.model_filename}_trained_df.pkl" + save_path / f"{dk.model_filename}_{TRAINDF}.pkl" ) dk.data_dictionary["train_dates"].to_pickle( @@ -479,10 +484,10 @@ class FreqaiDataDrawer: if coin not in self.meta_data_dictionary: self.meta_data_dictionary[coin] = {} - self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"] - self.meta_data_dictionary[coin]["meta_data"] = dk.data - self.meta_data_dictionary[coin]["feature_pipeline"] = dk.feature_pipeline - self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline + self.meta_data_dictionary[coin][TRAINDF] = dk.data_dictionary["train_features"] + self.meta_data_dictionary[coin][METADATA] = dk.data + self.meta_data_dictionary[coin][FEATURE_PIPELINE] = dk.feature_pipeline + self.meta_data_dictionary[coin][LABEL_PIPELINE] = dk.label_pipeline self.save_drawer_to_disk() return @@ -492,7 +497,7 @@ class FreqaiDataDrawer: Load only metadata into datakitchen to increase performance during presaved backtesting (prediction file loading). """ - with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: + with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) dk.training_features_list = dk.data["training_features_list"] dk.label_list = dk.data["label_list"] @@ -512,20 +517,20 @@ class FreqaiDataDrawer: dk.data_path = Path(self.pair_dict[coin]["data_path"]) if coin in self.meta_data_dictionary: - dk.data = self.meta_data_dictionary[coin]["meta_data"] - dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"] - dk.feature_pipeline = self.meta_data_dictionary[coin]["feature_pipeline"] - dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"] + dk.data = self.meta_data_dictionary[coin][METADATA] + dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin][TRAINDF] + dk.feature_pipeline = self.meta_data_dictionary[coin][FEATURE_PIPELINE] + dk.label_pipeline = self.meta_data_dictionary[coin][LABEL_PIPELINE] else: - with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: + with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) dk.data_dictionary["train_features"] = pd.read_pickle( - dk.data_path / f"{dk.model_filename}_trained_df.pkl" + dk.data_path / f"{dk.model_filename}_{TRAINDF}.pkl" ) - with (dk.data_path / f"{dk.model_filename}_feature_pipeline.pkl").open("rb") as fp: + with (dk.data_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp: dk.feature_pipeline = cloudpickle.load(fp) - with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp: + with (dk.data_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("rb") as fp: dk.label_pipeline = cloudpickle.load(fp) dk.training_features_list = dk.data["training_features_list"] diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ecdb2e109..de07865d3 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -13,7 +13,6 @@ import pandas as pd import psutil from datasieve.pipeline import Pipeline from pandas import DataFrame -from sklearn.metrics.pairwise import pairwise_distances from sklearn.model_selection import train_test_split from freqtrade.configuration import TimeRange @@ -82,6 +81,7 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) self.feature_pipeline = Pipeline() self.label_pipeline = Pipeline() + self.DI_values: npt.NDArray = np.array([]) if not self.live: self.full_path = self.get_full_models_path(self.config) @@ -391,37 +391,6 @@ class FreqaiDataKitchen: labels = [c for c in column_names if "&" in c] self.label_list = labels - def check_if_pred_in_training_spaces(self) -> None: - """ - Compares the distance from each prediction point to each training data - point. It uses this information to estimate a Dissimilarity Index (DI) - and avoid making predictions on any points that are too far away - from the training data set. - """ - - distance = pairwise_distances( - self.data_dictionary["train_features"], - self.data_dictionary["prediction_features"], - n_jobs=self.thread_count, - ) - - self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] - - do_predict = np.where( - self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"], - 1, - 0, - ) - - if (len(do_predict) - do_predict.sum()) > 0: - logger.info( - f"{self.pair}: DI tossed {len(do_predict) - do_predict.sum()} predictions for " - "being too far from training data." - ) - - self.do_predict += do_predict - self.do_predict -= 1 - def set_weights_higher_recent(self, num_weights: int) -> npt.ArrayLike: """ Set weights so that recent data is more heavily weighted during