convert data_drawer to use constants for artifact filename suffixes; remove unneeded check_if_pred_in_training_spaces function

Author: robcaulk
Date: 2023-06-08 12:19:42 +02:00
parent e39e40dc60
commit 88337b6c5e
2 changed files with 24 additions and 50 deletions
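The new module-level constants replace filename-suffix string literals that were repeated across FreqaiDataDrawer's save and load paths. A minimal sketch of the pattern, reusing the constant names introduced in the diff below (the artifact_path helper is illustrative only, not part of the actual class):

    from pathlib import Path

    FEATURE_PIPELINE = "feature_pipeline"
    LABEL_PIPELINE = "label_pipeline"
    TRAINDF = "trained_df"
    METADATA = "metadata"

    def artifact_path(save_path: Path, model_filename: str, suffix: str, ext: str) -> Path:
        # Build "<model>_<suffix>.<ext>" from one shared constant, so a typo such as
        # "meta_data" vs "metadata" cannot diverge between the writer and the reader.
        return save_path / f"{model_filename}_{suffix}.{ext}"

    # e.g. artifact_path(save_path, dk.model_filename, METADATA, "json")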

freqtrade/freqai/data_drawer.py

@@ -27,6 +27,11 @@ from freqtrade.strategy.interface import IStrategy
 logger = logging.getLogger(__name__)
+FEATURE_PIPELINE = "feature_pipeline"
+LABEL_PIPELINE = "label_pipeline"
+TRAINDF = "trained_df"
+METADATA = "metadata"
 class pair_info(TypedDict):
     model_filename: str
@@ -424,7 +429,7 @@ class FreqaiDataDrawer:
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
dk.data["label_list"] = dk.label_list
with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp:
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
return
@@ -454,19 +459,19 @@ class FreqaiDataDrawer:
dk.data["training_features_list"] = dk.training_features_list
dk.data["label_list"] = dk.label_list
# store the metadata
with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp:
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
# save the pipelines to pickle files
with (save_path / f"{dk.model_filename}_feature_pipeline.pkl").open("wb") as fp:
with (save_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("wb") as fp:
cloudpickle.dump(dk.feature_pipeline, fp)
with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp:
with (save_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("wb") as fp:
cloudpickle.dump(dk.label_pipeline, fp)
# save the train data to file so we can check preds for area of applicability later
dk.data_dictionary["train_features"].to_pickle(
save_path / f"{dk.model_filename}_trained_df.pkl"
save_path / f"{dk.model_filename}_{TRAINDF}.pkl"
)
dk.data_dictionary["train_dates"].to_pickle(
@@ -479,10 +484,10 @@ class FreqaiDataDrawer:
         if coin not in self.meta_data_dictionary:
             self.meta_data_dictionary[coin] = {}
-        self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
-        self.meta_data_dictionary[coin]["meta_data"] = dk.data
-        self.meta_data_dictionary[coin]["feature_pipeline"] = dk.feature_pipeline
-        self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline
+        self.meta_data_dictionary[coin][TRAINDF] = dk.data_dictionary["train_features"]
+        self.meta_data_dictionary[coin][METADATA] = dk.data
+        self.meta_data_dictionary[coin][FEATURE_PIPELINE] = dk.feature_pipeline
+        self.meta_data_dictionary[coin][LABEL_PIPELINE] = dk.label_pipeline
         self.save_drawer_to_disk()
         return
@@ -492,7 +497,7 @@ class FreqaiDataDrawer:
         Load only metadata into datakitchen to increase performance during
         presaved backtesting (prediction file loading).
         """
-        with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
+        with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp:
             dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
         dk.training_features_list = dk.data["training_features_list"]
         dk.label_list = dk.data["label_list"]
@@ -512,20 +517,20 @@ class FreqaiDataDrawer:
         dk.data_path = Path(self.pair_dict[coin]["data_path"])
         if coin in self.meta_data_dictionary:
-            dk.data = self.meta_data_dictionary[coin]["meta_data"]
-            dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"]
-            dk.feature_pipeline = self.meta_data_dictionary[coin]["feature_pipeline"]
-            dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"]
+            dk.data = self.meta_data_dictionary[coin][METADATA]
+            dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin][TRAINDF]
+            dk.feature_pipeline = self.meta_data_dictionary[coin][FEATURE_PIPELINE]
+            dk.label_pipeline = self.meta_data_dictionary[coin][LABEL_PIPELINE]
         else:
-            with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
+            with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp:
                 dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
             dk.data_dictionary["train_features"] = pd.read_pickle(
-                dk.data_path / f"{dk.model_filename}_trained_df.pkl"
+                dk.data_path / f"{dk.model_filename}_{TRAINDF}.pkl"
             )
-            with (dk.data_path / f"{dk.model_filename}_feature_pipeline.pkl").open("rb") as fp:
+            with (dk.data_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp:
                 dk.feature_pipeline = cloudpickle.load(fp)
-            with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp:
+            with (dk.data_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("rb") as fp:
                 dk.label_pipeline = cloudpickle.load(fp)
         dk.training_features_list = dk.data["training_features_list"]
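The same constants keep the writer and the reader of each artifact in sync. A self-contained sketch of the pipeline round-trip under this naming convention (the save/load helpers and the standalone constant are stand-ins for the drawer's real methods and state; only cloudpickle.dump and cloudpickle.load, both used above, are assumed):

    from pathlib import Path
    import cloudpickle

    FEATURE_PIPELINE = "feature_pipeline"

    def save_feature_pipeline(pipeline, save_path: Path, model_filename: str) -> None:
        # Writer and reader build the filename from the same constant,
        # so renaming the suffix only requires touching one definition.
        with (save_path / f"{model_filename}_{FEATURE_PIPELINE}.pkl").open("wb") as fp:
            cloudpickle.dump(pipeline, fp)

    def load_feature_pipeline(data_path: Path, model_filename: str):
        with (data_path / f"{model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp:
            return cloudpickle.load(fp)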

freqtrade/freqai/data_kitchen.py

@@ -13,7 +13,6 @@ import pandas as pd
 import psutil
 from datasieve.pipeline import Pipeline
 from pandas import DataFrame
-from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.model_selection import train_test_split
 from freqtrade.configuration import TimeRange
@@ -82,6 +81,7 @@ class FreqaiDataKitchen:
         self.backtest_live_models = config.get("freqai_backtest_live_models", False)
         self.feature_pipeline = Pipeline()
         self.label_pipeline = Pipeline()
+        self.DI_values: npt.NDArray = np.array([])
         if not self.live:
             self.full_path = self.get_full_models_path(self.config)
@@ -391,37 +391,6 @@ class FreqaiDataKitchen:
         labels = [c for c in column_names if "&" in c]
         self.label_list = labels
-    def check_if_pred_in_training_spaces(self) -> None:
-        """
-        Compares the distance from each prediction point to each training data
-        point. It uses this information to estimate a Dissimilarity Index (DI)
-        and avoid making predictions on any points that are too far away
-        from the training data set.
-        """
-        distance = pairwise_distances(
-            self.data_dictionary["train_features"],
-            self.data_dictionary["prediction_features"],
-            n_jobs=self.thread_count,
-        )
-        self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
-        do_predict = np.where(
-            self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"],
-            1,
-            0,
-        )
-        if (len(do_predict) - do_predict.sum()) > 0:
-            logger.info(
-                f"{self.pair}: DI tossed {len(do_predict) - do_predict.sum()} predictions for "
-                "being too far from training data."
-            )
-        self.do_predict += do_predict
-        self.do_predict -= 1
     def set_weights_higher_recent(self, num_weights: int) -> npt.ArrayLike:
         """
         Set weights so that recent data is more heavily weighted during