mirror of
https://github.com/freqtrade/freqtrade.git
synced 2026-02-11 00:30:40 +00:00
convert to using constants in data_drawer. Remove unneeded check_if_pred_in_spaces function
This commit is contained in:
@@ -27,6 +27,11 @@ from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
FEATURE_PIPELINE = "feature_pipeline"
|
||||
LABEL_PIPELINE = "label_pipeline"
|
||||
TRAINDF = "trained_df"
|
||||
METADATA = "metadata"
|
||||
|
||||
|
||||
class pair_info(TypedDict):
|
||||
model_filename: str
|
||||
@@ -424,7 +429,7 @@ class FreqaiDataDrawer:
|
||||
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
|
||||
dk.data["label_list"] = dk.label_list
|
||||
|
||||
with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
|
||||
with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp:
|
||||
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
return
|
||||
@@ -454,19 +459,19 @@ class FreqaiDataDrawer:
|
||||
dk.data["training_features_list"] = dk.training_features_list
|
||||
dk.data["label_list"] = dk.label_list
|
||||
# store the metadata
|
||||
with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
|
||||
with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp:
|
||||
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
# save the pipelines to pickle files
|
||||
with (save_path / f"{dk.model_filename}_feature_pipeline.pkl").open("wb") as fp:
|
||||
with (save_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("wb") as fp:
|
||||
cloudpickle.dump(dk.feature_pipeline, fp)
|
||||
|
||||
with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp:
|
||||
with (save_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("wb") as fp:
|
||||
cloudpickle.dump(dk.label_pipeline, fp)
|
||||
|
||||
# save the train data to file so we can check preds for area of applicability later
|
||||
dk.data_dictionary["train_features"].to_pickle(
|
||||
save_path / f"{dk.model_filename}_trained_df.pkl"
|
||||
save_path / f"{dk.model_filename}_{TRAINDF}.pkl"
|
||||
)
|
||||
|
||||
dk.data_dictionary["train_dates"].to_pickle(
|
||||
@@ -479,10 +484,10 @@ class FreqaiDataDrawer:
|
||||
|
||||
if coin not in self.meta_data_dictionary:
|
||||
self.meta_data_dictionary[coin] = {}
|
||||
self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
|
||||
self.meta_data_dictionary[coin]["meta_data"] = dk.data
|
||||
self.meta_data_dictionary[coin]["feature_pipeline"] = dk.feature_pipeline
|
||||
self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline
|
||||
self.meta_data_dictionary[coin][TRAINDF] = dk.data_dictionary["train_features"]
|
||||
self.meta_data_dictionary[coin][METADATA] = dk.data
|
||||
self.meta_data_dictionary[coin][FEATURE_PIPELINE] = dk.feature_pipeline
|
||||
self.meta_data_dictionary[coin][LABEL_PIPELINE] = dk.label_pipeline
|
||||
self.save_drawer_to_disk()
|
||||
|
||||
return
|
||||
@@ -492,7 +497,7 @@ class FreqaiDataDrawer:
|
||||
Load only metadata into datakitchen to increase performance during
|
||||
presaved backtesting (prediction file loading).
|
||||
"""
|
||||
with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
|
||||
with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp:
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
@@ -512,20 +517,20 @@ class FreqaiDataDrawer:
|
||||
dk.data_path = Path(self.pair_dict[coin]["data_path"])
|
||||
|
||||
if coin in self.meta_data_dictionary:
|
||||
dk.data = self.meta_data_dictionary[coin]["meta_data"]
|
||||
dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"]
|
||||
dk.feature_pipeline = self.meta_data_dictionary[coin]["feature_pipeline"]
|
||||
dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"]
|
||||
dk.data = self.meta_data_dictionary[coin][METADATA]
|
||||
dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin][TRAINDF]
|
||||
dk.feature_pipeline = self.meta_data_dictionary[coin][FEATURE_PIPELINE]
|
||||
dk.label_pipeline = self.meta_data_dictionary[coin][LABEL_PIPELINE]
|
||||
else:
|
||||
with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
|
||||
with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp:
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
dk.data_dictionary["train_features"] = pd.read_pickle(
|
||||
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
|
||||
dk.data_path / f"{dk.model_filename}_{TRAINDF}.pkl"
|
||||
)
|
||||
with (dk.data_path / f"{dk.model_filename}_feature_pipeline.pkl").open("rb") as fp:
|
||||
with (dk.data_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp:
|
||||
dk.feature_pipeline = cloudpickle.load(fp)
|
||||
with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp:
|
||||
with (dk.data_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("rb") as fp:
|
||||
dk.label_pipeline = cloudpickle.load(fp)
|
||||
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
|
||||
@@ -13,7 +13,6 @@ import pandas as pd
|
||||
import psutil
|
||||
from datasieve.pipeline import Pipeline
|
||||
from pandas import DataFrame
|
||||
from sklearn.metrics.pairwise import pairwise_distances
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
@@ -82,6 +81,7 @@ class FreqaiDataKitchen:
|
||||
self.backtest_live_models = config.get("freqai_backtest_live_models", False)
|
||||
self.feature_pipeline = Pipeline()
|
||||
self.label_pipeline = Pipeline()
|
||||
self.DI_values: npt.NDArray = np.array([])
|
||||
|
||||
if not self.live:
|
||||
self.full_path = self.get_full_models_path(self.config)
|
||||
@@ -391,37 +391,6 @@ class FreqaiDataKitchen:
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
self.label_list = labels
|
||||
|
||||
def check_if_pred_in_training_spaces(self) -> None:
|
||||
"""
|
||||
Compares the distance from each prediction point to each training data
|
||||
point. It uses this information to estimate a Dissimilarity Index (DI)
|
||||
and avoid making predictions on any points that are too far away
|
||||
from the training data set.
|
||||
"""
|
||||
|
||||
distance = pairwise_distances(
|
||||
self.data_dictionary["train_features"],
|
||||
self.data_dictionary["prediction_features"],
|
||||
n_jobs=self.thread_count,
|
||||
)
|
||||
|
||||
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
|
||||
|
||||
do_predict = np.where(
|
||||
self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"],
|
||||
1,
|
||||
0,
|
||||
)
|
||||
|
||||
if (len(do_predict) - do_predict.sum()) > 0:
|
||||
logger.info(
|
||||
f"{self.pair}: DI tossed {len(do_predict) - do_predict.sum()} predictions for "
|
||||
"being too far from training data."
|
||||
)
|
||||
|
||||
self.do_predict += do_predict
|
||||
self.do_predict -= 1
|
||||
|
||||
def set_weights_higher_recent(self, num_weights: int) -> npt.ArrayLike:
|
||||
"""
|
||||
Set weights so that recent data is more heavily weighted during
|
||||
|
||||
Reference in New Issue
Block a user