From 5ac141f72b2df55d4ef9444a746860f73a82b8e6 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 6 Jun 2023 21:05:51 +0200 Subject: [PATCH] convert to new datasieve api --- docs/freqai-feature-engineering.md | 35 ++----------------- freqtrade/freqai/freqai_interface.py | 22 ++++++------ .../prediction_models/XGBoostRegressor.py | 6 ++-- freqtrade/freqai/transforms/__init__.py | 6 ---- .../freqai/transforms/quantile_transform.py | 28 --------------- requirements-freqai.txt | 2 +- 6 files changed, 18 insertions(+), 81 deletions(-) delete mode 100644 freqtrade/freqai/transforms/__init__.py delete mode 100644 freqtrade/freqai/transforms/quantile_transform.py diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index eb4b4272e..0eee0793b 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -254,47 +254,18 @@ Users are encouraged to customize the data pipeline to their needs by building t """ User defines their custom eature pipeline here (if they wish) """ - from freqtrade.freqai.transforms import FreqaiQuantileTransformer + from sklearn.preprocessing import QuantileTransformer dk.feature_pipeline = Pipeline([ - ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) ]) return ``` -Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. If you have a custom step that you would like to add to the pipeline, you simply create a class that follows the DataSieve/SKLearn API. That means your step must have a `fit()`, `transform()`, `fit_transform()`, and `inverse_transform()` method. You can see examples of this in the `freqtrade.freqai.transforms` module where we use SKLearn `QuantileNormalization` to create a new step for the pipeline. +Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. 
You can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class (imported from `datasieve.transforms`). As there is the `feature_pipeline`, there also exists a definition for the `label_pipeline` which can be defined the same way as the `feature_pipeline`, by overriding `define_label_pipeline`. -!!! note "Inheritence required" - While most SKLearn methods are very easy to override, as shown in freqtrade/freqai/transforms/quantile_transform.py, they still need to include passing X, y, and sample_weights through all `fit()`, `transform()`, `fit_transform()` and `inverse_transform()` functions, even if that means a direct pass through without modifications. - - - ## Outlier detection Equity and crypto markets suffer from a high level of non-patterned noise in the form of outlier data points. FreqAI implements a variety of methods to identify such outliers and hence mitigate risk. diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 3f04b17fb..ffe0ee8c3 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -12,8 +12,10 @@ import numpy as np import pandas as pd import psutil from datasieve.pipeline import Pipeline +from datasieve.transforms import SKLearnWrapper from numpy.typing import NDArray from pandas import DataFrame +from sklearn.preprocessing import MinMaxScaler from freqtrade.configuration import TimeRange from freqtrade.constants import Config @@ -509,25 +511,25 @@ class IFreqaiModel(ABC): ft_params = self.freqai_info["feature_parameters"] dk.feature_pipeline = Pipeline([ ('const', ds.DataSieveVarianceThreshold(threshold=0)), - ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) if ft_params.get("principal_component_analysis", False): - dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())] - dk.feature_pipeline.steps += [('post-pca-scaler', - ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))] + 
dk.feature_pipeline.append(('pca', ds.DataSievePCA())) + dk.feature_pipeline.append(('post-pca-scaler', + SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) if ft_params.get("use_SVM_to_remove_outliers", False): svm_params = ft_params.get( "svm_params", {"shuffle": False, "nu": 0.01}) - dk.feature_pipeline.steps += [('svm', ds.SVMOutlierExtractor(**svm_params))] + dk.feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params))) di = ft_params.get("DI_threshold", 0) if di: - dk.feature_pipeline.steps += [('di', ds.DissimilarityIndex(di_threshold=di))] + dk.feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.feature_pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())] + dk.feature_pipeline.append(('dbscan', ds.DataSieveDBSCAN())) dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams( {}, dk.feature_pipeline.steps) @@ -538,7 +540,7 @@ class IFreqaiModel(ABC): def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: dk.label_pipeline = Pipeline([ - ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) def model_exists(self, dk: FreqaiDataKitchen) -> bool: @@ -551,8 +553,6 @@ class IFreqaiModel(ABC): """ if self.dd.model_type == 'joblib': file_type = ".joblib" - elif self.dd.model_type == 'keras': - file_type = ".h5" elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]: file_type = ".zip" @@ -676,7 +676,7 @@ class IFreqaiModel(ABC): # # for keras type models, the conv_window needs to be prepended so # # viewing is correct in frequi - if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0): + if self.ft_params.get('inlier_metric_window', 0): n_lost_points = self.freqai_info.get('conv_width', 2) zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))), columns=hist_preds_df.columns) diff --git 
a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index 88d348448..19c051b91 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -9,7 +9,7 @@ from freqtrade.freqai.tensorboard import TBCallback # from datasieve.pipeline import Pipeline -# from freqtrade.freqai.transforms import FreqaiQuantileTransformer +# from sklearn.preprocessing import QuantileTransformer logger = logging.getLogger(__name__) @@ -61,7 +61,7 @@ class XGBoostRegressor(BaseRegressionModel): # User defines their custom eature pipeline here (if they wish) # """ # dk.feature_pipeline = Pipeline([ - # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) # ]) # return @@ -71,7 +71,7 @@ class XGBoostRegressor(BaseRegressionModel): # User defines their custom label pipeline here (if they wish) # """ # dk.label_pipeline = Pipeline([ - # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) # ]) # return diff --git a/freqtrade/freqai/transforms/__init__.py b/freqtrade/freqai/transforms/__init__.py deleted file mode 100644 index 9b7d8ccf5..000000000 --- a/freqtrade/freqai/transforms/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from freqtrade.freqai.transforms.quantile_transform import FreqaiQuantileTransformer - - -__all__ = ( - "FreqaiQuantileTransformer", -) diff --git a/freqtrade/freqai/transforms/quantile_transform.py b/freqtrade/freqai/transforms/quantile_transform.py deleted file mode 100644 index 3d1bd2731..000000000 --- a/freqtrade/freqai/transforms/quantile_transform.py +++ /dev/null @@ -1,28 +0,0 @@ -from sklearn.preprocessing import QuantileTransformer - - -class FreqaiQuantileTransformer(QuantileTransformer): - """ - A subclass of the SKLearn Quantile that ensures fit, transform, 
fit_transform and - inverse_transform all take the full set of params X, y, sample_weight required to - benefit from the DataSieve features. - """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def fit_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - super().fit(X) - X = super().transform(X) - return X, y, sample_weight, feature_list - - def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - super().fit(X) - return X, y, sample_weight, feature_list - - def transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - X = super().transform(X) - return X, y, sample_weight, feature_list - - def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - return super().inverse_transform(X), y, sample_weight, feature_list diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 31c73b594..748950e24 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.1.0 +datasieve==0.1.1