mirror of
https://github.com/freqtrade/freqtrade.git
synced 2026-01-20 05:50:36 +00:00
convert to new datasieve api
This commit is contained in:
@@ -254,47 +254,18 @@ Users are encouraged to customize the data pipeline to their needs by building t
|
||||
"""
|
||||
User defines their custom feature pipeline here (if they wish)
|
||||
"""
|
||||
from freqtrade.freqai.transforms import FreqaiQuantileTransformer
|
||||
from sklearn.preprocessing import QuantileTransformer
|
||||
dk.feature_pipeline = Pipeline([
|
||||
('qt', FreqaiQuantileTransformer(output_distribution='normal'))
|
||||
('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal')))
|
||||
])
|
||||
|
||||
return
|
||||
```
|
||||
|
||||
Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. If you have a custom step that you would like to add to the pipeline, you simply create a class that follows the DataSieve/SKLearn API. That means your step must have a `fit()`, `transform()`, `fit_transform()`, and `inverse_transform()` method. You can see an example of this in the `freqtrade.freqai.transforms` module, where we subclass the SKLearn `QuantileTransformer` to create a new step for the pipeline.
|
||||
Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class, as shown above.
|
||||
|
||||
Just as there is a `feature_pipeline`, there is also a `label_pipeline`, which can be defined in the same way by overriding `define_label_pipeline`.
|
||||
|
||||
!!! note "Inheritance required"
|
||||
While most SKLearn methods are very easy to override, as shown in `freqtrade/freqai/transforms/quantile_transform.py`, they still need to pass `X`, `y`, and `sample_weight` through all `fit()`, `transform()`, `fit_transform()` and `inverse_transform()` functions, even if that means a direct pass-through without modifications.
|
||||
|
||||
<!-- ## Data dimensionality reduction with Principal Component Analysis
|
||||
|
||||
You can reduce the dimensionality of your features by activating the `principal_component_analysis` in the config:
|
||||
|
||||
```json
|
||||
"freqai": {
|
||||
"feature_parameters" : {
|
||||
"principal_component_analysis": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This will perform PCA on the features and reduce their dimensionality so that the explained variance of the data set is >= 0.999. Reducing data dimensionality makes training the model faster and hence allows for more up-to-date models.
|
||||
|
||||
## Inlier metric
|
||||
|
||||
The `inlier_metric` is a metric aimed at quantifying how similar the features of a data point are to the most recent historical data points.
|
||||
|
||||
You define the lookback window by setting `inlier_metric_window` and FreqAI computes the distance between the present time point and each of the previous `inlier_metric_window` lookback points. A Weibull function is fit to each of the lookback distributions and its cumulative distribution function (CDF) is used to produce a quantile for each lookback point. The `inlier_metric` is then computed for each time point as the average of the corresponding lookback quantiles. The figure below explains the concept for an `inlier_metric_window` of 5.
|
||||
|
||||

|
||||
|
||||
FreqAI adds the `inlier_metric` to the training features and hence gives the model access to a novel type of temporal information.
|
||||
|
||||
This function does **not** remove outliers from the data set. -->
|
||||
|
||||
## Outlier detection
|
||||
|
||||
Equity and crypto markets suffer from a high level of non-patterned noise in the form of outlier data points. FreqAI implements a variety of methods to identify such outliers and hence mitigate risk.
|
||||
|
||||
@@ -12,8 +12,10 @@ import numpy as np
|
||||
import pandas as pd
|
||||
import psutil
|
||||
from datasieve.pipeline import Pipeline
|
||||
from datasieve.transforms import SKLearnWrapper
|
||||
from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
@@ -509,25 +511,25 @@ class IFreqaiModel(ABC):
|
||||
ft_params = self.freqai_info["feature_parameters"]
|
||||
dk.feature_pipeline = Pipeline([
|
||||
('const', ds.DataSieveVarianceThreshold(threshold=0)),
|
||||
('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
|
||||
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
|
||||
])
|
||||
|
||||
if ft_params.get("principal_component_analysis", False):
|
||||
dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())]
|
||||
dk.feature_pipeline.steps += [('post-pca-scaler',
|
||||
ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]
|
||||
dk.feature_pipeline.append(('pca', ds.DataSievePCA()))
|
||||
dk.feature_pipeline.append(('post-pca-scaler',
|
||||
SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))))
|
||||
|
||||
if ft_params.get("use_SVM_to_remove_outliers", False):
|
||||
svm_params = ft_params.get(
|
||||
"svm_params", {"shuffle": False, "nu": 0.01})
|
||||
dk.feature_pipeline.steps += [('svm', ds.SVMOutlierExtractor(**svm_params))]
|
||||
dk.feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params)))
|
||||
|
||||
di = ft_params.get("DI_threshold", 0)
|
||||
if di:
|
||||
dk.feature_pipeline.steps += [('di', ds.DissimilarityIndex(di_threshold=di))]
|
||||
dk.feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di)))
|
||||
|
||||
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
||||
dk.feature_pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())]
|
||||
dk.feature_pipeline.append(('dbscan', ds.DataSieveDBSCAN()))
|
||||
|
||||
dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams(
|
||||
{}, dk.feature_pipeline.steps)
|
||||
@@ -538,7 +540,7 @@ class IFreqaiModel(ABC):
|
||||
def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None:
|
||||
|
||||
dk.label_pipeline = Pipeline([
|
||||
('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
|
||||
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
|
||||
])
|
||||
|
||||
def model_exists(self, dk: FreqaiDataKitchen) -> bool:
|
||||
@@ -551,8 +553,6 @@ class IFreqaiModel(ABC):
|
||||
"""
|
||||
if self.dd.model_type == 'joblib':
|
||||
file_type = ".joblib"
|
||||
elif self.dd.model_type == 'keras':
|
||||
file_type = ".h5"
|
||||
elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
|
||||
file_type = ".zip"
|
||||
|
||||
@@ -676,7 +676,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
# # for keras type models, the conv_window needs to be prepended so
|
||||
# # viewing is correct in frequi
|
||||
if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
|
||||
if self.ft_params.get('inlier_metric_window', 0):
|
||||
n_lost_points = self.freqai_info.get('conv_width', 2)
|
||||
zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
|
||||
columns=hist_preds_df.columns)
|
||||
|
||||
@@ -9,7 +9,7 @@ from freqtrade.freqai.tensorboard import TBCallback
|
||||
|
||||
|
||||
# from datasieve.pipeline import Pipeline
|
||||
# from freqtrade.freqai.transforms import FreqaiQuantileTransformer
|
||||
# from sklearn.preprocessing import QuantileTransformer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -61,7 +61,7 @@ class XGBoostRegressor(BaseRegressionModel):
|
||||
# User defines their custom feature pipeline here (if they wish)
|
||||
# """
|
||||
# dk.feature_pipeline = Pipeline([
|
||||
# ('qt', FreqaiQuantileTransformer(output_distribution='normal'))
|
||||
# ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal')))
|
||||
# ])
|
||||
|
||||
# return
|
||||
@@ -71,7 +71,7 @@ class XGBoostRegressor(BaseRegressionModel):
|
||||
# User defines their custom label pipeline here (if they wish)
|
||||
# """
|
||||
# dk.label_pipeline = Pipeline([
|
||||
# ('qt', FreqaiQuantileTransformer(output_distribution='normal'))
|
||||
# ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal')))
|
||||
# ])
|
||||
|
||||
# return
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
from freqtrade.freqai.transforms.quantile_transform import FreqaiQuantileTransformer
|
||||
|
||||
|
||||
__all__ = (
|
||||
"FreqaiQuantileTransformer",
|
||||
)
|
||||
@@ -1,28 +0,0 @@
|
||||
from sklearn.preprocessing import QuantileTransformer
|
||||
|
||||
|
||||
class FreqaiQuantileTransformer(QuantileTransformer):
    """
    SKLearn ``QuantileTransformer`` adapted to the DataSieve step signature.

    ``fit``, ``transform``, ``fit_transform`` and ``inverse_transform`` all accept
    the full set of params ``X, y, sample_weight, feature_list`` and return all
    four as a tuple. Only ``X`` is ever fitted or transformed; ``y``,
    ``sample_weight`` and ``feature_list`` are handed back unchanged.

    No ``__init__`` is defined on purpose: the constructor is inherited from
    ``QuantileTransformer`` so that its named hyper-parameters stay visible to
    scikit-learn's signature-based introspection (``get_params``, ``clone``,
    ``repr``). A ``**kwargs``-only constructor hides them, which makes a cloned
    instance silently fall back to the default settings.
    """

    def fit_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        """
        Fit the quantile mapping on ``X`` and apply it to the same data.

        :param X: feature data to fit on and transform
        :param y: passed through unchanged
        :param sample_weight: passed through unchanged
        :param feature_list: passed through unchanged
        :return: tuple ``(transformed X, y, sample_weight, feature_list)``
        """
        # Call the parent implementations directly: the overrides on this class
        # return tuples, which must not be fed back into the SKLearn code path.
        super().fit(X)
        X = super().transform(X)
        return X, y, sample_weight, feature_list

    def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        """
        Fit the quantile mapping on ``X`` without transforming it.

        :return: tuple ``(X, y, sample_weight, feature_list)``, all unchanged
        """
        super().fit(X)
        return X, y, sample_weight, feature_list

    def transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        """
        Apply the previously fitted quantile mapping to ``X``.

        :return: tuple ``(transformed X, y, sample_weight, feature_list)``
        """
        X = super().transform(X)
        return X, y, sample_weight, feature_list

    def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        """
        Map ``X`` back from the transformed space to the original feature space.

        :return: tuple ``(inverse-transformed X, y, sample_weight, feature_list)``
        """
        return super().inverse_transform(X), y, sample_weight, feature_list
|
||||
@@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py
|
||||
lightgbm==3.3.5
|
||||
xgboost==1.7.5
|
||||
tensorboard==2.13.0
|
||||
datasieve==0.1.0
|
||||
datasieve==0.1.1
|
||||
|
||||
Reference in New Issue
Block a user