From 97077ba18acd9ea0ad67ba45e917aca6bdcb3b0d Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 6 Sep 2022 20:30:37 +0200 Subject: [PATCH 1/8] add continual learning to catboost and friends --- docs/freqai.md | 1 + freqtrade/freqai/freqai_interface.py | 3 ++- .../prediction_models/BaseClassifierModel.py | 2 +- .../prediction_models/BaseRegressionModel.py | 2 +- .../prediction_models/BaseTensorFlowModel.py | 2 +- .../prediction_models/CatboostClassifier.py | 11 ++++++++--- .../freqai/prediction_models/CatboostRegressor.py | 15 ++++++++------- .../CatboostRegressorMultiTarget.py | 7 +++++-- .../prediction_models/LightGBMClassifier.py | 11 ++++++++--- .../freqai/prediction_models/LightGBMRegressor.py | 11 ++++++++--- .../LightGBMRegressorMultiTarget.py | 7 +++++-- 11 files changed, 48 insertions(+), 24 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index c0844bf32..e790bbb81 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -98,6 +98,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
Defaults to 0, which means models never expire.
**Datatype:** Positive integer. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training data set.
**Datatype:** Positive integer. | `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean. Default: `False`. +| `continual_learning` | If true, FreqAI will start training new models from the final state of the most recently trained model.
**Datatype:** Boolean. Default: `False`. | | **Feature parameters** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](#feature-engineering).
**Datatype:** Dictionary. | `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base asset feature set.
**Datatype:** List of timeframes (strings). diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index a9c21fb65..b6f3d8ebc 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -86,6 +86,7 @@ class IFreqaiModel(ABC): self.begin_time: float = 0 self.begin_time_train: float = 0 self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) + self.continual_learning = self.freqai_info.get('continual_learning', False) self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() @@ -674,7 +675,7 @@ class IFreqaiModel(ABC): """ @abstractmethod - def fit(self, data_dictionary: Dict[str, Any]) -> Any: + def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen) -> Any: """ Most regressors use the same function names and arguments e.g. user can drop in LGBMRegressor in place of CatBoostRegressor and all data diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/prediction_models/BaseClassifierModel.py index 2edbf3b51..e51e26e0f 100644 --- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py +++ b/freqtrade/freqai/prediction_models/BaseClassifierModel.py @@ -61,7 +61,7 @@ class BaseClassifierModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index 2ef175a2e..45f0c2937 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -60,7 +60,7 @@ class BaseRegressionModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py index 04eff045f..66e6ec1fc 100644 --- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py +++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py @@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py index b88b28b25..13395879a 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifier.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py @@ -2,7 +2,7 @@ import logging from typing import Any, Dict from catboost import CatBoostClassifier, Pool - +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel @@ -16,7 +16,7 @@ class CatboostClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. 
""" - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -36,6 +36,11 @@ class CatboostClassifier(BaseClassifierModel): **self.model_training_parameters, ) - cbr.fit(train_data) + if dk.pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[dk.pair] + + cbr.fit(train_data, init_model=init_model) return cbr diff --git a/freqtrade/freqai/prediction_models/CatboostRegressor.py b/freqtrade/freqai/prediction_models/CatboostRegressor.py index d93569c91..0b8bc162b 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressor.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py @@ -3,6 +3,7 @@ import logging from typing import Any, Dict from catboost import CatBoostRegressor, Pool +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel @@ -17,7 +18,7 @@ class CatboostRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -38,16 +39,16 @@ class CatboostRegressor(BaseRegressionModel): weight=data_dictionary["test_weights"], ) + if dk.pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[dk.pair] + model = CatBoostRegressor( allow_writing_files=False, **self.model_training_parameters, ) - model.fit(X=train_data, eval_set=test_data) - - # some evidence that catboost pools have memory leaks: - # https://github.com/catboost/catboost/issues/1835 - del train_data, test_data - gc.collect() + model.fit(X=train_data, eval_set=test_data, init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py index 9894decd1..9ed61488c 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py @@ -3,7 +3,7 @@ from typing import Any, Dict from catboost import CatBoostRegressor # , Pool from sklearn.multioutput import MultiOutputRegressor - +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel @@ -17,7 +17,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. 
""" - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -34,6 +34,9 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) sample_weight = data_dictionary["train_weights"] + if self.continual_learning: + logger.warning('Continual learning not supported for MultiTarget models') + model = MultiOutputRegressor(estimator=cbr) model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py index 4ac2c448b..0023a9f69 100644 --- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py +++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py @@ -4,7 +4,7 @@ from typing import Any, Dict from lightgbm import LGBMClassifier from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel - +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -16,7 +16,7 @@ class LightGBMClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -35,9 +35,14 @@ class LightGBMClassifier(BaseClassifierModel): y = data_dictionary["train_labels"].to_numpy()[:, 0] train_weights = data_dictionary["train_weights"] + if dk.pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[dk.pair] + model = LGBMClassifier(**self.model_training_parameters) model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, - eval_sample_weight=[test_weights]) + eval_sample_weight=[test_weights], init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressor.py b/freqtrade/freqai/prediction_models/LightGBMRegressor.py index 2431fd2ad..81f0e6d22 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py @@ -4,7 +4,7 @@ from typing import Any, Dict from lightgbm import LGBMRegressor from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel - +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -16,7 +16,7 @@ class LightGBMRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ Most regressors use the same function names and arguments e.g. 
user can drop in LGBMRegressor in place of CatBoostRegressor and all data @@ -35,9 +35,14 @@ class LightGBMRegressor(BaseRegressionModel): y = data_dictionary["train_labels"] train_weights = data_dictionary["train_weights"] + if dk.pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[dk.pair] + model = LGBMRegressor(**self.model_training_parameters) model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, - eval_sample_weight=[eval_weights]) + eval_sample_weight=[eval_weights], init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py index ecd405369..2b25493e0 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py @@ -5,7 +5,7 @@ from lightgbm import LGBMRegressor from sklearn.multioutput import MultiOutputRegressor from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel - +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -17,7 +17,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -31,6 +31,9 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) sample_weight = data_dictionary["train_weights"] + if self.continual_learning: + logger.warning('Continual learning not supported for MultiTarget models') + model = MultiOutputRegressor(estimator=lgb) model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) train_score = model.score(X, y) From 4c9ac6b7c0959a7d596279a1388145f8a90bd8da Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Sep 2022 18:58:55 +0200 Subject: [PATCH 2/8] add kwargs, reduce duplicated code --- freqtrade/freqai/freqai_interface.py | 15 ++++++++++++--- .../prediction_models/CatboostClassifier.py | 8 +++----- .../freqai/prediction_models/CatboostRegressor.py | 10 +++------- .../CatboostRegressorMultiTarget.py | 3 ++- .../prediction_models/LightGBMClassifier.py | 10 ++++------ .../freqai/prediction_models/LightGBMRegressor.py | 10 ++++------ .../LightGBMRegressorMultiTarget.py | 5 +++-- 7 files changed, 31 insertions(+), 30 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index b6f3d8ebc..101df88ec 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -661,11 +661,20 @@ class IFreqaiModel(ABC): self.train_time = 0 return + def get_init_model(self, pair: str) -> Any: + if pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[pair] + + return init_model + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. 
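A minimal sketch of how a subclass's `fit()` is expected to consume the new `get_init_model()` helper; the class name here is hypothetical, and the LightGBM-style `init_model` keyword simply mirrors the usage added to the bundled prediction models in this series:

```python
from typing import Any, Dict

from lightgbm import LGBMRegressor

from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel


class MyContinualRegressor(BaseRegressionModel):
    """Hypothetical user model showing the continual-learning wiring."""

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]

        # Previously trained model for this pair, or None when
        # continual_learning is disabled or nothing has been trained yet.
        init_model = self.get_init_model(dk.pair)

        model = LGBMRegressor(**self.model_training_parameters)
        model.fit(X=X, y=y, init_model=init_model)
        return model
```

Because `get_init_model()` returns `None` on the first training cycle, the same `fit()` works whether or not `continual_learning` is enabled.
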
@abstractmethod - def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any: + def train(self, unfiltered_dataframe: DataFrame, pair: str, + dk: FreqaiDataKitchen, **kwargs) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datahandler for storing, saving, loading, and analyzing the data. @@ -675,7 +684,7 @@ class IFreqaiModel(ABC): """ @abstractmethod - def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any: """ Most regressors use the same function names and arguments e.g. user can drop in LGBMRegressor in place of CatBoostRegressor and all data @@ -688,7 +697,7 @@ class IFreqaiModel(ABC): @abstractmethod def predict( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True + self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True, **kwargs ) -> Tuple[DataFrame, NDArray[np.int_]]: """ Filter the prediction features data and predict with it. diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py index 13395879a..cd7afd392 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifier.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py @@ -2,6 +2,7 @@ import logging from typing import Any, Dict from catboost import CatBoostClassifier, Pool + from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel @@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -36,10 +37,7 @@ class CatboostClassifier(BaseClassifierModel): **self.model_training_parameters, ) - if dk.pair not in self.dd.model_dictionary or not self.continual_learning: - init_model = None - else: - init_model = self.dd.model_dictionary[dk.pair] + init_model = self.get_init_model(dk.pair) cbr.fit(train_data, init_model=init_model) diff --git a/freqtrade/freqai/prediction_models/CatboostRegressor.py b/freqtrade/freqai/prediction_models/CatboostRegressor.py index 0b8bc162b..1ce31b628 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressor.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py @@ -1,10 +1,9 @@ -import gc import logging from typing import Any, Dict from catboost import CatBoostRegressor, Pool -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel @@ -18,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. 
""" - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -39,10 +38,7 @@ class CatboostRegressor(BaseRegressionModel): weight=data_dictionary["test_weights"], ) - if dk.pair not in self.dd.model_dictionary or not self.continual_learning: - init_model = None - else: - init_model = self.dd.model_dictionary[dk.pair] + init_model = self.get_init_model(dk.pair) model = CatBoostRegressor( allow_writing_files=False, diff --git a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py index 9ed61488c..bc52bfdd9 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py @@ -3,6 +3,7 @@ from typing import Any, Dict from catboost import CatBoostRegressor # , Pool from sklearn.multioutput import MultiOutputRegressor + from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel @@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py index 0023a9f69..69867eae3 100644 --- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py +++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py @@ -3,8 +3,9 @@ from typing import Any, Dict from lightgbm import LGBMClassifier -from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel + logger = logging.getLogger(__name__) @@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. 
""" - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -35,10 +36,7 @@ class LightGBMClassifier(BaseClassifierModel): y = data_dictionary["train_labels"].to_numpy()[:, 0] train_weights = data_dictionary["train_weights"] - if dk.pair not in self.dd.model_dictionary or not self.continual_learning: - init_model = None - else: - init_model = self.dd.model_dictionary[dk.pair] + init_model = self.get_init_model(dk.pair) model = LGBMClassifier(**self.model_training_parameters) diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressor.py b/freqtrade/freqai/prediction_models/LightGBMRegressor.py index 81f0e6d22..99e9ff887 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py @@ -3,8 +3,9 @@ from typing import Any, Dict from lightgbm import LGBMRegressor -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel + logger = logging.getLogger(__name__) @@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ Most regressors use the same function names and arguments e.g. user can drop in LGBMRegressor in place of CatBoostRegressor and all data @@ -35,10 +36,7 @@ class LightGBMRegressor(BaseRegressionModel): y = data_dictionary["train_labels"] train_weights = data_dictionary["train_weights"] - if dk.pair not in self.dd.model_dictionary or not self.continual_learning: - init_model = None - else: - init_model = self.dd.model_dictionary[dk.pair] + init_model = self.get_init_model(dk.pair) model = LGBMRegressor(**self.model_training_parameters) diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py index 2b25493e0..c34680dbe 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py @@ -4,8 +4,9 @@ from typing import Any, Dict from lightgbm import LGBMRegressor from sklearn.multioutput import MultiOutputRegressor -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel + logger = logging.getLogger(__name__) @@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. 
""" - def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold From 1b6410d7d1e20d4ede790ac46d4232d8cddaa181 Mon Sep 17 00:00:00 2001 From: Emre Date: Thu, 8 Sep 2022 14:12:19 +0300 Subject: [PATCH 3/8] Add XGBoostRegressor for freqAI, fix mypy errors --- .../prediction_models/BaseClassifierModel.py | 8 ++-- .../prediction_models/BaseRegressionModel.py | 8 ++-- .../prediction_models/BaseTensorFlowModel.py | 2 +- .../prediction_models/XGBoostRegressor.py | 46 +++++++++++++++++++ requirements-freqai.txt | 1 + tests/freqai/test_freqai_interface.py | 31 +++++++++++++ 6 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 freqtrade/freqai/prediction_models/XGBoostRegressor.py diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/prediction_models/BaseClassifierModel.py index e51e26e0f..291bacc82 100644 --- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py +++ b/freqtrade/freqai/prediction_models/BaseClassifierModel.py @@ -21,7 +21,7 @@ class BaseClassifierModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen @@ -68,7 +68,7 @@ class BaseClassifierModel(IFreqaiModel): return model def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False + self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. @@ -79,9 +79,9 @@ class BaseClassifierModel(IFreqaiModel): data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(unfiltered_dataframe) + dk.find_features(dataframe) filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False + dataframe, dk.training_features_list, training_filter=False ) filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) dk.data_dictionary["prediction_features"] = filtered_dataframe diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index 45f0c2937..da6fba571 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -20,7 +20,7 @@ class BaseRegressionModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen @@ -67,7 +67,7 @@ class BaseRegressionModel(IFreqaiModel): return model def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False + self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. 
@@ -78,9 +78,9 @@ class BaseRegressionModel(IFreqaiModel): data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(unfiltered_dataframe) + dk.find_features(dataframe) filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False + dataframe, dk.training_features_list, training_filter=False ) filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) dk.data_dictionary["prediction_features"] = filtered_dataframe diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py index 66e6ec1fc..6fb49239b 100644 --- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py +++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py @@ -17,7 +17,7 @@ class BaseTensorFlowModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py new file mode 100644 index 000000000..a8f250d16 --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -0,0 +1,46 @@ +import logging +from typing import Any, Dict + +import xgboost as xgb + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel + + +logger = logging.getLogger(__name__) + + +class XGBoostRegressor(BaseRegressionModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. 
+ """ + + xgb.set_config(verbosity=2) + xgb.config_context(verbosity=2) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: + eval_set = None + else: + eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])] + + sample_weight = data_dictionary["train_weights"] + + xgb_model = self.get_init_model(dk.pair) + + model = xgb.XGBRegressor(**self.model_training_parameters) + + model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, xgb_model=xgb_model) + + return model diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 26e4617af..e8d950382 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -6,3 +6,4 @@ scikit-learn==1.1.2 joblib==1.1.0 catboost==1.0.6; platform_machine != 'aarch64' lightgbm==3.3.2 +xgboost==1.6.2 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 5441b3c24..7783c00e7 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -172,6 +172,37 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) +def test_train_model_in_series_XGBoostRegressor(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "XGBoostRegressor"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.train_model_in_series(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) + + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file() + + shutil.rmtree(Path(freqai.dk.full_path)) + + def test_start_backtesting(mocker, freqai_conf): freqai_conf.update({"timerange": "20180120-20180130"}) freqai_conf.get("freqai", {}).update({"save_backtest_models": True}) From df6e43d2c599badef3247f8878cacec2a81f5f79 Mon Sep 17 00:00:00 2001 From: Emre Date: Fri, 9 Sep 2022 00:11:09 +0300 Subject: [PATCH 4/8] Add XGBoostRegressorMultiTarget class --- .../XGBoostRegressorMultiTarget.py | 43 +++++++++++++++++++ tests/freqai/test_freqai_interface.py | 31 +++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py new file mode 100644 index 000000000..5283501d1 --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py @@ -0,0 +1,43 @@ +import logging +from typing import 
Any, Dict + +from sklearn.multioutput import MultiOutputRegressor +from xgboost import XGBRegressor + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel + + +logger = logging.getLogger(__name__) + + +class XGBoostRegressorMultiTarget(BaseRegressionModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + xgb = XGBRegressor(**self.model_training_parameters) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) + sample_weight = data_dictionary["train_weights"] + + if self.continual_learning: + logger.warning('Continual learning not supported for MultiTarget models') + + model = MultiOutputRegressor(estimator=xgb) + model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) + train_score = model.score(X, y) + test_score = model.score(*eval_set) + logger.info(f"Train score {train_score}, Test score {test_score}") + return model diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 7783c00e7..ff0eb24a9 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -203,6 +203,37 @@ def test_train_model_in_series_XGBoostRegressor(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) +def test_train_model_in_series_XGBoostRegressorMultiModel(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "XGBoostRegressorMultiTarget"}) + freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + assert len(freqai.dk.label_list) == 2 + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file() + assert len(freqai.dk.data['training_features_list']) == 26 + + shutil.rmtree(Path(freqai.dk.full_path)) + + def test_start_backtesting(mocker, freqai_conf): freqai_conf.update({"timerange": "20180120-20180130"}) freqai_conf.get("freqai", {}).update({"save_backtest_models": 
True}) From acb410a0defb4626b52879b56e3d4ab8824c1085 Mon Sep 17 00:00:00 2001 From: Emre Date: Fri, 9 Sep 2022 00:11:43 +0300 Subject: [PATCH 5/8] Remove verbosity params --- freqtrade/freqai/prediction_models/XGBoostRegressor.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index a8f250d16..acc4386f5 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -1,7 +1,7 @@ import logging from typing import Any, Dict -import xgboost as xgb +from xgboost import XGBRegressor from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel @@ -24,9 +24,6 @@ class XGBoostRegressor(BaseRegressionModel): all the training and test data/labels. """ - xgb.set_config(verbosity=2) - xgb.config_context(verbosity=2) - X = data_dictionary["train_features"] y = data_dictionary["train_labels"] @@ -39,7 +36,7 @@ class XGBoostRegressor(BaseRegressionModel): xgb_model = self.get_init_model(dk.pair) - model = xgb.XGBRegressor(**self.model_training_parameters) + model = XGBRegressor(**self.model_training_parameters) model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, xgb_model=xgb_model) From a826c0eb837af2f4f4b68660481e5c9f436caac7 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 9 Sep 2022 19:17:15 +0200 Subject: [PATCH 6/8] ensure signatures match, reduce verbosity --- freqtrade/freqai/freqai_interface.py | 8 +++--- .../prediction_models/BaseClassifierModel.py | 26 +++++++++---------- .../prediction_models/BaseRegressionModel.py | 26 +++++++++---------- .../prediction_models/BaseTensorFlowModel.py | 10 +++---- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 101df88ec..e4f77a9cf 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -673,12 +673,12 @@ class IFreqaiModel(ABC): # See freqai/prediction_models/CatboostPredictionModel.py for an example. @abstractmethod - def train(self, unfiltered_dataframe: DataFrame, pair: str, + def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datahandler for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: Trained model which can be used to inference (self.predict) """ @@ -697,11 +697,11 @@ class IFreqaiModel(ABC): @abstractmethod def predict( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True, **kwargs + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param unfiltered_dataframe: Full dataframe for the current backtest period. + :param unfiltered_df: Full dataframe for the current backtest period. :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only :param first: boolean = whether this is the first prediction or not. 
:return: diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/prediction_models/BaseClassifierModel.py index 291bacc82..5142ffb0d 100644 --- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py +++ b/freqtrade/freqai/prediction_models/BaseClassifierModel.py @@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. @@ -68,25 +68,25 @@ class BaseClassifierModel(IFreqaiModel): return model def predict( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False, **kwargs + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. + :param: unfiltered_df: Full dataframe for the current backtest period. 
:return: :pred_df: dataframe containing the predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(dataframe) - filtered_dataframe, _ = dk.filter_features( - dataframe, dk.training_features_list, training_filter=False + dk.find_features(unfiltered_df) + filtered_df, _ = dk.filter_features( + unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe + filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df - self.data_cleaning_predict(dk, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_df) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) pred_df = DataFrame(predictions, columns=dk.label_list) diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index da6fba571..1d87e42c0 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. @@ -67,26 +67,26 @@ class BaseRegressionModel(IFreqaiModel): return model def predict( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False, **kwargs + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. + :param: unfiltered_df: Full dataframe for the current backtest period. 
:return: :pred_df: dataframe containing the predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(dataframe) - filtered_dataframe, _ = dk.filter_features( - dataframe, dk.training_features_list, training_filter=False + dk.find_features(unfiltered_df) + filtered_df, _ = dk.filter_features( + unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe + filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df # optional additional data cleaning/analysis - self.data_cleaning_predict(dk, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_df) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) pred_df = DataFrame(predictions, columns=dk.label_list) diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py index 6fb49239b..eea80f3a2 100644 --- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py +++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py @@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. 
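With the patches above applied, the new `continual_learning` flag and the `XGBoostRegressor` model are selected through the normal freqtrade configuration. A minimal sketch of the relevant slice; the `identifier` and the training parameters are illustrative values, not defaults:

```python
# Illustrative slice of a freqtrade config (JSON in practice), assuming the
# patches in this series are applied; values shown are examples only.
config = {
    "freqaimodel": "XGBoostRegressor",
    "freqai": {
        "identifier": "example",
        "continual_learning": True,
        "data_split_parameters": {"test_size": 0.1},
        "model_training_parameters": {"n_estimators": 800},
    },
}
```

When `continual_learning` is enabled, each retraining passes the pair's previous model to the booster via `get_init_model()`, so training resumes from the earlier state instead of starting from scratch.
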
From 170bec0438e10955052fe7782ede7a42a2310cc8 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 10 Sep 2022 08:24:23 +0200 Subject: [PATCH 7/8] Fix failing XGBoost tests --- tests/freqai/test_freqai_interface.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index d95ead90c..5f8eeb086 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -174,7 +174,7 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_XGBoostRegressor(mocker, freqai_conf): +def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "XGBoostRegressor"}) freqai_conf.update({"strategy": "freqai_test_strat"}) @@ -194,8 +194,8 @@ def test_train_model_in_series_XGBoostRegressor(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() @@ -205,7 +205,7 @@ def test_train_model_in_series_XGBoostRegressor(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_XGBoostRegressorMultiModel(mocker, freqai_conf): +def test_extract_data_and_train_model_XGBoostRegressorMultiModel(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "XGBoostRegressorMultiTarget"}) freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) @@ -224,7 +224,8 @@ def test_train_model_in_series_XGBoostRegressorMultiModel(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert len(freqai.dk.label_list) == 2 assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() From 10b6aebc5f5ada8b1e6aef733f7c8ccf4e29e8ba Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Sep 2022 16:54:13 +0200 Subject: [PATCH 8/8] enable continual learning and evaluation sets on multioutput models. 
--- .../BaseClassifierModel.py | 0 .../BaseRegressionModel.py | 0 .../BaseTensorFlowModel.py | 0 .../base_models/FreqaiMultiOutputRegressor.py | 75 +++++++++++++++++++ .../prediction_models/CatboostClassifier.py | 2 +- .../prediction_models/CatboostRegressor.py | 2 +- .../CatboostRegressorMultiTarget.py | 41 +++++++--- .../prediction_models/LightGBMClassifier.py | 2 +- .../prediction_models/LightGBMRegressor.py | 2 +- .../LightGBMRegressorMultiTarget.py | 41 +++++++--- .../prediction_models/XGBoostRegressor.py | 6 +- .../XGBoostRegressorMultiTarget.py | 37 ++++++--- 12 files changed, 170 insertions(+), 38 deletions(-) rename freqtrade/freqai/{prediction_models => base_models}/BaseClassifierModel.py (100%) rename freqtrade/freqai/{prediction_models => base_models}/BaseRegressionModel.py (100%) rename freqtrade/freqai/{prediction_models => base_models}/BaseTensorFlowModel.py (100%) create mode 100644 freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseClassifierModel.py rename to freqtrade/freqai/base_models/BaseClassifierModel.py diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseRegressionModel.py rename to freqtrade/freqai/base_models/BaseRegressionModel.py diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/base_models/BaseTensorFlowModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseTensorFlowModel.py rename to freqtrade/freqai/base_models/BaseTensorFlowModel.py diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py new file mode 100644 index 000000000..aa5dbe629 --- /dev/null +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py @@ -0,0 +1,75 @@ + +from joblib import Parallel +from sklearn.multioutput import MultiOutputRegressor, _fit_estimator +from sklearn.utils.fixes import delayed +from sklearn.utils.validation import has_fit_parameter + + +class FreqaiMultiOutputRegressor(MultiOutputRegressor): + + def fit(self, X, y, sample_weight=None, fit_params=None): + """Fit the model to data, separately for each output variable. + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. An indicator matrix turns on multilabel + estimation. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + fit_params : A list of dicts for the fit_params + Parameters passed to the ``estimator.fit`` method of each step. + Each dict may contain same or different values (e.g. different + eval_sets or init_models) + .. versionadded:: 0.23 + Returns + ------- + self : object + Returns a fitted instance. 
+ """ + + if not hasattr(self.estimator, "fit"): + raise ValueError("The base estimator should implement a fit method") + + y = self._validate_data(X="no_validation", y=y, multi_output=True) + + # if is_classifier(self): + # check_classification_targets(y) + + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." + ) + + if sample_weight is not None and not has_fit_parameter( + self.estimator, "sample_weight" + ): + raise ValueError("Underlying estimator does not support sample weights.") + + # fit_params_validated = _check_fit_params(X, fit_params) + + if not fit_params: + fit_params = [None] * y.shape[1] + + # if not init_models: + # init_models = [None] * y.shape[1] + + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_fit_estimator)( + self.estimator, X, y[:, i], sample_weight, **fit_params[i] + # init_model=init_models[i], eval_set=eval_sets[i], + # **fit_params_validated + ) + for i in range(y.shape[1]) + ) + + if hasattr(self.estimators_[0], "n_features_in_"): + self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ + + return diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py index cd7afd392..60536e6de 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifier.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py @@ -3,8 +3,8 @@ from typing import Any, Dict from catboost import CatBoostClassifier, Pool +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/prediction_models/CatboostRegressor.py b/freqtrade/freqai/prediction_models/CatboostRegressor.py index 1ce31b628..73cf6c88a 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressor.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py @@ -3,8 +3,8 @@ from typing import Any, Dict from catboost import CatBoostRegressor, Pool +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py index bc52bfdd9..a376b2c33 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py @@ -1,11 +1,11 @@ import logging from typing import Any, Dict -from catboost import CatBoostRegressor # , Pool -from sklearn.multioutput import MultiOutputRegressor +from catboost import CatBoostRegressor, Pool +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) @@ -32,17 +32,34 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): X = 
 
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+
         sample_weight = data_dictionary["train_weights"]
 
-        if self.continual_learning:
-            logger.warning('Continual learning not supported for MultiTarget models')
-
-        model = MultiOutputRegressor(estimator=cbr)
-        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
+        eval_sets = [None] * y.shape[1]
 
         if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            train_score = model.score(X, y)
-            test_score = model.score(*eval_set)
-            logger.info(f"Train score {train_score}, Test score {test_score}")
+            eval_sets = [None] * data_dictionary['test_labels'].shape[1]
+
+            for i in range(data_dictionary['test_labels'].shape[1]):
+                eval_sets[i] = Pool(
+                    data=data_dictionary["test_features"],
+                    label=data_dictionary["test_labels"].iloc[:, i],
+                    weight=data_dictionary["test_weights"],
+                )
+
+        init_model = self.get_init_model(dk.pair)
+
+        if init_model:
+            init_models = init_model.estimators_
+        else:
+            init_models = [None] * y.shape[1]
+
+        fit_params = []
+        for i in range(len(eval_sets)):
+            fit_params.append(
+                {'eval_set': eval_sets[i], 'init_model': init_models[i]})
+
+        model = FreqaiMultiOutputRegressor(estimator=cbr)
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
+
         return model
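The hunk above builds one catboost Pool per label column for evaluation and, when get_init_model() returns a previously trained model, passes each of its estimators_ back in as init_model so the new fit continues from the old state. A hedged, self-contained sketch of that per-target wiring (DataFrames, column names and sizes are illustrative only, not taken from the patch):

    # Sketch of the per-target eval Pools and fit-parameter dicts assembled above.
    import numpy as np
    import pandas as pd
    from catboost import Pool

    test_features = pd.DataFrame(np.random.rand(50, 5))
    test_labels = pd.DataFrame(np.random.rand(50, 2), columns=['&-target_a', '&-target_b'])
    test_weights = np.ones(50)

    # One Pool per label column, mirroring the loop in the hunk.
    eval_sets = [
        Pool(data=test_features, label=test_labels.iloc[:, i], weight=test_weights)
        for i in range(test_labels.shape[1])
    ]

    # With continual_learning enabled these would be the estimators_ of the previous
    # model; None means every target is trained from scratch.
    init_models = [None] * test_labels.shape[1]

    fit_params = [
        {'eval_set': eval_sets[i], 'init_model': init_models[i]}
        for i in range(len(eval_sets))
    ]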
diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
index 69867eae3..3eec516ba 100644
--- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py
+++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -3,8 +3,8 @@ from typing import Any, Dict
 
 from lightgbm import LGBMClassifier
 
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
 
 
 logger = logging.getLogger(__name__)
diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressor.py b/freqtrade/freqai/prediction_models/LightGBMRegressor.py
index 99e9ff887..85c9b691c 100644
--- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py
+++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py
@@ -3,8 +3,8 @@ from typing import Any, Dict
 
 from lightgbm import LGBMRegressor
 
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
 
 
 logger = logging.getLogger(__name__)
diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
index c34680dbe..7a9b5c36a 100644
--- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
+++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
@@ -2,10 +2,10 @@ import logging
 from typing import Any, Dict
 
 from lightgbm import LGBMRegressor
-from sklearn.multioutput import MultiOutputRegressor
 
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
 
 
 logger = logging.getLogger(__name__)
@@ -29,15 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
         sample_weight = data_dictionary["train_weights"]
 
-        if self.continual_learning:
-            logger.warning('Continual learning not supported for MultiTarget models')
+        eval_weights = None
+        eval_sets = [None] * y.shape[1]
 
-        model = MultiOutputRegressor(estimator=lgb)
-        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            eval_weights = [data_dictionary["test_weights"]]
+            eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1]  # type: ignore
+            for i in range(data_dictionary['test_labels'].shape[1]):
+                eval_sets[i] = (  # type: ignore
+                    data_dictionary["test_features"],
+                    data_dictionary["test_labels"].iloc[:, i]
+                )
+
+        init_model = self.get_init_model(dk.pair)
+        if init_model:
+            init_models = init_model.estimators_
+        else:
+            init_models = [None] * y.shape[1]
+
+        fit_params = []
+        for i in range(len(eval_sets)):
+            fit_params.append(
+                {'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
+                 'init_model': init_models[i]})
+
+        model = FreqaiMultiOutputRegressor(estimator=lgb)
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
+
+        # model = FreqaiMultiOutputRegressor(estimator=lgb)
+        # model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models,
+        #           eval_sets=eval_sets, eval_sample_weight=eval_weights)
 
         return model
diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
index acc4386f5..c9be9ce74 100644
--- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -3,8 +3,8 @@ from typing import Any, Dict
 
 from xgboost import XGBRegressor
 
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
 
 
 logger = logging.getLogger(__name__)
@@ -31,6 +31,7 @@ class XGBoostRegressor(BaseRegressionModel):
             eval_set = None
         else:
             eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
+            eval_weights = [data_dictionary['test_weights']]
 
         sample_weight = data_dictionary["train_weights"]
 
@@ -38,6 +39,7 @@ class XGBoostRegressor(BaseRegressionModel):
 
         model = XGBRegressor(**self.model_training_parameters)
 
-        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, xgb_model=xgb_model)
+        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
+                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
 
         return model
diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
index 5283501d1..38c478c0b 100644
--- a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
@@ -1,11 +1,11 @@
 import logging
 from typing import Any, Dict
 
-from sklearn.multioutput import MultiOutputRegressor
 from xgboost import XGBRegressor
 
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
 
 
 logger = logging.getLogger(__name__)
@@ -29,15 +29,32 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
 
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
         sample_weight = data_dictionary["train_weights"]
 
-        if self.continual_learning:
-            logger.warning('Continual learning not supported for MultiTarget models')
+        eval_weights = None
+        eval_sets = [None] * y.shape[1]
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            eval_weights = [data_dictionary["test_weights"]]
+            for i in range(data_dictionary['test_labels'].shape[1]):
+                eval_sets[i] = [(  # type: ignore
+                    data_dictionary["test_features"],
+                    data_dictionary["test_labels"].iloc[:, i]
+                )]
+
+        init_model = self.get_init_model(dk.pair)
+        if init_model:
+            init_models = init_model.estimators_
+        else:
+            init_models = [None] * y.shape[1]
+
+        fit_params = []
+        for i in range(len(eval_sets)):
+            fit_params.append(
+                {'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
+                 'xgb_model': init_models[i]})
+
+        model = FreqaiMultiOutputRegressor(estimator=xgb)
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
 
-        model = MultiOutputRegressor(estimator=xgb)
-        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
         return model
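All three multi-target models now share the same pattern and differ only in the keyword their library uses to resume training (init_model for CatBoost and LightGBM, xgb_model for XGBoost) and in the shape of eval_set each expects. A minimal sketch of the XGBoost warm start used above, outside of FreqAI (data and parameters are illustrative, not taken from the patch):

    # Sketch only: continue boosting from a previously fitted model via xgb_model.
    import numpy as np
    from xgboost import XGBRegressor

    X_old, y_old = np.random.rand(200, 5), np.random.rand(200)
    X_new, y_new = np.random.rand(50, 5), np.random.rand(50)

    first = XGBRegressor(n_estimators=100)
    first.fit(X_old, y_old)

    # In FreqAI the previously saved model comes from get_init_model(); here the
    # booster of `first` plays that role for the next training window.
    second = XGBRegressor(n_estimators=100)
    second.fit(
        X_new, y_new,
        eval_set=[(X_new, y_new)],
        sample_weight_eval_set=[np.ones(len(y_new))],
        xgb_model=first.get_booster(),
    )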