Merge pull request #8634 from freqtrade/bug-fix/continual_learning

fix bug in continual_learning for PyTorch* models
2026-02-03 04:41:23 +00:00 · 2023-05-13 15:32:49 +02:00
parent d50e221e62 18c1eda09b
commit 90ac387444
6 changed files with 39 additions and 36 deletions
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -18,7 +18,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `purge_old_models` | Number of models to keep on disk (not relevant to backtesting). Default is 2, which means that dry/live runs will keep the latest 2 models on disk. Setting to 0 keeps all models. This parameter also accepts a boolean to maintain backwards compatibility. <br> **Datatype:** Integer. <br> Default: `2`.
 | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
 | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
-| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
+| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). Beware that this is currently a naive approach to incremental learning, and it has a high probability of overfitting/getting stuck in local minima while the market moves away from your model. The connections are in place primarily for experimental purposes, and so that FreqAI is ready for more mature approaches to continual learning in chaotic systems like the crypto market. <br> **Datatype:** Boolean. <br> Default: `False`.
 | `write_metrics_to_disk` | Collect train timings, inference timings and cpu usage in json file. <br> **Datatype:** Boolean. <br> Default: `False`
 | `data_kitchen_thread_count` | <br> Designate the number of threads you want to use for data processing (outlier methods, normalization, etc.). This has no impact on the number of threads used for training. If user does not set it (default), FreqAI will use max number of threads - 2 (leaving 1 physical core available for Freqtrade bot and FreqUI) <br> **Datatype:** Positive integer.

--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -131,6 +131,9 @@ You can choose to adopt a continual learning scheme by setting `"continual_learn
 ???+ danger "Continual learning enforces a constant parameter space"
    Since `continual_learning` means that the model parameter space *cannot* change between trainings, `principal_component_analysis` is automatically disabled when `continual_learning` is enabled. Hint: PCA changes the parameter space and the number of features, learn more about PCA [here](freqai-feature-engineering.md#data-dimensionality-reduction-with-principal-component-analysis).

+???+ danger "Experimental functionality"
+    Beware that this is currently a naive approach to incremental learning, and it has a high probability of overfitting/getting stuck in local minima while the market moves away from your model. The mechanics are available in FreqAI primarily for experimental purposes, and so that FreqAI is ready for more mature approaches to continual learning in chaotic systems like the crypto market.
+
 ## Hyperopt

 You can hyperopt using the same command as for [typical Freqtrade hyperopt](hyperopt.md):
--- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
@@ -74,16 +74,17 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()
-        init_model = self.get_init_model(dk.pair)
-        trainer = PyTorchModelTrainer(
-            model=model,
-            optimizer=optimizer,
-            criterion=criterion,
-            model_meta_data={"class_names": class_names},
-            device=self.device,
-            init_model=init_model,
-            data_convertor=self.data_convertor,
-            **self.trainer_kwargs,
-        )
+        # check if continual_learning is activated, and retrieve the model to continue training
+        trainer = self.get_init_model(dk.pair)
+        if trainer is None:
+            trainer = PyTorchModelTrainer(
+                model=model,
+                optimizer=optimizer,
+                criterion=criterion,
+                model_meta_data={"class_names": class_names},
+                device=self.device,
+                data_convertor=self.data_convertor,
+                **self.trainer_kwargs,
+            )
        trainer.fit(data_dictionary, self.splits)
        return trainer
--- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
@@ -69,15 +69,16 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
-        init_model = self.get_init_model(dk.pair)
-        trainer = PyTorchModelTrainer(
-            model=model,
-            optimizer=optimizer,
-            criterion=criterion,
-            device=self.device,
-            init_model=init_model,
-            data_convertor=self.data_convertor,
-            **self.trainer_kwargs,
-        )
+        # check if continual_learning is activated, and retrieve the model to continue training
+        trainer = self.get_init_model(dk.pair)
+        if trainer is None:
+            trainer = PyTorchModelTrainer(
+                model=model,
+                optimizer=optimizer,
+                criterion=criterion,
+                device=self.device,
+                data_convertor=self.data_convertor,
+                **self.trainer_kwargs,
+            )
        trainer.fit(data_dictionary, self.splits)
        return trainer
--- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -75,17 +75,18 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
-        init_model = self.get_init_model(dk.pair)
-        trainer = PyTorchTransformerTrainer(
-            model=model,
-            optimizer=optimizer,
-            criterion=criterion,
-            device=self.device,
-            init_model=init_model,
-            data_convertor=self.data_convertor,
-            window_size=self.window_size,
-            **self.trainer_kwargs,
-        )
+        # check if continual_learning is activated, and retrieve the model to continue training
+        trainer = self.get_init_model(dk.pair)
+        if trainer is None:
+            trainer = PyTorchTransformerTrainer(
+                model=model,
+                optimizer=optimizer,
+                criterion=criterion,
+                device=self.device,
+                data_convertor=self.data_convertor,
+                window_size=self.window_size,
+                **self.trainer_kwargs,
+            )
        trainer.fit(data_dictionary, self.splits)
        return trainer

--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -25,7 +25,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
            optimizer: Optimizer,
            criterion: nn.Module,
            device: str,
-            init_model: Dict,
            data_convertor: PyTorchDataConvertor,
            model_meta_data: Dict[str, Any] = {},
            window_size: int = 1,
@@ -56,8 +55,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
        self.data_convertor = data_convertor
        self.window_size: int = window_size
-        if init_model:
-            self.load_from_checkpoint(init_model)

    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
        """