From 3ae3cc63dfe1553c1859b157aca0742ecc742a8a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 13 May 2023 11:14:16 +0000 Subject: [PATCH 1/4] fix bug in continual_learning for PyTorch* models --- .../prediction_models/PyTorchMLPClassifier.py | 23 ++++++++++--------- .../prediction_models/PyTorchMLPRegressor.py | 21 +++++++++-------- .../PyTorchTransformerRegressor.py | 23 ++++++++++--------- freqtrade/freqai/torch/PyTorchModelTrainer.py | 6 ++--- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py index ea7981405..b29d20112 100644 --- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py +++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py @@ -74,16 +74,17 @@ class PyTorchMLPClassifier(BasePyTorchClassifier): model.to(self.device) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) criterion = torch.nn.CrossEntropyLoss() - init_model = self.get_init_model(dk.pair) - trainer = PyTorchModelTrainer( - model=model, - optimizer=optimizer, - criterion=criterion, - model_meta_data={"class_names": class_names}, - device=self.device, - init_model=init_model, - data_convertor=self.data_convertor, - **self.trainer_kwargs, - ) + # check if continual_learning is activated, and retreive the model to continue training + trainer = self.get_init_model(dk.pair) + if trainer is None: + trainer = PyTorchModelTrainer( + model=model, + optimizer=optimizer, + criterion=criterion, + model_meta_data={"class_names": class_names}, + device=self.device, + data_convertor=self.data_convertor, + **self.trainer_kwargs, + ) trainer.fit(data_dictionary, self.splits) return trainer diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py index 64f0f4b03..6e1270102 100644 --- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py +++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py @@ -69,15 +69,16 @@ class PyTorchMLPRegressor(BasePyTorchRegressor): model.to(self.device) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) criterion = torch.nn.MSELoss() - init_model = self.get_init_model(dk.pair) - trainer = PyTorchModelTrainer( - model=model, - optimizer=optimizer, - criterion=criterion, - device=self.device, - init_model=init_model, - data_convertor=self.data_convertor, - **self.trainer_kwargs, - ) + # check if continual_learning is activated, and retreive the model to continue training + trainer = self.get_init_model(dk.pair) + if trainer is None: + trainer = PyTorchModelTrainer( + model=model, + optimizer=optimizer, + criterion=criterion, + device=self.device, + data_convertor=self.data_convertor, + **self.trainer_kwargs, + ) trainer.fit(data_dictionary, self.splits) return trainer diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py index e760f6e68..5e84ada72 100644 --- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py +++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py @@ -75,17 +75,18 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor): model.to(self.device) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) criterion = torch.nn.MSELoss() - init_model = self.get_init_model(dk.pair) - trainer = PyTorchTransformerTrainer( - model=model, - optimizer=optimizer, - criterion=criterion, 
- device=self.device, - init_model=init_model, - data_convertor=self.data_convertor, - window_size=self.window_size, - **self.trainer_kwargs, - ) + # check if continual_learning is activated, and retreive the model to continue training + trainer = self.get_init_model(dk.pair) + if trainer is None: + trainer = PyTorchTransformerTrainer( + model=model, + optimizer=optimizer, + criterion=criterion, + device=self.device, + data_convertor=self.data_convertor, + window_size=self.window_size, + **self.trainer_kwargs, + ) trainer.fit(data_dictionary, self.splits) return trainer diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py index a3b0d9b9c..a25fa45bc 100644 --- a/freqtrade/freqai/torch/PyTorchModelTrainer.py +++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py @@ -25,7 +25,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): optimizer: Optimizer, criterion: nn.Module, device: str, - init_model: Dict, + # init_model: Dict, data_convertor: PyTorchDataConvertor, model_meta_data: Dict[str, Any] = {}, window_size: int = 1, @@ -56,8 +56,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None) self.data_convertor = data_convertor self.window_size: int = window_size - if init_model: - self.load_from_checkpoint(init_model) + # if init_model: + # self.load_from_checkpoint(init_model) def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]): """ From fad1c198562a64464c78fb5816a6e4700a77457f Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 13 May 2023 11:21:43 +0000 Subject: [PATCH 2/4] add warnings in the doc for users to better understand the limitations of continual_learning --- docs/freqai-running.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index f3ccc546f..47d2ec4b3 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -131,6 +131,9 @@ You can choose to adopt a continual learning scheme by setting `"continual_learn ???+ danger "Continual learning enforces a constant parameter space" Since `continual_learning` means that the model parameter space *cannot* change between trainings, `principal_component_analysis` is automatically disabled when `continual_learning` is enabled. Hint: PCA changes the parameter space and the number of features, learn more about PCA [here](freqai-feature-engineering.md#data-dimensionality-reduction-with-principal-component-analysis). +???+ danger "Experimental functionality" + Beware that this is currently a naive approach to incremental learning, and it has a high probability of overfitting/getting stuck in local minima while the market moves away from your model. We have the mechanics available in FreqAI primarily for experimental purposes and so that it is ready for more mature approaches to continual learning in chaotic systems like the crypto market. 
+

## Hyperopt

You can hyperopt using the same command as for [typical Freqtrade hyperopt](hyperopt.md):

From 2ec1302c109dfbdd13bfe3db3e0313b561eeb522 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 13 May 2023 11:23:57 +0000
Subject: [PATCH 3/4] add warnings in the doc for users to better understand
 the limitations of continual_learning

---
 docs/freqai-parameter-table.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 76c175304..ef1a23401 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -18,7 +18,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `purge_old_models` | Number of models to keep on disk (not relevant to backtesting). Default is 2, which means that dry/live runs will keep the latest 2 models on disk. Setting to 0 keeps all models. This parameter also accepts a boolean to maintain backwards compatibility. <br> **Datatype:** Integer. <br> Default: `2`.
| `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
-| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
+| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). Beware that this is currently a naive approach to incremental learning, and it has a high probability of overfitting/getting stuck in local minima while the market moves away from your model. We have the mechanics available in FreqAI primarily for experimental purposes and so that it is ready for more mature approaches to continual learning in chaotic systems like the crypto market. <br> **Datatype:** Boolean. <br> Default: `False`.
| `write_metrics_to_disk` | Collect train timings, inference timings and cpu usage in json file. <br> **Datatype:** Boolean. <br> Default: `False`
| `data_kitchen_thread_count` | Designate the number of threads you want to use for data processing (outlier methods, normalization, etc.). This has no impact on the number of threads used for training. If user does not set it (default), FreqAI will use max number of threads - 2 (leaving 1 physical core available for Freqtrade bot and FreqUI) <br> **Datatype:** Positive integer.

From 18c1eda09b281ed43fdff3e55de6e29cd8cc1ab6 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 13 May 2023 11:27:36 +0000
Subject: [PATCH 4/4] remove commented lines

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index a25fa45bc..a9310a182 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -25,7 +25,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
        optimizer: Optimizer,
        criterion: nn.Module,
        device: str,
-        # init_model: Dict,
        data_convertor: PyTorchDataConvertor,
        model_meta_data: Dict[str, Any] = {},
        window_size: int = 1,
@@ -56,8 +55,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
        self.data_convertor = data_convertor
        self.window_size: int = window_size
-        # if init_model:
-        #     self.load_from_checkpoint(init_model)

    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
        """
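
For reference, all three prediction models end up sharing the same flow after patch 1/4. The condensed sketch below is assembled from the hunks above and is not a separate change in this series; the surrounding fit() boilerplate, and the transformer's extra window_size argument, are elided:

    # Per the diffs above: when continual_learning is enabled, get_init_model()
    # returns the most recently saved trainer object (model, optimizer and meta
    # data included), so training resumes on it; otherwise a fresh trainer is built.
    trainer = self.get_init_model(dk.pair)
    if trainer is None:
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            device=self.device,
            data_convertor=self.data_convertor,
            **self.trainer_kwargs,
        )
    trainer.fit(data_dictionary, self.splits)
    return trainer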