From 9cb45a38108fcd33352bf8c645adc2a46c4b2218 Mon Sep 17 00:00:00 2001
From: yinon
Date: Thu, 13 Jul 2023 15:37:50 +0000
Subject: [PATCH 01/17] pytorch - bugfix - explicitly assign tensor to var as
 .to() is not inplace operation

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 603e7ac12..e74b572fd 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -83,8 +83,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
            for i, batch_data in enumerate(data_loaders_dictionary["train"]):
                xb, yb = batch_data
-               xb.to(self.device)
-               yb.to(self.device)
+               xb = xb.to(self.device)
+               yb = yb.to(self.device)
 
                yb_pred = self.model(xb)
                loss = self.criterion(yb_pred, yb)
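The fix in this first patch is easy to demonstrate in isolation: torch.Tensor.to() returns the converted tensor rather than mutating its receiver, so the result has to be bound back to a name. A minimal sketch, independent of the freqtrade classes:

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    t = torch.zeros(2, 2)   # allocated on the CPU
    t.to(device)            # conversion result is returned, and discarded here
    print(t.device)         # still "cpu" - .to() did not mutate t
    t = t.to(device)        # the patched pattern: bind the result back
    print(t.device)         # now the requested device

Before the fix, the training loop silently kept computing on whatever device the batch tensors were originally created on.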
From 0c9aa86885ca8019f2a1c73629f66d01b8817818 Mon Sep 17 00:00:00 2001
From: yinon
Date: Thu, 13 Jul 2023 15:38:58 +0000
Subject: [PATCH 02/17] pytorch - data convertor - create tensor directly on
 device, simplify code

---
 .../freqai/torch/PyTorchDataConvertor.py      | 30 +++++++------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchDataConvertor.py b/freqtrade/freqai/torch/PyTorchDataConvertor.py
index e6b815373..0af14dd14 100644
--- a/freqtrade/freqai/torch/PyTorchDataConvertor.py
+++ b/freqtrade/freqai/torch/PyTorchDataConvertor.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from typing import Optional
 
 import pandas as pd
 import torch
@@ -12,14 +11,14 @@ class PyTorchDataConvertor(ABC):
     """
 
     @abstractmethod
-    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
+    def convert_x(self, df: pd.DataFrame, device: str) -> torch.Tensor:
         """
         :param df: "*_features" dataframe.
         :param device: The device to use for training (e.g. 'cpu', 'cuda').
         """
 
     @abstractmethod
-    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
+    def convert_y(self, df: pd.DataFrame, device: str) -> torch.Tensor:
         """
         :param df: "*_labels" dataframe.
         :param device: The device to use for training (e.g. 'cpu', 'cuda').
         """
@@ -33,8 +32,8 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
 
     def __init__(
         self,
-        target_tensor_type: Optional[torch.dtype] = None,
-        squeeze_target_tensor: bool = False
+        target_tensor_type: torch.dtype = torch.float32,
+        squeeze_target_tensor: bool = False,
     ):
         """
         :param target_tensor_type: type of target tensor, for classification use
@@ -45,23 +44,14 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
         self._target_tensor_type = target_tensor_type
         self._squeeze_target_tensor = squeeze_target_tensor
 
-    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
-        x = torch.from_numpy(df.values).float()
-        if device:
-            x = x.to(device)
-
+    def convert_x(self, df: pd.DataFrame, device: str) -> torch.Tensor:
+        numpy_arrays = df.values
+        x = torch.tensor(numpy_arrays, device=device, dtype=torch.float32)
         return x
 
-    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
-        y = torch.from_numpy(df.values)
-
-        if self._target_tensor_type:
-            y = y.to(self._target_tensor_type)
-
+    def convert_y(self, df: pd.DataFrame, device: str) -> torch.Tensor:
+        numpy_arrays = df.values
+        y = torch.tensor(numpy_arrays, device=device, dtype=self._target_tensor_type)
        if self._squeeze_target_tensor:
            y = y.squeeze()
-
-        if device:
-            y = y.to(device)
-
        return y
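Patch 02's core change - allocating the tensor on the target device in a single torch.tensor() call instead of building it on the CPU and copying it over - can be seen side by side in a small sketch (the DataFrame here is a stand-in for the "*_features" data):

    import pandas as pd
    import torch

    df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})

    # old pattern: CPU tensor first, device transfer as a second step
    x_old = torch.from_numpy(df.values).float()

    # new pattern: one call that places the data on the requested device;
    # with a CUDA device this skips the intermediate host-side tensor object
    x_new = torch.tensor(df.values, device="cpu", dtype=torch.float32)

    assert torch.equal(x_old, x_new)

One behavioral difference worth noting: torch.from_numpy() shares memory with the NumPy array, while torch.tensor() always copies, which is the safer choice if the source data may be mutated later.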
From 49a7de4ebdf7ea3621f072957ca936b26f82fc03 Mon Sep 17 00:00:00 2001
From: yinon
Date: Thu, 13 Jul 2023 15:39:47 +0000
Subject: [PATCH 03/17] pytorch - trainer - add device arg to load method

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index e74b572fd..b49e16196 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -182,8 +182,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             "pytrainer": self
         }, path)
 
-    def load(self, path: Path):
-        checkpoint = torch.load(path)
+    def load(self, path: Path, device: str = None):
+        checkpoint = torch.load(path, map_location=device)
         return self.load_from_checkpoint(checkpoint)
 
     def load_from_checkpoint(self, checkpoint: Dict):

From 588ffeedc146c790a7eb81defe1bf0221d3ee934 Mon Sep 17 00:00:00 2001
From: yinon
Date: Thu, 13 Jul 2023 15:40:40 +0000
Subject: [PATCH 04/17] pytorch - trainer - remove max_n_eval_batches arg from
 estimate loss method

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index b49e16196..fe9919810 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -1,5 +1,4 @@
 import logging
-import math
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
@@ -53,7 +52,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.device = device
         self.max_iters: int = kwargs.get("max_iters", 100)
         self.batch_size: int = kwargs.get("batch_size", 64)
-        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
+        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)  # TODO change this to n_batches
         self.data_convertor = data_convertor
         self.window_size: int = window_size
         self.tb_logger = tb_logger
@@ -95,25 +94,16 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
            # evaluation
            if "test" in splits:
-               self.estimate_loss(
-                   data_loaders_dictionary,
-                   self.max_n_eval_batches,
-                   "test"
-               )
+               self.estimate_loss(data_loaders_dictionary, "test")
 
    @torch.no_grad()
    def estimate_loss(
            self,
            data_loader_dictionary: Dict[str, DataLoader],
-           max_n_eval_batches: Optional[int],
            split: str,
    ) -> None:
        self.model.eval()
-       n_batches = 0
        for i, batch_data in enumerate(data_loader_dictionary[split]):
-           if max_n_eval_batches and i > max_n_eval_batches:
-               n_batches += 1
-               break
-
            xb, yb = batch_data
            xb.to(self.device)
            yb.to(self.device)
@@ -158,8 +148,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
        across different n_obs - the number of data points.
        """
 
-       n_batches = math.ceil(n_obs // batch_size)
-       epochs = math.ceil(n_iters // n_batches)
+       n_batches = n_obs // batch_size
+       epochs = n_iters // n_batches
        if epochs <= 10:
            logger.warning("User set `max_iters` in such a way that the trainer will only perform "
                           f" {epochs} epochs. Please consider increasing this value accordingly")
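A side note on the calc_n_epochs hunk above: wrapping an integer floor division in math.ceil() was a no-op, which is why the simplification does not change behavior. A quick illustration with made-up numbers:

    import math

    n_obs, batch_size = 1000, 64
    print(math.ceil(n_obs // batch_size))  # 15 - ceil of an int is the int itself
    print(n_obs // batch_size)             # 15 - the simplified replacement
    print(math.ceil(n_obs / batch_size))   # 16 - a true round-up would need "/"

Dropping `import math` follows, since these were evidently the module's only remaining uses.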
From 7d28dad209784b48799ce9099bdd442b243e4632 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Thu, 13 Jul 2023 19:41:39 +0300
Subject: [PATCH 05/17] pytorch - add n_epochs param to trainer

---
 .../prediction_models/PyTorchMLPClassifier.py |  2 +-
 .../prediction_models/PyTorchMLPRegressor.py  |  2 +-
 .../PyTorchTransformerRegressor.py            |  2 +-
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 19 ++++++++-----------
 tests/freqai/conftest.py                      |  2 +-
 5 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
index 71279dba9..ca333d9cf 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
@@ -28,7 +28,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             "trainer_kwargs": {
                 "max_iters": 5000,
                 "batch_size": 64,
-                "max_n_eval_batches": null,
+                "n_epochs": null,
             },
             "model_kwargs": {
                 "hidden_dim": 512,
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
index 9f4534487..42fddf8ff 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
@@ -29,7 +29,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             "trainer_kwargs": {
                 "max_iters": 5000,
                 "batch_size": 64,
-                "max_n_eval_batches": null,
+                "n_epochs": null,
             },
             "model_kwargs": {
                 "hidden_dim": 512,
diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
index a76bab05c..32663c86b 100644
--- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -32,7 +32,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
             "trainer_kwargs": {
                 "max_iters": 5000,
                 "batch_size": 64,
-                "max_n_eval_batches": null
+                "n_epochs": null
             },
             "model_kwargs": {
                 "hidden_dim": 512,
diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index fe9919810..a34d673b4 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -40,10 +40,10 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         :param model_meta_data: Additional metadata about the model (optional).
         :param data_convertor: convertor from pd.DataFrame to torch.tensor.
         :param max_iters: The number of training iterations to run.
-            iteration here refers to the number of times we call
-            self.optimizer.step(). used to calculate n_epochs.
+            iteration here refers to the number of times optimizer.step() is called,
+            used to calculate n_epochs. ignored if n_epochs is set.
+        :param n_epochs: The number of full passes over the training dataset.
         :param batch_size: The size of the batches to use during training.
-        :param max_n_eval_batches: The maximum number batches to use for evaluation.
         """
         self.model = model
         self.optimizer = optimizer
@@ -51,8 +51,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.model_meta_data = model_meta_data
         self.device = device
         self.max_iters: int = kwargs.get("max_iters", 100)
+        self.n_epochs: Optional[int] = kwargs.get("n_epochs", None)
         self.batch_size: int = kwargs.get("batch_size", 64)
-        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)  # TODO change this to n_batches
         self.data_convertor = data_convertor
         self.window_size: int = window_size
         self.tb_logger = tb_logger
@@ -71,16 +71,13 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
           backpropagation.
         - Updates the model's parameters using an optimizer.
         """
-        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
-        epochs = self.calc_n_epochs(
-            n_obs=len(data_dictionary["train_features"]),
-            batch_size=self.batch_size,
-            n_iters=self.max_iters
-        )
         self.model.train()
+
+        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
+        n_obs = len(data_dictionary["train_features"])
+        epochs = self.n_epochs or self.calc_n_epochs(n_obs=n_obs, batch_size=self.batch_size, n_iters=self.max_iters)
         for epoch in range(1, epochs + 1):
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
-
                 xb, yb = batch_data
                 xb = xb.to(self.device)
                 yb = yb.to(self.device)
diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py
index 4c4891ceb..96716e83f 100644
--- a/tests/freqai/conftest.py
+++ b/tests/freqai/conftest.py
@@ -99,7 +99,7 @@ def mock_pytorch_mlp_model_training_parameters() -> Dict[str, Any]:
         "trainer_kwargs": {
             "max_iters": 1,
             "batch_size": 64,
-            "max_n_eval_batches": 1,
+            "n_epochs": None,
         },
         "model_kwargs": {
             "hidden_dim": 32,
From 5734358d91399e5e4caac8c9722bf8a23165d863 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Thu, 13 Jul 2023 20:59:33 +0300
Subject: [PATCH 06/17] pytorch - trainer - add assertion that either n_epochs
 or max_iters has been set.

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index a34d673b4..efdf3ed5a 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -39,9 +39,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         state_dict and model_meta_data saved by self.save() method.
         :param model_meta_data: Additional metadata about the model (optional).
         :param data_convertor: convertor from pd.DataFrame to torch.tensor.
-        :param max_iters: The number of training iterations to run.
-            iteration here refers to the number of times optimizer.step() is called,
-            used to calculate n_epochs. ignored if n_epochs is set.
+        :param max_iters: used to calculate n_epochs. The number of training iterations to run.
+            iteration here refers to the number of times optimizer.step() is called.
+            ignored if n_epochs is set.
         :param n_epochs: The number of full passes over the training dataset.
         :param batch_size: The size of the batches to use during training.
         """
@@ -52,6 +52,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.device = device
         self.max_iters: int = kwargs.get("max_iters", 100)
         self.n_epochs: Optional[int] = kwargs.get("n_epochs", None)
+        if not self.max_iters and not self.n_epochs:
+            raise Exception("Either `max_iters` or `n_epochs` should be set.")
+
         self.batch_size: int = kwargs.get("batch_size", 64)
         self.data_convertor = data_convertor
         self.window_size: int = window_size
@@ -75,8 +78,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
         data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         n_obs = len(data_dictionary["train_features"])
-        epochs = self.n_epochs or self.calc_n_epochs(n_obs=n_obs, batch_size=self.batch_size, n_iters=self.max_iters)
-        for epoch in range(1, epochs + 1):
+        n_epochs = self.n_epochs or self.calc_n_epochs(n_obs=n_obs, batch_size=self.batch_size, n_iters=self.max_iters)
+        for epoch in range(1, n_epochs + 1):
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
                 xb, yb = batch_data
@@ -146,14 +149,14 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         """
 
         n_batches = n_obs // batch_size
-        epochs = n_iters // n_batches
-        if epochs <= 10:
-            logger.warning("User set `max_iters` in such a way that the trainer will only perform "
-                           f" {epochs} epochs. Please consider increasing this value accordingly")
-        if epochs <= 1:
-            logger.warning("Epochs set to 1. Please review your `max_iters` value")
-            epochs = 1
-        return epochs
+        n_epochs = max(n_iters // n_batches, 1)
+        if n_epochs <= 10:
+            logger.warning(
+                f"Setting low n_epochs. {n_epochs} = n_epochs = n_iters // n_batches = {n_iters} // {n_batches}. "
+                f"Please consider increasing `max_iters` hyper-parameter."
+            )
+
+        return n_epochs
From 9fb0ce664c76c02bdc15d351604385ad25bc43c9 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Thu, 13 Jul 2023 21:32:46 +0300
Subject: [PATCH 07/17] pytorch - ruff fixes

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index efdf3ed5a..e6691f3db 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -78,7 +78,11 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
         data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         n_obs = len(data_dictionary["train_features"])
-        n_epochs = self.n_epochs or self.calc_n_epochs(n_obs=n_obs, batch_size=self.batch_size, n_iters=self.max_iters)
+        n_epochs = self.n_epochs or self.calc_n_epochs(
+            n_obs=n_obs,
+            batch_size=self.batch_size,
+            n_iters=self.max_iters,
+        )
         for epoch in range(1, n_epochs + 1):
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
                 xb, yb = batch_data
@@ -152,7 +156,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         n_epochs = max(n_iters // n_batches, 1)
         if n_epochs <= 10:
             logger.warning(
-                f"Setting low n_epochs. {n_epochs} = n_epochs = n_iters // n_batches = {n_iters} // {n_batches}. "
+                f"Setting low n_epochs: {n_epochs}. "
                 f"Please consider increasing `max_iters` hyper-parameter."
             )
 

From ffcba45b1bda92a2d71b7a4d40254e2d0c352aa6 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Thu, 13 Jul 2023 21:36:14 +0300
Subject: [PATCH 08/17] pytorch - mypy fixes

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index e6691f3db..e6638d4fd 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -176,7 +176,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             "pytrainer": self
         }, path)
 
-    def load(self, path: Path, device: str = None):
+    def load(self, path: Path, device: Optional[str] = None):
         checkpoint = torch.load(path, map_location=device)
         return self.load_from_checkpoint(checkpoint)
 
     def load_from_checkpoint(self, checkpoint: Dict):

From d61f512e200c214d285aaff5775c54c46987d343 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Sat, 15 Jul 2023 14:37:44 +0300
Subject: [PATCH 09/17] pytorch - trainer - bugfix tensorboard step usage

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index e6638d4fd..1692b4acf 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -59,6 +59,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.data_convertor = data_convertor
         self.window_size: int = window_size
         self.tb_logger = tb_logger
+        self.test_batch_counter = 0
 
     def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
         """
@@ -83,8 +84,10 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             batch_size=self.batch_size,
             n_iters=self.max_iters,
         )
-        for epoch in range(1, n_epochs + 1):
-            for i, batch_data in enumerate(data_loaders_dictionary["train"]):
+
+        batch_counter = 0
+        for epoch in range(n_epochs):
+            for _, batch_data in enumerate(data_loaders_dictionary["train"]):
                 xb, yb = batch_data
                 xb = xb.to(self.device)
                 yb = yb.to(self.device)
@@ -94,7 +97,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
                 self.optimizer.zero_grad(set_to_none=True)
                 loss.backward()
                 self.optimizer.step()
-                self.tb_logger.log_scalar("train_loss", loss.item(), i)
+                self.tb_logger.log_scalar("train_loss", loss.item(), batch_counter)
+                batch_counter += 1
 
             # evaluation
             if "test" in splits:
@@ -107,14 +111,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         split: str,
     ) -> None:
         self.model.eval()
-        for i, batch_data in enumerate(data_loader_dictionary[split]):
+        for _, batch_data in enumerate(data_loader_dictionary[split]):
             xb, yb = batch_data
             xb.to(self.device)
             yb.to(self.device)
 
             yb_pred = self.model(xb)
             loss = self.criterion(yb_pred, yb)
-            self.tb_logger.log_scalar(f"{split}_loss", loss.item(), i)
+            self.tb_logger.log_scalar(f"{split}_loss", loss.item(), self.test_batch_counter)
+            self.test_batch_counter += 1
 
         self.model.train()
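The counters introduced in patch 09 matter because the step argument of log_scalar (which maps to TensorBoard's global step) is treated as a global x-axis position. With the per-epoch enumerate() index, the step restarts at zero every epoch and later epochs overwrite earlier points; a running counter keeps it monotonic. A dependency-free sketch of the two step sequences:

    n_epochs, n_batches = 3, 4

    enumerate_steps = [i for _ in range(n_epochs) for i in range(n_batches)]
    counter_steps = list(range(n_epochs * n_batches))

    print(enumerate_steps)  # [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] - steps collide
    print(counter_steps)    # 0 through 11 - strictly increasing

The same reasoning explains the persistent self.test_batch_counter: estimate_loss() runs once per epoch, so a per-call index would collide in the same way.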
From d17bf6350d10ed3533ac264a7de24faaac572090 Mon Sep 17 00:00:00 2001
From: Yinon Polak
Date: Sat, 15 Jul 2023 14:43:05 +0300
Subject: [PATCH 10/17] pytorch - trainer - clean code

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 1692b4acf..7a8857994 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -86,7 +86,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         )
 
         batch_counter = 0
-        for epoch in range(n_epochs):
+        for _ in range(n_epochs):
             for _, batch_data in enumerate(data_loaders_dictionary["train"]):
                 xb, yb = batch_data
@@ -171,7 +171,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         """
         - Saving any nn.Module state_dict
         - Saving model_meta_data, this dict should contain any additional data that the
-          user needs to store. e.g class_names for classification models.
+          user needs to store. e.g. class_names for classification models.
         """
 
         torch.save({

From 836d7b885a0b91aabbce30a80833236fdafac9b0 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 12:50:01 +0000
Subject: [PATCH 11/17] pytorch - trainer - set default usage of n_epochs
 instead of max_iters

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 7a8857994..dc34e8907 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -50,8 +50,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.criterion = criterion
         self.model_meta_data = model_meta_data
         self.device = device
-        self.max_iters: int = kwargs.get("max_iters", 100)
-        self.n_epochs: Optional[int] = kwargs.get("n_epochs", None)
+        self.max_iters: int = kwargs.get("max_iters", None)
+        self.n_epochs: Optional[int] = kwargs.get("n_epochs", 10)
         if not self.max_iters and not self.n_epochs:
             raise Exception("Either `max_iters` or `n_epochs` should be set.")
 

From 777d25192c6cdcf642929c916df2b9c5432422f5 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 12:51:42 +0000
Subject: [PATCH 12/17] pytorch - bugfix - explicitly assign tensor to var as
 .to() is not inplace operation

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index dc34e8907..e7c4d53be 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -113,8 +113,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.model.eval()
         for _, batch_data in enumerate(data_loader_dictionary[split]):
             xb, yb = batch_data
-            xb.to(self.device)
-            yb.to(self.device)
+            xb = xb.to(self.device)
+            yb = yb.to(self.device)
 
             yb_pred = self.model(xb)
             loss = self.criterion(yb_pred, yb)

From d17bf6350d10ed3533ac264a7de24faaac572090 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 12:52:55 +0000
Subject: [PATCH 13/17] pytorch - trainer - revert load changes

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index e7c4d53be..2b0090c78 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -181,8 +181,8 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             "pytrainer": self
         }, path)
 
-    def load(self, path: Path, device: Optional[str] = None):
-        checkpoint = torch.load(path, map_location=device)
+    def load(self, path: Path):
+        checkpoint = torch.load(path)
         return self.load_from_checkpoint(checkpoint)
 
     def load_from_checkpoint(self, checkpoint: Dict):
From a3c6904fbcca31642aee4fa4b69fce293ee02010 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 13:45:21 +0000
Subject: [PATCH 14/17] pytorch - naming refactor - max_iters to n_steps

---
 .../prediction_models/PyTorchMLPClassifier.py |  2 +-
 .../prediction_models/PyTorchMLPRegressor.py  |  2 +-
 .../PyTorchTransformerRegressor.py            |  2 +-
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 14 +++++++-------
 tests/freqai/conftest.py                      |  4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
index ca333d9cf..9aabdf7ad 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
@@ -26,7 +26,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             "model_training_parameters" : {
                 "learning_rate": 3e-4,
                 "trainer_kwargs": {
-                    "max_iters": 5000,
+                    "n_steps": 5000,
                     "batch_size": 64,
                     "n_epochs": null,
                 },
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
index 42fddf8ff..dc8dc4b61 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
@@ -27,7 +27,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             "model_training_parameters" : {
                 "learning_rate": 3e-4,
                 "trainer_kwargs": {
-                    "max_iters": 5000,
+                    "n_steps": 5000,
                     "batch_size": 64,
                     "n_epochs": null,
                 },
diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
index 32663c86b..846d6df2e 100644
--- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -30,7 +30,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
             "model_training_parameters" : {
                 "learning_rate": 3e-4,
                 "trainer_kwargs": {
-                    "max_iters": 5000,
+                    "n_steps": 5000,
                     "batch_size": 64,
                     "n_epochs": null
                 },
diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 2b0090c78..44f7dec4e 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -39,7 +39,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         state_dict and model_meta_data saved by self.save() method.
         :param model_meta_data: Additional metadata about the model (optional).
         :param data_convertor: convertor from pd.DataFrame to torch.tensor.
-        :param max_iters: used to calculate n_epochs. The number of training iterations to run.
+        :param n_steps: used to calculate n_epochs. The number of training iterations to run.
             iteration here refers to the number of times optimizer.step() is called.
             ignored if n_epochs is set.
         :param n_epochs: The number of full passes over the training dataset.
@@ -50,10 +50,10 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.criterion = criterion
         self.model_meta_data = model_meta_data
         self.device = device
-        self.max_iters: int = kwargs.get("max_iters", None)
+        self.n_steps: int = kwargs.get("n_steps", None)
         self.n_epochs: Optional[int] = kwargs.get("n_epochs", 10)
-        if not self.max_iters and not self.n_epochs:
-            raise Exception("Either `max_iters` or `n_epochs` should be set.")
+        if not self.n_steps and not self.n_epochs:
+            raise Exception("Either `n_steps` or `n_epochs` should be set.")
 
         self.batch_size: int = kwargs.get("batch_size", 64)
         self.data_convertor = data_convertor
@@ -82,7 +82,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         n_epochs = self.n_epochs or self.calc_n_epochs(
             n_obs=n_obs,
             batch_size=self.batch_size,
-            n_iters=self.max_iters,
+            n_iters=self.n_steps,
         )
 
         batch_counter = 0
@@ -153,7 +153,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         Calculates the number of epochs required to reach the maximum number
         of iterations specified in the model training parameters.
 
-        the motivation here is that `max_iters` is easier to optimize and keep stable,
+        the motivation here is that `n_steps` is easier to optimize and keep stable,
         across different n_obs - the number of data points.
         """
 
@@ -162,7 +162,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         if n_epochs <= 10:
             logger.warning(
                 f"Setting low n_epochs: {n_epochs}. "
-                f"Please consider increasing `max_iters` hyper-parameter."
+                f"Please consider increasing `n_steps` hyper-parameter."
             )
 
         return n_epochs
diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py
index 96716e83f..9c7a950e7 100644
--- a/tests/freqai/conftest.py
+++ b/tests/freqai/conftest.py
@@ -97,9 +97,9 @@ def mock_pytorch_mlp_model_training_parameters() -> Dict[str, Any]:
     return {
         "learning_rate": 3e-4,
         "trainer_kwargs": {
-            "max_iters": 1,
+            "n_steps": None,
             "batch_size": 64,
-            "n_epochs": None,
+            "n_epochs": 1,
         },
         "model_kwargs": {
             "hidden_dim": 32,
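The max_iters -> n_steps rename keeps the same epoch-derivation logic, which is worth seeing with concrete numbers (all values invented for illustration):

    n_obs = 20_000   # rows in the "train_features" dataframe
    batch_size = 64
    n_steps = 5_000  # desired number of optimizer.step() calls

    n_batches = n_obs // batch_size          # 312 batches per epoch
    n_epochs = max(n_steps // n_batches, 1)  # 16 epochs
    print(n_batches, n_epochs)               # 312 16

Because the step budget is fixed, the same n_steps yields fewer epochs as the dataset grows - which is exactly why it is described as more stable to tune than a raw epoch count.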
From 9f69a45afd5ff93c74bb4fab88cb03b3cefed600 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 13:46:30 +0000
Subject: [PATCH 15/17] pytorch - documentation update

---
 docs/freqai-parameter-table.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 5e60d2a07..de0b666ca 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -100,12 +100,12 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 
 #### trainer_kwargs
 
-| Parameter | Description |
-|------------|-------------|
-| | **Model training parameters within the `freqai.model_training_parameters.model_kwargs` sub dictionary**
-| `max_iters` | The number of training iterations to run. iteration here refers to the number of times we call self.optimizer.step(). used to calculate n_epochs. <br> **Datatype:** int. <br> Default: `100`.
-| `batch_size` | The size of the batches to use during training.. <br> **Datatype:** int. <br> Default: `64`.
-| `max_n_eval_batches` | The maximum number batches to use for evaluation.. <br> **Datatype:** int, optional. <br> Default: `None`.
+| Parameter | Description |
+|----------------------|-------------|
+| | **Model training parameters within the `freqai.model_training_parameters.model_kwargs` sub dictionary**
+| `n_epochs` | The `n_epochs` parameter is a crucial setting in the PyTorch training loop that determines the number of times the entire training dataset will be used to update the model's parameters. An epoch represents one full pass through the entire training dataset. <br> **Datatype:** int. <br> Default: `10`.
+| `n_steps` | An alternative way of setting `n_epochs` - the number of training iterations to run. Iteration here refers to the number of times we call `optimizer.step()`. A simplified version of the function: <br> <br> n_epochs = n_steps / (n_obs / batch_size) <br> <br> The motivation here is that `n_steps` is easier to optimize and keep stable across different n_obs - the number of data points. <br> <br> **Datatype:** int, optional. <br> Default: `None`.
+| `batch_size` | The size of the batches to use during training. <br> **Datatype:** int. <br> Default: `64`.
 
 ### Additional parameters

From 23d2bad2a08e09fdd8ec1f02f1af318a05e510a9 Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 14:33:59 +0000
Subject: [PATCH 16/17] pytorch - set n_steps type as optional

---
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 44f7dec4e..371a953e7 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -50,9 +50,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.criterion = criterion
         self.model_meta_data = model_meta_data
         self.device = device
-        self.n_steps: int = kwargs.get("n_steps", None)
         self.n_epochs: Optional[int] = kwargs.get("n_epochs", 10)
-        if not self.n_steps and not self.n_epochs:
+        self.n_steps: Optional[int] = kwargs.get("n_steps", None)
+        if self.n_steps is None and not self.n_epochs:
             raise Exception("Either `n_steps` or `n_epochs` should be set.")
 
         self.batch_size: int = kwargs.get("batch_size", 64)
@@ -79,12 +79,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
         data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         n_obs = len(data_dictionary["train_features"])
-        n_epochs = self.n_epochs or self.calc_n_epochs(
-            n_obs=n_obs,
-            batch_size=self.batch_size,
-            n_iters=self.n_steps,
-        )
-
+        n_epochs = self.n_epochs or self.calc_n_epochs(n_obs=n_obs)
         batch_counter = 0
         for _ in range(n_epochs):
             for _, batch_data in enumerate(data_loaders_dictionary["train"]):
@@ -147,8 +142,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
 
         return data_loader_dictionary
 
-    @staticmethod
-    def calc_n_epochs(n_obs: int, batch_size: int, n_iters: int) -> int:
+    def calc_n_epochs(self, n_obs: int) -> int:
         """
         Calculates the number of epochs required to reach the maximum number
         of iterations specified in the model training parameters.
@@ -156,9 +150,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         the motivation here is that `n_steps` is easier to optimize and keep stable,
         across different n_obs - the number of data points.
         """
-
-        n_batches = n_obs // batch_size
-        n_epochs = max(n_iters // n_batches, 1)
+        assert isinstance(self.n_steps, int), "Either `n_steps` or `n_epochs` should be set."
+        n_batches = n_obs // self.batch_size
+        n_epochs = max(self.n_steps // n_batches, 1)
         if n_epochs <= 10:
             logger.warning(
                 f"Setting low n_epochs: {n_epochs}. "
From bdf89efd113944f0b00ca6733b8acb905022471e Mon Sep 17 00:00:00 2001
From: yinon
Date: Fri, 4 Aug 2023 14:42:28 +0000
Subject: [PATCH 17/17] pytorch - improve docs

---
 docs/freqai-parameter-table.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index de0b666ca..95687c7ab 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -100,12 +100,12 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 
 #### trainer_kwargs
 
-| Parameter | Description |
-|----------------------|-------------|
-| | **Model training parameters within the `freqai.model_training_parameters.model_kwargs` sub dictionary**
-| `n_epochs` | The `n_epochs` parameter is a crucial setting in the PyTorch training loop that determines the number of times the entire training dataset will be used to update the model's parameters. An epoch represents one full pass through the entire training dataset. <br> **Datatype:** int. <br> Default: `10`.
-| `n_steps` | An alternative way of setting `n_epochs` - the number of training iterations to run. Iteration here refers to the number of times we call `optimizer.step()`. A simplified version of the function: <br> <br> n_epochs = n_steps / (n_obs / batch_size) <br> <br> The motivation here is that `n_steps` is easier to optimize and keep stable across different n_obs - the number of data points. <br> <br> **Datatype:** int, optional. <br> Default: `None`.
-| `batch_size` | The size of the batches to use during training. <br> **Datatype:** int. <br> Default: `64`.
+| Parameter | Description |
+|--------------|-------------|
+| | **Model training parameters within the `freqai.model_training_parameters.model_kwargs` sub dictionary**
+| `n_epochs` | The `n_epochs` parameter is a crucial setting in the PyTorch training loop that determines the number of times the entire training dataset will be used to update the model's parameters. An epoch represents one full pass through the entire training dataset. Overrides `n_steps`. Either `n_epochs` or `n_steps` must be set. <br> <br> **Datatype:** int, optional. <br> Default: `10`.
+| `n_steps` | An alternative way of setting `n_epochs` - the number of training iterations to run. Iteration here refers to the number of times we call `optimizer.step()`. Ignored if `n_epochs` is set. A simplified version of the function: <br> <br> n_epochs = n_steps / (n_obs / batch_size) <br> <br> The motivation here is that `n_steps` is easier to optimize and keep stable across different n_obs - the number of data points. <br> <br> **Datatype:** int, optional. <br> Default: `None`.
+| `batch_size` | The size of the batches to use during training. <br> <br> **Datatype:** int. <br> Default: `64`.
 
 ### Additional parameters
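Putting the final table into practice, a configuration sketch - the field names follow the table above, the surrounding structure mirrors the mock in tests/freqai/conftest.py, and the values are only examples:

    model_training_parameters = {
        "learning_rate": 3e-4,
        "trainer_kwargs": {
            "n_epochs": 10,   # full passes over the training data (the default)
            "n_steps": None,  # or set a step budget here and leave n_epochs unset
            "batch_size": 64,
        },
    }

Per the table, at least one of n_epochs and n_steps needs a value, and n_epochs takes precedence when both are set.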