From ffc4d8726346f4bf03f8c0bbae6bc5535392b013 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 10 May 2023 09:48:36 +0000
Subject: [PATCH] add tensorboard integration to XGBoost and PyTorch et al

---
 docs/freqai-running.md                        |  5 +-
 freqtrade/freqai/__init__.py                  | 14 ++++
 freqtrade/freqai/freqai_interface.py          |  3 +
 .../prediction_models/PyTorchMLPClassifier.py |  1 +
 .../prediction_models/PyTorchMLPRegressor.py  |  1 +
 .../PyTorchTransformerRegressor.py            |  4 +-
 .../prediction_models/XGBoostRegressor.py     |  4 +
 freqtrade/freqai/tensorboard.py               | 77 +++++++++++++++++++
 freqtrade/freqai/torch/PyTorchModelTrainer.py | 19 ++---
 9 files changed, 112 insertions(+), 16 deletions(-)
 create mode 100644 freqtrade/freqai/tensorboard.py

diff --git a/docs/freqai-running.md b/docs/freqai-running.md
index f3ccc546f..5ea67651f 100644
--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -158,7 +158,10 @@ This specific hyperopt would help you understand the appropriate `DI_values` for
 
 ## Using Tensorboard
 
-CatBoost models benefit from tracking training metrics via Tensorboard. You can take advantage of the FreqAI integration to track training and evaluation performance across all coins and across all retrainings. Tensorboard is activated via the following command:
+!!! note "Availability"
+    FreqAI includes Tensorboard for a variety of models, including XGBoost, all PyTorch models, Reinforcement Learning, and CatBoost. If you would like to see Tensorboard integrated into another model type, please open an issue on the [Freqtrade GitHub](https://github.com/freqtrade/freqtrade/issues).
+
+The easiest way to use Tensorboard is to open a separate shell and run:
 
 ```bash
 cd freqtrade
diff --git a/freqtrade/freqai/__init__.py b/freqtrade/freqai/__init__.py
index e69de29bb..5fb6e5be0 100644
--- a/freqtrade/freqai/__init__.py
+++ b/freqtrade/freqai/__init__.py
@@ -0,0 +1,14 @@
+# ensure users can still use a non-torch freqai version
+try:
+    from freqtrade.freqai.tensorboard import TensorBoardCallback, TensorboardLogger
+    TBLogger = TensorboardLogger
+    TBCallback = TensorBoardCallback
+except ModuleNotFoundError:
+    from freqtrade.freqai.tensorboard import BaseTensorBoardCallback, BaseTensorboardLogger
+    TBLogger = BaseTensorboardLogger  # type: ignore
+    TBCallback = BaseTensorBoardCallback  # type: ignore
+
+__all__ = (
+    "TBLogger",
+    "TBCallback"
+)
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 1669d1483..cf8097870 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -19,6 +19,7 @@ from freqtrade.data.dataprovider import DataProvider
 from freqtrade.enums import RunMode
 from freqtrade.exceptions import OperationalException
 from freqtrade.exchange import timeframe_to_seconds
+from freqtrade.freqai import TBLogger
 from freqtrade.freqai.data_drawer import FreqaiDataDrawer
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.utils import plot_feature_importance, record_params
@@ -630,7 +631,9 @@ class IFreqaiModel(ABC):
         dk.find_features(unfiltered_dataframe)
         dk.find_labels(unfiltered_dataframe)
 
+        self.tb_logger = TBLogger(dk.data_path)
         model = self.train(unfiltered_dataframe, pair, dk)
+        self.tb_logger.close()
 
         self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
         dk.set_new_model_names(pair, new_trained_timerange.stopts)
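The two hunks above wire one logger into each training cycle: the interface creates a `TBLogger` pointed at the model's `dk.data_path` just before calling `train()`, and closes it right after, so every retraining writes its own event directory. A minimal sketch of that lifecycle (not part of the patch; the directory below is a hypothetical stand-in for `dk.data_path`):

```python
# Illustrative only: the TBLogger alias resolves to TensorboardLogger when
# torch is installed, and to the no-op BaseTensorboardLogger otherwise.
from pathlib import Path

from freqtrade.freqai import TBLogger

tb_logger = TBLogger(Path("user_data/models/example-id/sub-train-BTC"))
tb_logger.log_scalar("train_loss", 0.42, step=1)  # silently a no-op without torch
tb_logger.close()  # flush and release the event file
```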
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
index ea7981405..0ebe8d129 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
@@ -83,6 +83,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             device=self.device,
             init_model=init_model,
             data_convertor=self.data_convertor,
+            tb_logger=self.tb_logger,
             **self.trainer_kwargs,
         )
         trainer.fit(data_dictionary, self.splits)
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
index 64f0f4b03..7d87f5226 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
@@ -77,6 +77,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             device=self.device,
             init_model=init_model,
             data_convertor=self.data_convertor,
+            tb_logger=self.tb_logger,
             **self.trainer_kwargs,
         )
         trainer.fit(data_dictionary, self.splits)
diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
index e760f6e68..d135b690b 100644
--- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -32,8 +32,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
             "trainer_kwargs": {
                 "max_iters": 5000,
                 "batch_size": 64,
-                "max_n_eval_batches": null,
-                "window_size": 10
+                "max_n_eval_batches": null
             },
             "model_kwargs": {
                 "hidden_dim": 512,
@@ -84,6 +83,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
             init_model=init_model,
             data_convertor=self.data_convertor,
             window_size=self.window_size,
+            tb_logger=self.tb_logger,
             **self.trainer_kwargs,
         )
         trainer.fit(data_dictionary, self.splits)
diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
index 93dfb319e..b4cdead65 100644
--- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -3,6 +3,7 @@ from typing import Any, Dict
 
 from xgboost import XGBRegressor
 
+from freqtrade.freqai import TBCallback
 from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 
@@ -44,7 +45,10 @@ class XGBoostRegressor(BaseRegressionModel):
 
         model = XGBRegressor(**self.model_training_parameters)
 
+        model.set_params(callbacks=[TBCallback(dk.data_path)])
         model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
                   sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+        # set the callbacks to empty so that we can serialize to disk later
+        model.set_params(callbacks=[])
 
         return model
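The attach/detach around `fit()` in the XGBoost hunk matters because the callback holds a tensorboard `SummaryWriter` with an open file handle, which is not picklable; clearing `callbacks` before returning keeps the trained model serializable. A runnable sketch of the same pattern on toy data (array shapes and the log directory are invented, and torch is assumed to be installed):

```python
# Toy demonstration of the callback attach/train/detach pattern used above.
import numpy as np
from xgboost import XGBRegressor

from freqtrade.freqai import TBCallback

X = np.random.rand(200, 8)
y = np.random.rand(200)

model = XGBRegressor(n_estimators=20)
model.set_params(callbacks=[TBCallback("tensorboard")])  # log every boosting round
model.fit(X, y, eval_set=[(X, y)])
model.set_params(callbacks=[])  # writer is not picklable; detach before saving
```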
+ "Use ensure torch is installed, or use the torch/RL docker images") + + def log_scaler(self, tag: str, scalar_value: Any, step: int): + return + + def close(self): + return + + +class BaseTensorBoardCallback(xgb.callback.TrainingCallback): + + def __init__(self, logdir: str = "tensorboard", id: str = "uniqu-id", test_size=1): + logger.warning("Tensorboard is not installed, no logs will be written." + "Use ensure torch is installed, or use the torch/RL docker images") + + def after_iteration( + self, model, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog + ) -> bool: + return False + + def after_training(self, model): + return model + + +class TensorboardLogger(BaseTensorboardLogger): + def __init__(self, logdir: Path = Path("tensorboard")): + from torch.utils.tensorboard import SummaryWriter + self.writer: SummaryWriter = SummaryWriter(f"{str(logdir)}/tensorboard") + + def log_scalar(self, tag: str, scalar_value: Any, step: int): + self.writer.add_scalar(tag, scalar_value, step) + + def close(self): + self.writer.flush() + self.writer.close() + + +class TensorBoardCallback(BaseTensorBoardCallback): + + def __init__(self, logdir: Path = Path("tensorboard")): + from torch.utils.tensorboard import SummaryWriter + self.writer: SummaryWriter = SummaryWriter(f"{str(logdir)}/tensorboard") + + def after_iteration( + self, model, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog + ) -> bool: + if not evals_log: + return False + + for data, metric in evals_log.items(): + for metric_name, log in metric.items(): + score = log[-1][0] if isinstance(log[-1], tuple) else log[-1] + if data == "train": + self.writer.add_scalar("train_loss", score**2, epoch) + else: + self.writer.add_scalar("valid_loss", score**2, epoch) + + return False + + def after_training(self, model): + self.writer.flush() + self.writer.close() + + return model diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py index a3b0d9b9c..6d0441f07 100644 --- a/freqtrade/freqai/torch/PyTorchModelTrainer.py +++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py @@ -29,6 +29,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): data_convertor: PyTorchDataConvertor, model_meta_data: Dict[str, Any] = {}, window_size: int = 1, + tb_logger: Any = None, **kwargs ): """ @@ -56,6 +57,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None) self.data_convertor = data_convertor self.window_size: int = window_size + self.tb_logger = tb_logger if init_model: self.load_from_checkpoint(init_model) @@ -81,8 +83,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): ) self.model.train() for epoch in range(1, epochs + 1): - # training - losses = [] for i, batch_data in enumerate(data_loaders_dictionary["train"]): xb, yb = batch_data @@ -94,20 +94,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface): self.optimizer.zero_grad(set_to_none=True) loss.backward() self.optimizer.step() - losses.append(loss.item()) - train_loss = sum(losses) / len(losses) - log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}" + self.tb_logger.log_scalar("train_loss", loss.item(), i) # evaluation if "test" in splits: - test_loss = self.estimate_loss( + self.estimate_loss( data_loaders_dictionary, self.max_n_eval_batches, "test" ) - log_message += f" ; test loss {test_loss:.4f}" - - logger.info(log_message) @torch.no_grad() def estimate_loss( @@ -115,10 +110,9 @@ class 
diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index a3b0d9b9c..6d0441f07 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -29,6 +29,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         data_convertor: PyTorchDataConvertor,
         model_meta_data: Dict[str, Any] = {},
         window_size: int = 1,
+        tb_logger: Any = None,
         **kwargs
     ):
         """
@@ -56,6 +57,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
         self.data_convertor = data_convertor
         self.window_size: int = window_size
+        self.tb_logger = tb_logger
         if init_model:
             self.load_from_checkpoint(init_model)
@@ -81,8 +83,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         )
         self.model.train()
         for epoch in range(1, epochs + 1):
-            # training
-            losses = []
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
                 xb, yb = batch_data
@@ -94,20 +94,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
                 self.optimizer.zero_grad(set_to_none=True)
                 loss.backward()
                 self.optimizer.step()
-                losses.append(loss.item())
-            train_loss = sum(losses) / len(losses)
-            log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
+                self.tb_logger.log_scalar("train_loss", loss.item(), i)
 
             # evaluation
             if "test" in splits:
-                test_loss = self.estimate_loss(
+                self.estimate_loss(
                     data_loaders_dictionary,
                     self.max_n_eval_batches,
                     "test"
                 )
-                log_message += f" ; test loss {test_loss:.4f}"
-
-            logger.info(log_message)
 
     @torch.no_grad()
     def estimate_loss(
@@ -115,10 +110,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         data_loader_dictionary: Dict[str, DataLoader],
         max_n_eval_batches: Optional[int],
         split: str,
-    ) -> float:
+    ) -> None:
         self.model.eval()
         n_batches = 0
-        losses = []
         for i, batch_data in enumerate(data_loader_dictionary[split]):
             if max_n_eval_batches and i > max_n_eval_batches:
                 n_batches += 1
                 break
@@ -129,10 +123,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             yb_pred = self.model(xb)
             loss = self.criterion(yb_pred, yb)
-            losses.append(loss.item())
+            self.tb_logger.log_scalar(f"{split}_loss", loss.item(), i)
 
         self.model.train()
-        return sum(losses) / len(losses)
 
     def create_data_loaders_dictionary(
         self,
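The net effect of the trainer hunks is that per-epoch averaged losses (and their `logger.info` lines) are replaced by per-batch scalars streamed straight to tensorboard. A dependency-free sketch of the new logging flow, using a hypothetical stub in place of the patch's `TBLogger` and invented batch losses:

```python
# Stub standing in for TBLogger, to show the per-batch logging flow above.
class StubLogger:
    def log_scalar(self, tag: str, value: float, step: int) -> None:
        print(f"{tag}[{step}] = {value:.4f}")

tb_logger = StubLogger()
for epoch in range(1, 3):
    for i, loss in enumerate([0.9, 0.7, 0.55]):  # stand-in train batch losses
        tb_logger.log_scalar("train_loss", loss, i)  # step index resets each epoch
    for i, loss in enumerate([0.95, 0.8]):  # stand-in test batch losses
        tb_logger.log_scalar("test_loss", loss, i)
```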