add tensorboard integration to XGBoost and PyTorch et al

This commit is contained in:
robcaulk
2023-05-10 09:48:36 +00:00
parent d02cf8f0b7
commit ffc4d87263
9 changed files with 112 additions and 16 deletions

View File

@@ -158,7 +158,10 @@ This specific hyperopt would help you understand the appropriate `DI_values` for
## Using Tensorboard
CatBoost models benefit from tracking training metrics via Tensorboard. You can take advantage of the FreqAI integration to track training and evaluation performance across all coins and across all retrainings. Tensorboard is activated as described below.

!!! note "Availability"
    FreqAI includes tensorboard for a variety of models, including XGBoost, all PyTorch models, Reinforcement Learning, and Catboost. If you would like to see Tensorboard integrated into another model type, please open an issue on the [Freqtrade GitHub](https://github.com/freqtrade/freqtrade/issues).

The easiest way to use tensorboard is to open a separate shell and run:
```bash
cd freqtrade
```

View File

@@ -0,0 +1,14 @@
# ensure users can still use a non-torch freqai version
try:
    from freqtrade.freqai.tensorboard import TensorBoardCallback, TensorboardLogger
    TBLogger = TensorboardLogger
    TBCallback = TensorBoardCallback
except ModuleNotFoundError:
    from freqtrade.freqai.tensorboard import BaseTensorBoardCallback, BaseTensorboardLogger
    TBLogger = BaseTensorboardLogger  # type: ignore
    TBCallback = BaseTensorBoardCallback  # type: ignore

__all__ = (
    "TBLogger",
    "TBCallback"
)
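For orientation, a minimal usage sketch of the alias this file exposes: the same calls work whether or not torch is installed, because the fallback base classes are warning-only no-ops. The log directory below is illustrative.

```python
from pathlib import Path

from freqtrade.freqai import TBLogger

# resolves to TensorboardLogger when torch is available,
# otherwise to the warning-only BaseTensorboardLogger
tb_logger = TBLogger(Path("user_data/models/unique-id"))
tb_logger.log_scalar("train_loss", 0.25, 0)
tb_logger.close()
```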

View File

@@ -19,6 +19,7 @@ from freqtrade.data.dataprovider import DataProvider
from freqtrade.enums import RunMode
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai import TBLogger
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance, record_params
@@ -630,7 +631,9 @@ class IFreqaiModel(ABC):
dk.find_features(unfiltered_dataframe)
dk.find_labels(unfiltered_dataframe)
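# open a Tensorboard logger in the model's data path for this retraining;
# it is closed right after train() returns so the event files are flushed to disk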
self.tb_logger = TBLogger(dk.data_path)
model = self.train(unfiltered_dataframe, pair, dk)
self.tb_logger.close()
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
dk.set_new_model_names(pair, new_trained_timerange.stopts)

View File

@@ -83,6 +83,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
device=self.device,
init_model=init_model,
data_convertor=self.data_convertor,
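# hand the interface-managed Tensorboard logger to the trainer so it can log per-batch losses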
tb_logger=self.tb_logger,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)

View File

@@ -77,6 +77,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
device=self.device,
init_model=init_model,
data_convertor=self.data_convertor,
tb_logger=self.tb_logger,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)

View File

@@ -32,8 +32,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
"trainer_kwargs": {
"max_iters": 5000,
"batch_size": 64,
"max_n_eval_batches": null,
"window_size": 10
"max_n_eval_batches": null
},
"model_kwargs": {
"hidden_dim": 512,
@@ -84,6 +83,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
init_model=init_model,
data_convertor=self.data_convertor,
window_size=self.window_size,
tb_logger=self.tb_logger,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)

View File

@@ -3,6 +3,7 @@ from typing import Any, Dict
from xgboost import XGBRegressor
from freqtrade.freqai import TBCallback
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
@@ -44,7 +45,10 @@ class XGBoostRegressor(BaseRegressionModel):
model = XGBRegressor(**self.model_training_parameters)
model.set_params(callbacks=[TBCallback(dk.data_path)])
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
# set the callbacks to empty so that we can serialize to disk later
model.set_params(callbacks=[])
return model
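For context, the same wiring as a self-contained sketch outside of FreqAI, with synthetic data; the log directory is illustrative and the full writer requires torch (for `torch.utils.tensorboard`).

```python
from pathlib import Path

import numpy as np
from xgboost import XGBRegressor

from freqtrade.freqai import TBCallback

X = np.random.rand(256, 8)
y = np.random.rand(256)

model = XGBRegressor(n_estimators=50)
# attach the Tensorboard callback so each boosting round's eval metric is logged
model.set_params(callbacks=[TBCallback(Path("user_data/models/demo"))])
model.fit(X, y, eval_set=[(X, y)])

# clear the callbacks again so the fitted model can be serialized to disk,
# mirroring XGBoostRegressor above
model.set_params(callbacks=[])
```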

View File

@@ -0,0 +1,77 @@
import logging
from pathlib import Path
from typing import Any

import xgboost as xgb


logger = logging.getLogger(__name__)


class BaseTensorboardLogger:
    def __init__(self, logdir: str = "tensorboard", id: str = "unique-id"):
        logger.warning("Tensorboard is not installed, no logs will be written. "
                       "Ensure torch is installed, or use the torch/RL docker images")

    def log_scalar(self, tag: str, scalar_value: Any, step: int):
        return

    def close(self):
        return


class BaseTensorBoardCallback(xgb.callback.TrainingCallback):

    def __init__(self, logdir: str = "tensorboard", id: str = "unique-id", test_size=1):
        logger.warning("Tensorboard is not installed, no logs will be written. "
                       "Ensure torch is installed, or use the torch/RL docker images")

    def after_iteration(
        self, model, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
    ) -> bool:
        return False

    def after_training(self, model):
        return model


class TensorboardLogger(BaseTensorboardLogger):
    def __init__(self, logdir: Path = Path("tensorboard")):
        from torch.utils.tensorboard import SummaryWriter
        self.writer: SummaryWriter = SummaryWriter(f"{str(logdir)}/tensorboard")

    def log_scalar(self, tag: str, scalar_value: Any, step: int):
        self.writer.add_scalar(tag, scalar_value, step)

    def close(self):
        self.writer.flush()
        self.writer.close()


class TensorBoardCallback(BaseTensorBoardCallback):

    def __init__(self, logdir: Path = Path("tensorboard")):
        from torch.utils.tensorboard import SummaryWriter
        self.writer: SummaryWriter = SummaryWriter(f"{str(logdir)}/tensorboard")

    def after_iteration(
        self, model, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
    ) -> bool:
        if not evals_log:
            return False

        for data, metric in evals_log.items():
            for metric_name, log in metric.items():
                score = log[-1][0] if isinstance(log[-1], tuple) else log[-1]
                if data == "train":
                    self.writer.add_scalar("train_loss", score**2, epoch)
                else:
                    self.writer.add_scalar("valid_loss", score**2, epoch)

        return False

    def after_training(self, model):
        self.writer.flush()
        self.writer.close()

        return model
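Both loggers write standard event files into a `tensorboard/` subdirectory of the supplied `logdir`, so the output can also be inspected programmatically; a hedged sketch using tensorboard's `EventAccumulator` (the path is illustrative and assumes the `tensorboard` package is installed):

```python
# read back the scalars written by TensorboardLogger / TensorBoardCallback
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("user_data/models/unique-id/tensorboard")
acc.Reload()
for tag in acc.Tags()["scalars"]:
    print(tag, [(event.step, event.value) for event in acc.Scalars(tag)])
```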

View File

@@ -29,6 +29,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
data_convertor: PyTorchDataConvertor,
model_meta_data: Dict[str, Any] = {},
window_size: int = 1,
tb_logger: Any = None,
**kwargs
):
"""
@@ -56,6 +57,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
self.data_convertor = data_convertor
self.window_size: int = window_size
self.tb_logger = tb_logger
if init_model:
self.load_from_checkpoint(init_model)
@@ -81,8 +83,6 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
)
self.model.train()
for epoch in range(1, epochs + 1):
# training
losses = []
for i, batch_data in enumerate(data_loaders_dictionary["train"]):
xb, yb = batch_data
@@ -94,20 +94,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
self.optimizer.zero_grad(set_to_none=True)
loss.backward()
self.optimizer.step()
losses.append(loss.item())
train_loss = sum(losses) / len(losses)
log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
self.tb_logger.log_scalar("train_loss", loss.item(), i)
# evaluation
if "test" in splits:
test_loss = self.estimate_loss(
self.estimate_loss(
data_loaders_dictionary,
self.max_n_eval_batches,
"test"
)
log_message += f" ; test loss {test_loss:.4f}"
logger.info(log_message)
@torch.no_grad()
def estimate_loss(
@@ -115,10 +110,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
data_loader_dictionary: Dict[str, DataLoader],
max_n_eval_batches: Optional[int],
split: str,
) -> float:
) -> None:
self.model.eval()
n_batches = 0
losses = []
for i, batch_data in enumerate(data_loader_dictionary[split]):
if max_n_eval_batches and i > max_n_eval_batches:
n_batches += 1
@@ -129,10 +123,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
yb_pred = self.model(xb)
loss = self.criterion(yb_pred, yb)
losses.append(loss.item())
self.tb_logger.log_scalar(f"{split}_loss", loss.item(), i)
self.model.train()
return sum(losses) / len(losses)
def create_data_loaders_dictionary(
self,
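Net effect of these trainer hunks: raw per-batch losses are streamed to Tensorboard (keyed by batch index) instead of being averaged into a log message, and `estimate_loss` now logs test batches the same way rather than returning a mean. A self-contained sketch of that pattern on a toy model; everything except `TBLogger` and the logging calls is illustrative:

```python
from pathlib import Path

import torch
from torch import nn

from freqtrade.freqai import TBLogger

model = nn.Linear(4, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()
tb_logger = TBLogger(Path("user_data/models/demo"))

xb = torch.rand(64, 4)
yb = torch.rand(64, 1)
for epoch in range(1, 3):
    for i in range(10):
        yb_pred = model(xb)
        loss = criterion(yb_pred, yb)
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
        # mirror the trainer: log the raw batch loss keyed by batch index
        tb_logger.log_scalar("train_loss", loss.item(), i)
tb_logger.close()
```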