ruff format: freqai

Matthias
2024-05-12 17:12:20 +02:00
parent e4e8c3967c
commit d1db43dee0
44 changed files with 1111 additions and 900 deletions


@@ -30,10 +30,10 @@ from freqtrade.persistence import Trade
 logger = logging.getLogger(__name__)
-torch.multiprocessing.set_sharing_strategy('file_system')
+torch.multiprocessing.set_sharing_strategy("file_system")
-SB3_MODELS = ['PPO', 'A2C', 'DQN']
-SB3_CONTRIB_MODELS = ['TRPO', 'ARS', 'RecurrentPPO', 'MaskablePPO', 'QRDQN']
+SB3_MODELS = ["PPO", "A2C", "DQN"]
+SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
 class BaseReinforcementLearningModel(IFreqaiModel):
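For orientation, the `rl_config` keys read by the constructor in the next hunk live under the `freqai` section of the user configuration. A minimal sketch with placeholder values (the key names are the ones referenced in this file; the numbers are illustrative, not recommendations):

freqai_config = {
    "rl_config": {
        "model_type": "PPO",  # must be one of SB3_MODELS or SB3_CONTRIB_MODELS
        "policy_type": "MlpPolicy",
        "cpu_count": 4,  # capped at half the available system threads in __init__
        "net_arch": [128, 128],
        "add_state_info": False,  # extra observation columns, used in live/dry runs only
        "max_trade_duration_candles": 300,
        "model_reward_parameters": {"rr": 1, "profit_aim": 0.025, "win_reward_factor": 2},
    }
}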
@@ -42,57 +42,60 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
def __init__(self, **kwargs) -> None:
super().__init__(config=kwargs['config'])
self.max_threads = min(self.freqai_info['rl_config'].get(
'cpu_count', 1), max(int(self.max_system_threads / 2), 1))
super().__init__(config=kwargs["config"])
self.max_threads = min(
self.freqai_info["rl_config"].get("cpu_count", 1),
max(int(self.max_system_threads / 2), 1),
)
th.set_num_threads(self.max_threads)
self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
self.reward_params = self.freqai_info["rl_config"]["model_reward_parameters"]
self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_callback: Optional[MaskableEvalCallback] = None
self.model_type = self.freqai_info['rl_config']['model_type']
self.rl_config = self.freqai_info['rl_config']
self.model_type = self.freqai_info["rl_config"]["model_type"]
self.rl_config = self.freqai_info["rl_config"]
self.df_raw: DataFrame = DataFrame()
self.continual_learning = self.freqai_info.get('continual_learning', False)
self.continual_learning = self.freqai_info.get("continual_learning", False)
if self.model_type in SB3_MODELS:
import_str = 'stable_baselines3'
import_str = "stable_baselines3"
elif self.model_type in SB3_CONTRIB_MODELS:
import_str = 'sb3_contrib'
import_str = "sb3_contrib"
else:
raise OperationalException(f'{self.model_type} not available in stable_baselines3 or '
f'sb3_contrib. please choose one of {SB3_MODELS} or '
f'{SB3_CONTRIB_MODELS}')
raise OperationalException(
f"{self.model_type} not available in stable_baselines3 or "
f"sb3_contrib. please choose one of {SB3_MODELS} or "
f"{SB3_CONTRIB_MODELS}"
)
mod = importlib.import_module(import_str, self.model_type)
self.MODELCLASS = getattr(mod, self.model_type)
self.policy_type = self.freqai_info['rl_config']['policy_type']
self.policy_type = self.freqai_info["rl_config"]["policy_type"]
self.unset_outlier_removal()
self.net_arch = self.rl_config.get('net_arch', [128, 128])
self.net_arch = self.rl_config.get("net_arch", [128, 128])
self.dd.model_type = import_str
self.tensorboard_callback: TensorboardCallback = \
TensorboardCallback(verbose=1, actions=BaseActions)
self.tensorboard_callback: TensorboardCallback = TensorboardCallback(
verbose=1, actions=BaseActions
)
def unset_outlier_removal(self):
"""
If user has activated any function that may remove training points, this
function will set them to false and warn them
"""
if self.ft_params.get('use_SVM_to_remove_outliers', False):
self.ft_params.update({'use_SVM_to_remove_outliers': False})
logger.warning('User tried to use SVM with RL. Deactivating SVM.')
if self.ft_params.get('use_DBSCAN_to_remove_outliers', False):
self.ft_params.update({'use_DBSCAN_to_remove_outliers': False})
logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.')
if self.ft_params.get('DI_threshold', False):
self.ft_params.update({'DI_threshold': False})
logger.warning('User tried to use DI_threshold with RL. Deactivating DI_threshold.')
if self.freqai_info['data_split_parameters'].get('shuffle', False):
self.freqai_info['data_split_parameters'].update({'shuffle': False})
logger.warning('User tried to shuffle training data. Setting shuffle to False')
if self.ft_params.get("use_SVM_to_remove_outliers", False):
self.ft_params.update({"use_SVM_to_remove_outliers": False})
logger.warning("User tried to use SVM with RL. Deactivating SVM.")
if self.ft_params.get("use_DBSCAN_to_remove_outliers", False):
self.ft_params.update({"use_DBSCAN_to_remove_outliers": False})
logger.warning("User tried to use DBSCAN with RL. Deactivating DBSCAN.")
if self.ft_params.get("DI_threshold", False):
self.ft_params.update({"DI_threshold": False})
logger.warning("User tried to use DI_threshold with RL. Deactivating DI_threshold.")
if self.freqai_info["data_split_parameters"].get("shuffle", False):
self.freqai_info["data_split_parameters"].update({"shuffle": False})
logger.warning("User tried to shuffle training data. Setting shuffle to False")
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@@ -111,8 +114,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             training_filter=True,
         )
-        dd: Dict[str, Any] = dk.make_train_test_datasets(
-            features_filtered, labels_filtered)
+        dd: Dict[str, Any] = dk.make_train_test_datasets(features_filtered, labels_filtered)
         self.df_raw = copy.deepcopy(dd["train_features"])
         dk.fit_labels()  # FIXME useless for now, but just satiating append methods
@@ -121,18 +123,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )
-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )
         logger.info(
             f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
@@ -147,9 +149,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return model
-    def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
-                                        prices_train: DataFrame, prices_test: DataFrame,
-                                        dk: FreqaiDataKitchen):
+    def set_train_and_eval_environments(
+        self,
+        data_dictionary: Dict[str, DataFrame],
+        prices_train: DataFrame,
+        prices_test: DataFrame,
+        dk: FreqaiDataKitchen,
+    ):
         """
         User can override this if they are using a custom MyRLEnv
         :param data_dictionary: dict = common data dictionary containing train and test
@@ -165,11 +171,14 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info)
         self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info))
-        self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
-                                                  render=False, eval_freq=len(train_df),
-                                                  best_model_save_path=str(dk.data_path),
-                                                  use_masking=(self.model_type == 'MaskablePPO' and
-                                                               is_masking_supported(self.eval_env)))
+        self.eval_callback = MaskableEvalCallback(
+            self.eval_env,
+            deterministic=True,
+            render=False,
+            eval_freq=len(train_df),
+            best_model_save_path=str(dk.data_path),
+            use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
+        )
         actions = self.train_env.get_actions()
         self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
@@ -178,16 +187,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Create dictionary of environment arguments
"""
env_info = {"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw}
env_info = {
"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw,
}
if self.data_provider:
env_info["fee"] = self.data_provider._exchange \
.get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore
env_info["fee"] = self.data_provider._exchange.get_fee(
symbol=self.data_provider.current_whitelist()[0]
) # type: ignore
return env_info
@@ -219,11 +231,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         for trade in open_trades:
             if trade.pair == pair:
                 if self.data_provider._exchange is None:  # type: ignore
-                    logger.error('No exchange available.')
+                    logger.error("No exchange available.")
                     return 0, 0, 0
                 else:
                     current_rate = self.data_provider._exchange.get_rate(  # type: ignore
-                        pair, refresh=False, side="exit", is_short=trade.is_short)
+                        pair, refresh=False, side="exit", is_short=trade.is_short
+                    )
                 now = datetime.now(timezone.utc).timestamp()
                 trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
@@ -255,16 +268,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk)
         dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )
-        pred_df = self.rl_model_predict(
-            dk.data_dictionary["prediction_features"], dk, self.model)
+        pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
         pred_df.fillna(0, inplace=True)
         return (pred_df, dk.do_predict)
-    def rl_model_predict(self, dataframe: DataFrame,
-                         dk: FreqaiDataKitchen, model: Any) -> DataFrame:
+    def rl_model_predict(
+        self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any
+    ) -> DataFrame:
         """
         A helper function to make predictions in the Reinforcement learning module.
         :param dataframe: DataFrame = the dataframe of features to make the predictions on
@@ -275,11 +289,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         def _predict(window):
             observations = dataframe.iloc[window.index]
-            if self.live and self.rl_config.get('add_state_info', False):
+            if self.live and self.rl_config.get("add_state_info", False):
                 market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
-                observations['current_profit_pct'] = current_profit
-                observations['position'] = market_side
-                observations['trade_duration'] = trade_duration
+                observations["current_profit_pct"] = current_profit
+                observations["position"] = market_side
+                observations["trade_duration"] = trade_duration
             res, _ = model.predict(observations, deterministic=True)
             return res
@@ -287,23 +301,31 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return output
-    def build_ohlc_price_dataframes(self, data_dictionary: dict,
-                                    pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
-                                                                               DataFrame]:
+    def build_ohlc_price_dataframes(
+        self, data_dictionary: dict, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
         """
         Builds the train prices and test prices for the environment.
         """
-        pair = pair.replace(':', '')
+        pair = pair.replace(":", "")
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
         # price data for model training and evaluation
-        tf = self.config['timeframe']
-        rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
-                       '%-raw_high': ' high', '%-raw_close': 'close'}
-        rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
-                           f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
+        tf = self.config["timeframe"]
+        rename_dict = {
+            "%-raw_open": "open",
+            "%-raw_low": "low",
+            "%-raw_high": " high",
+            "%-raw_close": "close",
+        }
+        rename_dict_old = {
+            f"%-{pair}raw_open_{tf}": "open",
+            f"%-{pair}raw_low_{tf}": "low",
+            f"%-{pair}raw_high_{tf}": " high",
+            f"%-{pair}raw_close_{tf}": "close",
+        }
         prices_train = train_df.filter(rename_dict.keys(), axis=1)
         prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
@@ -311,17 +333,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if not prices_train_old.empty:
             prices_train = prices_train_old
             rename_dict = rename_dict_old
-            logger.warning('Reinforcement learning module didn\'t find the correct raw prices '
-                           'assigned in feature_engineering_standard(). '
-                           'Please assign them with:\n'
-                           'dataframe["%-raw_close"] = dataframe["close"]\n'
-                           'dataframe["%-raw_open"] = dataframe["open"]\n'
-                           'dataframe["%-raw_high"] = dataframe["high"]\n'
-                           'dataframe["%-raw_low"] = dataframe["low"]\n'
-                           'inside `feature_engineering_standard()')
+            logger.warning(
+                "Reinforcement learning module didn't find the correct raw prices "
+                "assigned in feature_engineering_standard(). "
+                "Please assign them with:\n"
+                'dataframe["%-raw_close"] = dataframe["close"]\n'
+                'dataframe["%-raw_open"] = dataframe["open"]\n'
+                'dataframe["%-raw_high"] = dataframe["high"]\n'
+                'dataframe["%-raw_low"] = dataframe["low"]\n'
+                "inside `feature_engineering_standard()"
+            )
         elif prices_train.empty:
-            raise OperationalException("No prices found, please follow log warning "
-                                       "instructions to correct the strategy.")
+            raise OperationalException(
+                "No prices found, please follow log warning "
+                "instructions to correct the strategy."
+            )
         prices_train.rename(columns=rename_dict, inplace=True)
         prices_train.reset_index(drop=True)
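For reference, the raw-price assignments requested by the warning above look like this inside a user strategy's `feature_engineering_standard()` hook (a sketch assembled from the log message itself; the hook signature follows the standard FreqAI strategy interface):

def feature_engineering_standard(self, dataframe, metadata, **kwargs):
    # Keep untouched OHLC columns around so the RL module can rebuild price
    # dataframes for the training and evaluation environments.
    dataframe["%-raw_close"] = dataframe["close"]
    dataframe["%-raw_open"] = dataframe["open"]
    dataframe["%-raw_high"] = dataframe["high"]
    dataframe["%-raw_low"] = dataframe["low"]
    return dataframe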
@@ -339,7 +365,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Given a dataframe, drop the ohlc data
"""
drop_list = ['%-raw_open', '%-raw_low', '%-raw_high', '%-raw_close']
drop_list = ["%-raw_open", "%-raw_low", "%-raw_high", "%-raw_close"]
if self.rl_config["drop_ohlc_from_features"]:
df.drop(drop_list, axis=1, inplace=True)
@@ -358,7 +384,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if exists:
             model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
         else:
-            logger.info('No model file on disk to continue learning from.')
+            logger.info("No model file on disk to continue learning from.")
         return model
@@ -400,15 +426,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 return -2
             pnl = self.get_unrealized_profit()
-            factor = 100.
+            factor = 100.0
             # you can use feature values from dataframe
-            rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
-                                        f"{self.config['timeframe']}"].iloc[self._current_tick]
+            rsi_now = self.raw_features[
+                f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"
+            ].iloc[self._current_tick]
             # reward agent for entering trades
-            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                    and self._position == Positions.Neutral):
+            if (
+                action in (Actions.Long_enter.value, Actions.Short_enter.value)
+                and self._position == Positions.Neutral
+            ):
                 if rsi_now < 40:
                     factor = 40 / rsi_now
                 else:
@@ -419,7 +448,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
                 return -1
-            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
+            max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
             if self._last_trade_tick:
                 trade_duration = self._current_tick - self._last_trade_tick
             else:
@@ -431,28 +460,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 factor *= 0.5
             # discourage sitting in position
-            if (self._position in (Positions.Short, Positions.Long) and
-                    action == Actions.Neutral.value):
+            if (
+                self._position in (Positions.Short, Positions.Long)
+                and action == Actions.Neutral.value
+            ):
                 return -1 * trade_duration / max_trade_duration
             # close long
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)
-            return 0.
+            return 0.0
-def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
-             seed: int, train_df: DataFrame, price: DataFrame,
-             env_info: Dict[str, Any] = {}) -> Callable:
+def make_env(
+    MyRLEnv: Type[BaseEnvironment],
+    env_id: str,
+    rank: int,
+    seed: int,
+    train_df: DataFrame,
+    price: DataFrame,
+    env_info: Dict[str, Any] = {},
+) -> Callable:
     """
     Utility function for multiprocessed env.
@@ -465,10 +502,9 @@ def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
"""
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank,
**env_info)
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank, **env_info)
return env
set_random_seed(seed)
return _init
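make_env returns a picklable factory rather than an instantiated environment, which is the shape SubprocVecEnv expects. A usage sketch (assumed wiring, not part of this diff; MyRLEnv, train_df, prices_train, env_info and num_cpu stand in for objects built elsewhere in the model):

from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

# One factory per worker; the rank offsets the seed so each process differs.
train_env = VecMonitor(
    SubprocVecEnv(
        [
            make_env(MyRLEnv, "train_env", rank, 42, train_df, prices_train, env_info=env_info)
            for rank in range(num_cpu)
        ]
    )
)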