ruff format: freqai

Matthias
2024-05-12 17:12:20 +02:00
parent e4e8c3967c
commit d1db43dee0
44 changed files with 1111 additions and 900 deletions


@@ -30,10 +30,10 @@ from freqtrade.persistence import Trade
 logger = logging.getLogger(__name__)
-torch.multiprocessing.set_sharing_strategy('file_system')
+torch.multiprocessing.set_sharing_strategy("file_system")
-SB3_MODELS = ['PPO', 'A2C', 'DQN']
-SB3_CONTRIB_MODELS = ['TRPO', 'ARS', 'RecurrentPPO', 'MaskablePPO', 'QRDQN']
+SB3_MODELS = ["PPO", "A2C", "DQN"]
+SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
 class BaseReinforcementLearningModel(IFreqaiModel):
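For orientation, the `rl_config` keys read by the constructor in the next hunk live under the `freqai` section of the user configuration. A minimal sketch with placeholder values (the key names are the ones referenced in this file; the numbers are illustrative, not recommendations):

freqai_config = {
    "rl_config": {
        "model_type": "PPO",  # must be one of SB3_MODELS or SB3_CONTRIB_MODELS
        "policy_type": "MlpPolicy",
        "cpu_count": 4,  # capped at half the available system threads in __init__
        "net_arch": [128, 128],
        "add_state_info": False,  # extra observation columns, used in live/dry runs only
        "max_trade_duration_candles": 300,
        "model_reward_parameters": {"rr": 1, "profit_aim": 0.025, "win_reward_factor": 2},
    }
}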
@@ -42,57 +42,60 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
def __init__(self, **kwargs) -> None:
super().__init__(config=kwargs['config'])
self.max_threads = min(self.freqai_info['rl_config'].get(
'cpu_count', 1), max(int(self.max_system_threads / 2), 1))
super().__init__(config=kwargs["config"])
self.max_threads = min(
self.freqai_info["rl_config"].get("cpu_count", 1),
max(int(self.max_system_threads / 2), 1),
)
th.set_num_threads(self.max_threads)
self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
self.reward_params = self.freqai_info["rl_config"]["model_reward_parameters"]
self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_callback: Optional[MaskableEvalCallback] = None
self.model_type = self.freqai_info['rl_config']['model_type']
self.rl_config = self.freqai_info['rl_config']
self.model_type = self.freqai_info["rl_config"]["model_type"]
self.rl_config = self.freqai_info["rl_config"]
self.df_raw: DataFrame = DataFrame()
self.continual_learning = self.freqai_info.get('continual_learning', False)
self.continual_learning = self.freqai_info.get("continual_learning", False)
if self.model_type in SB3_MODELS:
import_str = 'stable_baselines3'
import_str = "stable_baselines3"
elif self.model_type in SB3_CONTRIB_MODELS:
import_str = 'sb3_contrib'
import_str = "sb3_contrib"
else:
raise OperationalException(f'{self.model_type} not available in stable_baselines3 or '
f'sb3_contrib. please choose one of {SB3_MODELS} or '
f'{SB3_CONTRIB_MODELS}')
raise OperationalException(
f"{self.model_type} not available in stable_baselines3 or "
f"sb3_contrib. please choose one of {SB3_MODELS} or "
f"{SB3_CONTRIB_MODELS}"
)
mod = importlib.import_module(import_str, self.model_type)
self.MODELCLASS = getattr(mod, self.model_type)
self.policy_type = self.freqai_info['rl_config']['policy_type']
self.policy_type = self.freqai_info["rl_config"]["policy_type"]
self.unset_outlier_removal()
self.net_arch = self.rl_config.get('net_arch', [128, 128])
self.net_arch = self.rl_config.get("net_arch", [128, 128])
self.dd.model_type = import_str
self.tensorboard_callback: TensorboardCallback = \
TensorboardCallback(verbose=1, actions=BaseActions)
self.tensorboard_callback: TensorboardCallback = TensorboardCallback(
verbose=1, actions=BaseActions
)
def unset_outlier_removal(self):
"""
If user has activated any function that may remove training points, this
function will set them to false and warn them
"""
if self.ft_params.get('use_SVM_to_remove_outliers', False):
self.ft_params.update({'use_SVM_to_remove_outliers': False})
logger.warning('User tried to use SVM with RL. Deactivating SVM.')
if self.ft_params.get('use_DBSCAN_to_remove_outliers', False):
self.ft_params.update({'use_DBSCAN_to_remove_outliers': False})
logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.')
if self.ft_params.get('DI_threshold', False):
self.ft_params.update({'DI_threshold': False})
logger.warning('User tried to use DI_threshold with RL. Deactivating DI_threshold.')
if self.freqai_info['data_split_parameters'].get('shuffle', False):
self.freqai_info['data_split_parameters'].update({'shuffle': False})
logger.warning('User tried to shuffle training data. Setting shuffle to False')
if self.ft_params.get("use_SVM_to_remove_outliers", False):
self.ft_params.update({"use_SVM_to_remove_outliers": False})
logger.warning("User tried to use SVM with RL. Deactivating SVM.")
if self.ft_params.get("use_DBSCAN_to_remove_outliers", False):
self.ft_params.update({"use_DBSCAN_to_remove_outliers": False})
logger.warning("User tried to use DBSCAN with RL. Deactivating DBSCAN.")
if self.ft_params.get("DI_threshold", False):
self.ft_params.update({"DI_threshold": False})
logger.warning("User tried to use DI_threshold with RL. Deactivating DI_threshold.")
if self.freqai_info["data_split_parameters"].get("shuffle", False):
self.freqai_info["data_split_parameters"].update({"shuffle": False})
logger.warning("User tried to shuffle training data. Setting shuffle to False")
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@@ -111,8 +114,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             training_filter=True,
         )
-        dd: Dict[str, Any] = dk.make_train_test_datasets(
-            features_filtered, labels_filtered)
+        dd: Dict[str, Any] = dk.make_train_test_datasets(features_filtered, labels_filtered)
         self.df_raw = copy.deepcopy(dd["train_features"])
         dk.fit_labels()  # FIXME useless for now, but just satiating append methods
@@ -121,18 +123,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )
-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )
         logger.info(
             f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
@@ -147,9 +149,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return model
-    def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
-                                        prices_train: DataFrame, prices_test: DataFrame,
-                                        dk: FreqaiDataKitchen):
+    def set_train_and_eval_environments(
+        self,
+        data_dictionary: Dict[str, DataFrame],
+        prices_train: DataFrame,
+        prices_test: DataFrame,
+        dk: FreqaiDataKitchen,
+    ):
         """
         User can override this if they are using a custom MyRLEnv
         :param data_dictionary: dict = common data dictionary containing train and test
@@ -165,11 +171,14 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info)
         self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info))
-        self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
-                                                  render=False, eval_freq=len(train_df),
-                                                  best_model_save_path=str(dk.data_path),
-                                                  use_masking=(self.model_type == 'MaskablePPO' and
-                                                               is_masking_supported(self.eval_env)))
+        self.eval_callback = MaskableEvalCallback(
+            self.eval_env,
+            deterministic=True,
+            render=False,
+            eval_freq=len(train_df),
+            best_model_save_path=str(dk.data_path),
+            use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
+        )
         actions = self.train_env.get_actions()
         self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
@@ -178,16 +187,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Create dictionary of environment arguments
"""
env_info = {"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw}
env_info = {
"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw,
}
if self.data_provider:
env_info["fee"] = self.data_provider._exchange \
.get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore
env_info["fee"] = self.data_provider._exchange.get_fee(
symbol=self.data_provider.current_whitelist()[0]
) # type: ignore
return env_info
@@ -219,11 +231,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         for trade in open_trades:
             if trade.pair == pair:
                 if self.data_provider._exchange is None:  # type: ignore
-                    logger.error('No exchange available.')
+                    logger.error("No exchange available.")
                     return 0, 0, 0
                 else:
                     current_rate = self.data_provider._exchange.get_rate(  # type: ignore
-                        pair, refresh=False, side="exit", is_short=trade.is_short)
+                        pair, refresh=False, side="exit", is_short=trade.is_short
+                    )
                 now = datetime.now(timezone.utc).timestamp()
                 trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
@@ -255,16 +268,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk)
         dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )
-        pred_df = self.rl_model_predict(
-            dk.data_dictionary["prediction_features"], dk, self.model)
+        pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
         pred_df.fillna(0, inplace=True)
         return (pred_df, dk.do_predict)
-    def rl_model_predict(self, dataframe: DataFrame,
-                         dk: FreqaiDataKitchen, model: Any) -> DataFrame:
+    def rl_model_predict(
+        self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any
+    ) -> DataFrame:
         """
         A helper function to make predictions in the Reinforcement learning module.
         :param dataframe: DataFrame = the dataframe of features to make the predictions on
@@ -275,11 +289,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         def _predict(window):
             observations = dataframe.iloc[window.index]
-            if self.live and self.rl_config.get('add_state_info', False):
+            if self.live and self.rl_config.get("add_state_info", False):
                 market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
-                observations['current_profit_pct'] = current_profit
-                observations['position'] = market_side
-                observations['trade_duration'] = trade_duration
+                observations["current_profit_pct"] = current_profit
+                observations["position"] = market_side
+                observations["trade_duration"] = trade_duration
             res, _ = model.predict(observations, deterministic=True)
             return res
@@ -287,23 +301,31 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return output
-    def build_ohlc_price_dataframes(self, data_dictionary: dict,
-                                    pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
-                                                                               DataFrame]:
+    def build_ohlc_price_dataframes(
+        self, data_dictionary: dict, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
         """
         Builds the train prices and test prices for the environment.
         """
-        pair = pair.replace(':', '')
+        pair = pair.replace(":", "")
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
         # price data for model training and evaluation
-        tf = self.config['timeframe']
-        rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
-                       '%-raw_high': ' high', '%-raw_close': 'close'}
-        rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
-                           f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
+        tf = self.config["timeframe"]
+        rename_dict = {
+            "%-raw_open": "open",
+            "%-raw_low": "low",
+            "%-raw_high": " high",
+            "%-raw_close": "close",
+        }
+        rename_dict_old = {
+            f"%-{pair}raw_open_{tf}": "open",
+            f"%-{pair}raw_low_{tf}": "low",
+            f"%-{pair}raw_high_{tf}": " high",
+            f"%-{pair}raw_close_{tf}": "close",
+        }
         prices_train = train_df.filter(rename_dict.keys(), axis=1)
         prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
@@ -311,17 +333,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if not prices_train_old.empty:
             prices_train = prices_train_old
             rename_dict = rename_dict_old
-            logger.warning('Reinforcement learning module didn\'t find the correct raw prices '
-                           'assigned in feature_engineering_standard(). '
-                           'Please assign them with:\n'
-                           'dataframe["%-raw_close"] = dataframe["close"]\n'
-                           'dataframe["%-raw_open"] = dataframe["open"]\n'
-                           'dataframe["%-raw_high"] = dataframe["high"]\n'
-                           'dataframe["%-raw_low"] = dataframe["low"]\n'
-                           'inside `feature_engineering_standard()')
+            logger.warning(
+                "Reinforcement learning module didn't find the correct raw prices "
+                "assigned in feature_engineering_standard(). "
+                "Please assign them with:\n"
+                'dataframe["%-raw_close"] = dataframe["close"]\n'
+                'dataframe["%-raw_open"] = dataframe["open"]\n'
+                'dataframe["%-raw_high"] = dataframe["high"]\n'
+                'dataframe["%-raw_low"] = dataframe["low"]\n'
+                "inside `feature_engineering_standard()"
+            )
         elif prices_train.empty:
-            raise OperationalException("No prices found, please follow log warning "
-                                       "instructions to correct the strategy.")
+            raise OperationalException(
+                "No prices found, please follow log warning "
+                "instructions to correct the strategy."
+            )
         prices_train.rename(columns=rename_dict, inplace=True)
         prices_train.reset_index(drop=True)
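For reference, the raw-price assignments requested by the warning above look like this inside a user strategy's `feature_engineering_standard()` hook (a sketch assembled from the log message itself; the hook signature follows the standard FreqAI strategy interface):

def feature_engineering_standard(self, dataframe, metadata, **kwargs):
    # Keep untouched OHLC columns around so the RL module can rebuild price
    # dataframes for the training and evaluation environments.
    dataframe["%-raw_close"] = dataframe["close"]
    dataframe["%-raw_open"] = dataframe["open"]
    dataframe["%-raw_high"] = dataframe["high"]
    dataframe["%-raw_low"] = dataframe["low"]
    return dataframe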
@@ -339,7 +365,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Given a dataframe, drop the ohlc data
"""
drop_list = ['%-raw_open', '%-raw_low', '%-raw_high', '%-raw_close']
drop_list = ["%-raw_open", "%-raw_low", "%-raw_high", "%-raw_close"]
if self.rl_config["drop_ohlc_from_features"]:
df.drop(drop_list, axis=1, inplace=True)
@@ -358,7 +384,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if exists:
             model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
         else:
-            logger.info('No model file on disk to continue learning from.')
+            logger.info("No model file on disk to continue learning from.")
         return model
@@ -400,15 +426,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 return -2
             pnl = self.get_unrealized_profit()
-            factor = 100.
+            factor = 100.0
             # you can use feature values from dataframe
-            rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
-                                        f"{self.config['timeframe']}"].iloc[self._current_tick]
+            rsi_now = self.raw_features[
+                f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"
+            ].iloc[self._current_tick]
             # reward agent for entering trades
-            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                    and self._position == Positions.Neutral):
+            if (
+                action in (Actions.Long_enter.value, Actions.Short_enter.value)
+                and self._position == Positions.Neutral
+            ):
                 if rsi_now < 40:
                     factor = 40 / rsi_now
                 else:
@@ -419,7 +448,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
                 return -1
-            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
+            max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
             if self._last_trade_tick:
                 trade_duration = self._current_tick - self._last_trade_tick
             else:
@@ -431,28 +460,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 factor *= 0.5
             # discourage sitting in position
-            if (self._position in (Positions.Short, Positions.Long) and
-                    action == Actions.Neutral.value):
+            if (
+                self._position in (Positions.Short, Positions.Long)
+                and action == Actions.Neutral.value
+            ):
                 return -1 * trade_duration / max_trade_duration
             # close long
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)
-            return 0.
+            return 0.0
-def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
-             seed: int, train_df: DataFrame, price: DataFrame,
-             env_info: Dict[str, Any] = {}) -> Callable:
+def make_env(
+    MyRLEnv: Type[BaseEnvironment],
+    env_id: str,
+    rank: int,
+    seed: int,
+    train_df: DataFrame,
+    price: DataFrame,
+    env_info: Dict[str, Any] = {},
+) -> Callable:
     """
     Utility function for multiprocessed env.
@@ -465,10 +502,9 @@ def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
"""
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank,
**env_info)
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank, **env_info)
return env
set_random_seed(seed)
return _init
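make_env returns a picklable factory rather than an instantiated environment, which is the shape SubprocVecEnv expects. A usage sketch (assumed wiring, not part of this diff; MyRLEnv, train_df, prices_train, env_info and num_cpu stand in for objects built elsewhere in the model):

from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

# One factory per worker; the rank offsets the seed so each process differs.
train_env = VecMonitor(
    SubprocVecEnv(
        [
            make_env(MyRLEnv, "train_env", rank, 42, train_df, prices_train, env_info=env_info)
            for rank in range(num_cpu)
        ]
    )
)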