mirror of
https://github.com/freqtrade/freqtrade.git
synced 2025-12-17 21:31:14 +00:00
ruff format: freqai
This commit is contained in:
@@ -30,10 +30,10 @@ from freqtrade.persistence import Trade
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Select torch's "file_system" strategy for sharing tensors between processes.
# NOTE(review): presumably chosen to avoid running out of file descriptors when
# several environment sub-processes are used - confirm.
torch.multiprocessing.set_sharing_strategy("file_system")

# Model names that BaseReinforcementLearningModel.__init__ resolves from the
# `stable_baselines3` package.
SB3_MODELS = ["PPO", "A2C", "DQN"]
# Model names that BaseReinforcementLearningModel.__init__ resolves from the
# `sb3_contrib` package.
SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
|
||||
|
||||
|
||||
class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
@@ -42,57 +42,60 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs) -> None:
    """
    Set up the reinforcement-learning model from the ``rl_config`` section of the
    FreqAI configuration.

    Limits the torch thread count, stores reward/model/policy settings, creates
    placeholder environments and callbacks, resolves ``model_type`` to a class from
    ``stable_baselines3`` or ``sb3_contrib`` and calls ``unset_outlier_removal()``.

    :param kwargs: must contain ``config``, which is forwarded to the parent constructor
    :raises OperationalException: if ``model_type`` is listed in neither ``SB3_MODELS``
        nor ``SB3_CONTRIB_MODELS``
    """
    super().__init__(config=kwargs["config"])
    rl_config = self.freqai_info["rl_config"]

    # Use the configured cpu_count, but never more than half of the system
    # threads (and never fewer than one).
    self.max_threads = min(
        rl_config.get("cpu_count", 1),
        max(int(self.max_system_threads / 2), 1),
    )
    th.set_num_threads(self.max_threads)

    self.reward_params = rl_config["model_reward_parameters"]
    # Placeholders only; set_train_and_eval_environments() assigns the real
    # environments and the evaluation callback.
    self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
    self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
    self.eval_callback: Optional[MaskableEvalCallback] = None
    self.model_type = rl_config["model_type"]
    self.rl_config = rl_config
    self.df_raw: DataFrame = DataFrame()
    self.continual_learning = self.freqai_info.get("continual_learning", False)

    # Pick the library that provides the requested model class.
    if self.model_type in SB3_MODELS:
        import_str = "stable_baselines3"
    elif self.model_type in SB3_CONTRIB_MODELS:
        import_str = "sb3_contrib"
    else:
        raise OperationalException(
            f"{self.model_type} not available in stable_baselines3 or "
            f"sb3_contrib. please choose one of {SB3_MODELS} or "
            f"{SB3_CONTRIB_MODELS}"
        )

    # Absolute import: import_module's second argument (`package`) is only an
    # anchor for relative names, so it is not passed here.
    mod = importlib.import_module(import_str)
    self.MODELCLASS = getattr(mod, self.model_type)
    self.policy_type = rl_config["policy_type"]
    self.unset_outlier_removal()
    self.net_arch = rl_config.get("net_arch", [128, 128])
    # Record on self.dd which library the model class was taken from.
    self.dd.model_type = import_str
    # Default callback built on BaseActions; set_train_and_eval_environments()
    # replaces it with one built from the training environment's actions.
    self.tensorboard_callback: TensorboardCallback = TensorboardCallback(
        verbose=1, actions=BaseActions
    )
|
||||
|
||||
def unset_outlier_removal(self):
    """
    Force-disable every user option that may remove training points, and
    shuffling of the training data. A warning is logged for each option that
    was active.
    """
    # feature-parameter key -> warning logged when that option was switched on
    deactivation_notices = {
        "use_SVM_to_remove_outliers": "User tried to use SVM with RL. Deactivating SVM.",
        "use_DBSCAN_to_remove_outliers": (
            "User tried to use DBSCAN with RL. Deactivating DBSCAN."
        ),
        "DI_threshold": "User tried to use DI_threshold with RL. Deactivating DI_threshold.",
    }
    for option, notice in deactivation_notices.items():
        if not self.ft_params.get(option, False):
            continue
        self.ft_params[option] = False
        logger.warning(notice)

    split_params = self.freqai_info["data_split_parameters"]
    if split_params.get("shuffle", False):
        split_params["shuffle"] = False
        logger.warning("User tried to shuffle training data. Setting shuffle to False")
|
||||
|
||||
def train(
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
@@ -111,8 +114,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
dd: Dict[str, Any] = dk.make_train_test_datasets(
|
||||
features_filtered, labels_filtered)
|
||||
dd: Dict[str, Any] = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
self.df_raw = copy.deepcopy(dd["train_features"])
|
||||
dk.fit_labels() # FIXME useless for now, but just satiating append methods
|
||||
|
||||
@@ -121,18 +123,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
|
||||
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
|
||||
|
||||
(dd["train_features"],
|
||||
dd["train_labels"],
|
||||
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
|
||||
dd["train_labels"],
|
||||
dd["train_weights"])
|
||||
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
|
||||
dk.feature_pipeline.fit_transform(
|
||||
dd["train_features"], dd["train_labels"], dd["train_weights"]
|
||||
)
|
||||
)
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
(dd["test_features"],
|
||||
dd["test_labels"],
|
||||
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
|
||||
dd["test_labels"],
|
||||
dd["test_weights"])
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
|
||||
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
|
||||
dk.feature_pipeline.transform(
|
||||
dd["test_features"], dd["test_labels"], dd["test_weights"]
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
@@ -147,9 +149,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
|
||||
return model
|
||||
|
||||
def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
|
||||
prices_train: DataFrame, prices_test: DataFrame,
|
||||
dk: FreqaiDataKitchen):
|
||||
def set_train_and_eval_environments(
|
||||
self,
|
||||
data_dictionary: Dict[str, DataFrame],
|
||||
prices_train: DataFrame,
|
||||
prices_test: DataFrame,
|
||||
dk: FreqaiDataKitchen,
|
||||
):
|
||||
"""
|
||||
User can override this if they are using a custom MyRLEnv
|
||||
:param data_dictionary: dict = common data dictionary containing train and test
|
||||
@@ -165,11 +171,14 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
|
||||
self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info)
|
||||
self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info))
|
||||
self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
|
||||
render=False, eval_freq=len(train_df),
|
||||
best_model_save_path=str(dk.data_path),
|
||||
use_masking=(self.model_type == 'MaskablePPO' and
|
||||
is_masking_supported(self.eval_env)))
|
||||
self.eval_callback = MaskableEvalCallback(
|
||||
self.eval_env,
|
||||
deterministic=True,
|
||||
render=False,
|
||||
eval_freq=len(train_df),
|
||||
best_model_save_path=str(dk.data_path),
|
||||
use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
|
||||
)
|
||||
|
||||
actions = self.train_env.get_actions()
|
||||
self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
|
||||
@@ -178,16 +187,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
"""
|
||||
Create dictionary of environment arguments
|
||||
"""
|
||||
env_info = {"window_size": self.CONV_WIDTH,
|
||||
"reward_kwargs": self.reward_params,
|
||||
"config": self.config,
|
||||
"live": self.live,
|
||||
"can_short": self.can_short,
|
||||
"pair": pair,
|
||||
"df_raw": self.df_raw}
|
||||
env_info = {
|
||||
"window_size": self.CONV_WIDTH,
|
||||
"reward_kwargs": self.reward_params,
|
||||
"config": self.config,
|
||||
"live": self.live,
|
||||
"can_short": self.can_short,
|
||||
"pair": pair,
|
||||
"df_raw": self.df_raw,
|
||||
}
|
||||
if self.data_provider:
|
||||
env_info["fee"] = self.data_provider._exchange \
|
||||
.get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore
|
||||
env_info["fee"] = self.data_provider._exchange.get_fee(
|
||||
symbol=self.data_provider.current_whitelist()[0]
|
||||
) # type: ignore
|
||||
|
||||
return env_info
|
||||
|
||||
@@ -219,11 +231,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
for trade in open_trades:
|
||||
if trade.pair == pair:
|
||||
if self.data_provider._exchange is None: # type: ignore
|
||||
logger.error('No exchange available.')
|
||||
logger.error("No exchange available.")
|
||||
return 0, 0, 0
|
||||
else:
|
||||
current_rate = self.data_provider._exchange.get_rate( # type: ignore
|
||||
pair, refresh=False, side="exit", is_short=trade.is_short)
|
||||
pair, refresh=False, side="exit", is_short=trade.is_short
|
||||
)
|
||||
|
||||
now = datetime.now(timezone.utc).timestamp()
|
||||
trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
|
||||
@@ -255,16 +268,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk)
|
||||
|
||||
dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform(
|
||||
dk.data_dictionary["prediction_features"], outlier_check=True)
|
||||
dk.data_dictionary["prediction_features"], outlier_check=True
|
||||
)
|
||||
|
||||
pred_df = self.rl_model_predict(
|
||||
dk.data_dictionary["prediction_features"], dk, self.model)
|
||||
pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
|
||||
pred_df.fillna(0, inplace=True)
|
||||
|
||||
return (pred_df, dk.do_predict)
|
||||
|
||||
def rl_model_predict(self, dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, model: Any) -> DataFrame:
|
||||
def rl_model_predict(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any
|
||||
) -> DataFrame:
|
||||
"""
|
||||
A helper function to make predictions in the Reinforcement learning module.
|
||||
:param dataframe: DataFrame = the dataframe of features to make the predictions on
|
||||
@@ -275,11 +289,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
|
||||
def _predict(window):
    """
    Predict for the rows of ``dataframe`` selected positionally by ``window.index``.

    In live mode with ``add_state_info`` enabled, the values returned by
    ``self.get_state_info(dk.pair)`` are added as the columns
    ``current_profit_pct``, ``position`` and ``trade_duration``.

    :param window: object whose ``index`` gives the row positions to use
    :return: first element of the ``model.predict(...)`` result
    """
    observations = dataframe.iloc[window.index]
    if self.live and self.rl_config.get("add_state_info", False):
        market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
        # assign() returns a new frame, so nothing is written into the object
        # obtained from .iloc (avoids pandas' chained-assignment warning).
        # Columns are added in the same order as before.
        observations = observations.assign(
            current_profit_pct=current_profit,
            position=market_side,
            trade_duration=trade_duration,
        )
    res, _ = model.predict(observations, deterministic=True)
    return res
|
||||
|
||||
@@ -287,23 +301,31 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
|
||||
return output
|
||||
|
||||
def build_ohlc_price_dataframes(self, data_dictionary: dict,
|
||||
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
|
||||
DataFrame]:
|
||||
def build_ohlc_price_dataframes(
|
||||
self, data_dictionary: dict, pair: str, dk: FreqaiDataKitchen
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Builds the train prices and test prices for the environment.
|
||||
"""
|
||||
|
||||
pair = pair.replace(':', '')
|
||||
pair = pair.replace(":", "")
|
||||
train_df = data_dictionary["train_features"]
|
||||
test_df = data_dictionary["test_features"]
|
||||
|
||||
# price data for model training and evaluation
|
||||
tf = self.config['timeframe']
|
||||
rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
|
||||
'%-raw_high': ' high', '%-raw_close': 'close'}
|
||||
rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
|
||||
f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
|
||||
tf = self.config["timeframe"]
|
||||
rename_dict = {
|
||||
"%-raw_open": "open",
|
||||
"%-raw_low": "low",
|
||||
"%-raw_high": " high",
|
||||
"%-raw_close": "close",
|
||||
}
|
||||
rename_dict_old = {
|
||||
f"%-{pair}raw_open_{tf}": "open",
|
||||
f"%-{pair}raw_low_{tf}": "low",
|
||||
f"%-{pair}raw_high_{tf}": " high",
|
||||
f"%-{pair}raw_close_{tf}": "close",
|
||||
}
|
||||
|
||||
prices_train = train_df.filter(rename_dict.keys(), axis=1)
|
||||
prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
|
||||
@@ -311,17 +333,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
if not prices_train_old.empty:
|
||||
prices_train = prices_train_old
|
||||
rename_dict = rename_dict_old
|
||||
logger.warning('Reinforcement learning module didn\'t find the correct raw prices '
|
||||
'assigned in feature_engineering_standard(). '
|
||||
'Please assign them with:\n'
|
||||
'dataframe["%-raw_close"] = dataframe["close"]\n'
|
||||
'dataframe["%-raw_open"] = dataframe["open"]\n'
|
||||
'dataframe["%-raw_high"] = dataframe["high"]\n'
|
||||
'dataframe["%-raw_low"] = dataframe["low"]\n'
|
||||
'inside `feature_engineering_standard()')
|
||||
logger.warning(
|
||||
"Reinforcement learning module didn't find the correct raw prices "
|
||||
"assigned in feature_engineering_standard(). "
|
||||
"Please assign them with:\n"
|
||||
'dataframe["%-raw_close"] = dataframe["close"]\n'
|
||||
'dataframe["%-raw_open"] = dataframe["open"]\n'
|
||||
'dataframe["%-raw_high"] = dataframe["high"]\n'
|
||||
'dataframe["%-raw_low"] = dataframe["low"]\n'
|
||||
"inside `feature_engineering_standard()"
|
||||
)
|
||||
elif prices_train.empty:
|
||||
raise OperationalException("No prices found, please follow log warning "
|
||||
"instructions to correct the strategy.")
|
||||
raise OperationalException(
|
||||
"No prices found, please follow log warning "
|
||||
"instructions to correct the strategy."
|
||||
)
|
||||
|
||||
prices_train.rename(columns=rename_dict, inplace=True)
|
||||
prices_train.reset_index(drop=True)
|
||||
@@ -339,7 +365,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
"""
|
||||
Given a dataframe, drop the ohlc data
|
||||
"""
|
||||
drop_list = ['%-raw_open', '%-raw_low', '%-raw_high', '%-raw_close']
|
||||
drop_list = ["%-raw_open", "%-raw_low", "%-raw_high", "%-raw_close"]
|
||||
|
||||
if self.rl_config["drop_ohlc_from_features"]:
|
||||
df.drop(drop_list, axis=1, inplace=True)
|
||||
@@ -358,7 +384,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
if exists:
|
||||
model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
|
||||
else:
|
||||
logger.info('No model file on disk to continue learning from.')
|
||||
logger.info("No model file on disk to continue learning from.")
|
||||
|
||||
return model
|
||||
|
||||
@@ -400,15 +426,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
return -2
|
||||
|
||||
pnl = self.get_unrealized_profit()
|
||||
factor = 100.
|
||||
factor = 100.0
|
||||
|
||||
# you can use feature values from dataframe
|
||||
rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
|
||||
f"{self.config['timeframe']}"].iloc[self._current_tick]
|
||||
rsi_now = self.raw_features[
|
||||
f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"
|
||||
].iloc[self._current_tick]
|
||||
|
||||
# reward agent for entering trades
|
||||
if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
|
||||
and self._position == Positions.Neutral):
|
||||
if (
|
||||
action in (Actions.Long_enter.value, Actions.Short_enter.value)
|
||||
and self._position == Positions.Neutral
|
||||
):
|
||||
if rsi_now < 40:
|
||||
factor = 40 / rsi_now
|
||||
else:
|
||||
@@ -419,7 +448,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
return -1
|
||||
|
||||
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
|
||||
max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
|
||||
if self._last_trade_tick:
|
||||
trade_duration = self._current_tick - self._last_trade_tick
|
||||
else:
|
||||
@@ -431,28 +460,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
factor *= 0.5
|
||||
|
||||
# discourage sitting in position
|
||||
if (self._position in (Positions.Short, Positions.Long) and
|
||||
action == Actions.Neutral.value):
|
||||
if (
|
||||
self._position in (Positions.Short, Positions.Long)
|
||||
and action == Actions.Neutral.value
|
||||
):
|
||||
return -1 * trade_duration / max_trade_duration
|
||||
|
||||
# close long
|
||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
# close short
|
||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
return 0.
|
||||
return 0.0
|
||||
|
||||
|
||||
def make_env(
    MyRLEnv: Type[BaseEnvironment],
    env_id: str,
    rank: int,
    seed: int,
    train_df: DataFrame,
    price: DataFrame,
    env_info: Optional[Dict[str, Any]] = None,
) -> Callable:
    """
    Utility function for multiprocessed env.

    Calls ``set_random_seed(seed)`` and returns a zero-argument factory; the
    environment itself is only instantiated when that factory is called.

    :param MyRLEnv: environment class to instantiate
    :param env_id: passed to the environment as ``id``
    :param rank: added to ``seed`` to form the seed handed to the environment
    :param seed: base seed; also passed to ``set_random_seed``
    :param train_df: passed to the environment as ``df``
    :param price: passed to the environment as ``prices``
    :param env_info: extra keyword arguments for the environment constructor;
        ``None`` (the default) means no extra arguments
    :return: callable that builds and returns the environment
    """
    # None sentinel instead of a `{}` default, which would be one dict object
    # shared by every call of make_env.
    extra_kwargs: Dict[str, Any] = {} if env_info is None else env_info

    def _init() -> gym.Env:
        return MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank, **extra_kwargs)

    set_random_seed(seed)
    return _init
|
||||
|
||||
Reference in New Issue
Block a user