From 27dce20b294e2388804992882dece3e33d4a4fa7 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 4 Sep 2022 11:21:54 +0200 Subject: [PATCH] fix bug in Base4ActionRLEnv, improve example strats --- freqtrade/freqai/RL/Base4ActionRLEnv.py | 2 +- ...c.py => ReinforcementLearningExample4ac.py} | 18 +++++++----------- .../ReinforcementLearningExample5ac.py | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) rename freqtrade/freqai/example_strats/{ReinforcementLearningExample3ac.py => ReinforcementLearningExample4ac.py} (92%) diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py index d2b92a954..70a625136 100644 --- a/freqtrade/freqai/RL/Base4ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py @@ -31,7 +31,7 @@ class Base4ActionRLEnv(BaseEnvironment): if self._current_tick == self._end_tick: self._done = True - self.update_portfolio_log_returns(action) + self._update_unrealized_total_profit() self._update_profit(action) step_reward = self.calculate_reward(action) diff --git a/freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py b/freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py similarity index 92% rename from freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py rename to freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py index ec0977455..d9932eea7 100644 --- a/freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py +++ b/freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py @@ -11,7 +11,7 @@ from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_ logger = logging.getLogger(__name__) -class ReinforcementLearningExample3ac(IStrategy): +class ReinforcementLearningExample4ac(IStrategy): """ Test strategy - used for testing freqAI functionalities. DO not use in production. 
@@ -106,8 +106,8 @@ class ReinforcementLearningExample3ac(IStrategy): # For RL, this is not a target, it is simply a filler until actions come out # of the model. - # for Base3ActionEnv, 2 is netural (hold) - df["&-action"] = 2 + # for Base4ActionEnv, 0 is neutral (hold) + df["&-action"] = 0 return df @@ -119,14 +119,14 @@ class ReinforcementLearningExample3ac(IStrategy): def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1] + enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2] if enter_long_conditions: df.loc[ reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"] ] = (1, "long") - enter_short_conditions = [df["do_predict"] == 1, df["&-action"] == 2] + enter_short_conditions = [df["do_predict"] == 1, df["&-action"] == 3] if enter_short_conditions: df.loc[ @@ -136,12 +136,8 @@ class ReinforcementLearningExample3ac(IStrategy): return df def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2] + exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1] if exit_long_conditions: - df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1 - - exit_short_conditions = [df["do_predict"] == 1, df["&-action"] == 1] - if exit_short_conditions: - df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1 + df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit"] = 1 return df diff --git a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py index 15a263b94..2118e1221 100644 --- a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py +++ b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py @@ -107,7 +107,7 @@ class ReinforcementLearningExample5ac(IStrategy): # For RL, there are no direct targets to
set. This is filler (neutral) # until the agent sends an action. - df["&-action"] = 2 + df["&-action"] = 0 return df