From 27dce20b294e2388804992882dece3e33d4a4fa7 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 4 Sep 2022 11:21:54 +0200 Subject: [PATCH] fix bug in Base4ActionRLEnv, improve example strats --- freqtrade/freqai/RL/Base4ActionRLEnv.py | 2 +- ...c.py => ReinforcementLearningExample4ac.py} | 18 +++++++----------- .../ReinforcementLearningExample5ac.py | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) rename freqtrade/freqai/example_strats/{ReinforcementLearningExample3ac.py => ReinforcementLearningExample4ac.py} (92%) diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py index d2b92a954..70a625136 100644 --- a/freqtrade/freqai/RL/Base4ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py @@ -31,7 +31,7 @@ class Base4ActionRLEnv(BaseEnvironment): if self._current_tick == self._end_tick: self._done = True - self.update_portfolio_log_returns(action) + self._update_unrealized_total_profit() self._update_profit(action) step_reward = self.calculate_reward(action) diff --git a/freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py b/freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py similarity index 92% rename from freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py rename to freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py index ec0977455..d9932eea7 100644 --- a/freqtrade/freqai/example_strats/ReinforcementLearningExample3ac.py +++ b/freqtrade/freqai/example_strats/ReinforcementLearningExample4ac.py @@ -11,7 +11,7 @@ from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_ logger = logging.getLogger(__name__) -class ReinforcementLearningExample3ac(IStrategy): +class ReinforcementLearningExample4ac(IStrategy): """ Test strategy - used for testing freqAI functionalities. DO not use in production. 
@@ -106,8 +106,8 @@ class ReinforcementLearningExample3ac(IStrategy): # For RL, this is not a target, it is simply a filler until actions come out # of the model. - # for Base3ActionEnv, 2 is netural (hold) - df["&-action"] = 2 + # for Base4ActionEnv, 0 is neutral (hold) + df["&-action"] = 0 return df @@ -119,14 +119,14 @@ class ReinforcementLearningExample3ac(IStrategy): def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1] + enter_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2] if enter_long_conditions: df.loc[ reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"] ] = (1, "long") - enter_short_conditions = [df["do_predict"] == 1, df["&-action"] == 2] + enter_short_conditions = [df["do_predict"] == 1, df["&-action"] == 3] if enter_short_conditions: df.loc[ @@ -136,12 +136,8 @@ class ReinforcementLearningExample3ac(IStrategy): return df def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 2] + exit_long_conditions = [df["do_predict"] == 1, df["&-action"] == 1] if exit_long_conditions: - df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1 - - exit_short_conditions = [df["do_predict"] == 1, df["&-action"] == 1] - if exit_short_conditions: - df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1 + df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit"] = 1 return df diff --git a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py index 15a263b94..2118e1221 100644 --- a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py +++ b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py @@ -107,7 +107,7 @@ class ReinforcementLearningExample5ac(IStrategy): # For RL, there are no direct targets to
set. This is filler (neutral) # until the agent sends an action. - df["&-action"] = 2 + df["&-action"] = 0 return df