From d12a7ff18b4f09af248e51d331855b6f7aa196fb Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Wed, 22 Mar 2023 12:32:39 +0100
Subject: [PATCH 001/130] freqtrade's upstream merge broke my branch; fixed it
 by porting the work over to my develop branch. No logic changes in this
 commit.

---
 freqtrade/commands/__init__.py                |   3 +-
 freqtrade/commands/arguments.py               |  23 +-
 freqtrade/commands/strategy_utils_commands.py |  45 ++++
 .../backtest_lookahead_bias_checker.py        | 241 ++++++++++++++++++
 4 files changed, 308 insertions(+), 4 deletions(-)
 create mode 100644 freqtrade/strategy/backtest_lookahead_bias_checker.py

diff --git a/freqtrade/commands/__init__.py b/freqtrade/commands/__init__.py
index 66a9c995b..8add45241 100644
--- a/freqtrade/commands/__init__.py
+++ b/freqtrade/commands/__init__.py
@@ -22,6 +22,7 @@ from freqtrade.commands.optimize_commands import (start_backtesting, start_backt
                                                   start_edge, start_hyperopt)
 from freqtrade.commands.pairlist_commands import start_test_pairlist
 from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
-from freqtrade.commands.strategy_utils_commands import start_strategy_update
+from freqtrade.commands.strategy_utils_commands import (start_backtest_lookahead_bias_checker,
+                                                        start_strategy_update)
 from freqtrade.commands.trade_commands import start_trading
 from freqtrade.commands.webserver_commands import start_webserver

diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py
index 47aa37fdf..d79216b21 100644
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@@ -116,8 +116,14 @@ NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list
 NO_CONF_ALLOWED = ["create-userdir", "list-exchanges", "new-strategy"]

-ARGS_STRATEGY_UTILS = ["strategy_list", "strategy_path", "recursive_strategy_search"]
+ARGS_STRATEGY_UPDATER = ARGS_COMMON_OPTIMIZE + ["strategy_list"]

+ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER = ARGS_BACKTEST
+
+
+# + ["target_trades", "minimum_trades",
+#                            "target_trades", "exportfilename"]
+# will be added when the base version works.
class Arguments: """ @@ -192,7 +198,8 @@ class Arguments: self.parser = argparse.ArgumentParser(description='Free, open source crypto trading bot') self._build_args(optionlist=['version'], parser=self.parser) - from freqtrade.commands import (start_analysis_entries_exits, start_backtesting, + from freqtrade.commands import (start_analysis_entries_exits, + start_backtest_lookahead_bias_checker, start_backtesting, start_backtesting_show, start_convert_data, start_convert_db, start_convert_trades, start_create_userdir, start_download_data, start_edge, @@ -450,4 +457,14 @@ class Arguments: 'files to the current version', parents=[_common_parser]) strategy_updater_cmd.set_defaults(func=start_strategy_update) - self._build_args(optionlist=ARGS_STRATEGY_UTILS, parser=strategy_updater_cmd) + self._build_args(optionlist=ARGS_STRATEGY_UPDATER, parser=strategy_updater_cmd) + + # Add backtest lookahead bias checker subcommand + backtest_lookahead_bias_checker_cmd = \ + subparsers.add_parser('backtest_lookahead_bias_checker', + help="checks for potential look ahead bias", + parents=[_common_parser]) + backtest_lookahead_bias_checker_cmd.set_defaults(func=start_backtest_lookahead_bias_checker) + + self._build_args(optionlist=ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER, + parser=backtest_lookahead_bias_checker_cmd) diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py index e579ec475..8bce9d4f9 100644 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -7,6 +7,7 @@ from typing import Any, Dict from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.resolvers import StrategyResolver +from freqtrade.strategy.backtest_lookahead_bias_checker import backtest_lookahead_bias_checker from freqtrade.strategy.strategyupdater import StrategyUpdater @@ -53,3 +54,47 @@ def start_conversion(strategy_obj, config): instance_strategy_updater.start(config, strategy_obj) elapsed = time.perf_counter() - start print(f"Conversion of {Path(strategy_obj['location']).name} took {elapsed:.1f} seconds.") + + # except: + # pass + + +def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: + """ + Start the backtest bias tester script + :param args: Cli args from Arguments() + :return: None + """ + config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) + + strategy_objs = StrategyResolver.search_all_objects( + config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) + + filtered_strategy_objs = [] + if 'strategy_list' in args and args['strategy_list'] is not None: + for args_strategy in args['strategy_list']: + for strategy_obj in strategy_objs: + if (strategy_obj['name'] == args_strategy + and strategy_obj not in filtered_strategy_objs): + filtered_strategy_objs.append(strategy_obj) + break + + for filtered_strategy_obj in filtered_strategy_objs: + initialize_single_lookahead_bias_checker(filtered_strategy_obj, config) + else: + processed_locations = set() + for strategy_obj in strategy_objs: + if strategy_obj['location'] not in processed_locations: + processed_locations.add(strategy_obj['location']) + initialize_single_lookahead_bias_checker(strategy_obj, config) + + +def initialize_single_lookahead_bias_checker(strategy_obj, config): + # try: + print(f"Bias test of {Path(strategy_obj['location']).name} started.") + instance_backtest_lookahead_bias_checker = backtest_lookahead_bias_checker() + start = time.perf_counter() 
+ instance_backtest_lookahead_bias_checker.start(config, strategy_obj) + elapsed = time.perf_counter() - start + print(f"checking look ahead bias via backtests of {Path(strategy_obj['location']).name} " + f"took {elapsed:.1f} seconds.") diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py new file mode 100644 index 000000000..288786c19 --- /dev/null +++ b/freqtrade/strategy/backtest_lookahead_bias_checker.py @@ -0,0 +1,241 @@ +# pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument +from copy import deepcopy +from datetime import datetime, timedelta, timezone + +import pandas + +from freqtrade.configuration import TimeRange +from freqtrade.data.history import get_timerange +from freqtrade.exchange import timeframe_to_minutes +from freqtrade.optimize.backtesting import Backtesting + + +class backtest_lookahead_bias_checker: + class varHolder: + timerange: TimeRange + data: pandas.DataFrame + indicators: pandas.DataFrame + result: pandas.DataFrame + compared: pandas.DataFrame + from_dt: datetime + to_dt: datetime + compared_dt: datetime + + class analysis: + def __init__(self): + self.total_signals = 0 + self.false_entry_signals = 0 + self.false_exit_signals = 0 + self.false_indicators = [] + self.has_bias = False + + total_signals: int + false_entry_signals: int + false_exit_signals: int + + false_indicators: list + has_bias: bool + + def __init__(self): + self.strategy_obj + self.current_analysis + self.config + self.full_varHolder + self.entry_varholder + self.exit_varholder + self.backtesting + self.signals_to_check: int = 20 + self.current_analysis + self.full_varHolder.from_dt + self.full_varHolder.to_dt + + @staticmethod + def dt_to_timestamp(dt): + timestamp = int(dt.replace(tzinfo=timezone.utc).timestamp()) + return timestamp + + def get_result(self, backtesting, processed): + min_date, max_date = get_timerange(processed) + + result = backtesting.backtest( + processed=deepcopy(processed), + start_date=min_date, + end_date=max_date + ) + return result + + # analyzes two data frames with processed indicators and shows differences between them. + def analyze_indicators(self, full_vars, cut_vars, current_pair): + # extract dataframes + cut_df = cut_vars.indicators[current_pair] + full_df = full_vars.indicators[current_pair] + + # cut longer dataframe to length of the shorter + full_df_cut = full_df[ + (full_df.date == cut_vars.compared_dt) + ].reset_index(drop=True) + cut_df_cut = cut_df[ + (cut_df.date == cut_vars.compared_dt) + ].reset_index(drop=True) + + # compare dataframes + if full_df_cut.shape[0] != 0: + if cut_df_cut.shape[0] != 0: + compare_df = full_df_cut.compare(cut_df_cut) + + # skippedColumns = ["date", "open", "high", "low", "close", "volume"] + for col_name, values in compare_df.items(): + col_idx = compare_df.columns.get_loc(col_name) + compare_df_row = compare_df.iloc[0] + # compare_df now is comprised of tuples with [1] having either 'self' or 'other' + if 'other' in col_name[1]: + continue + self_value = compare_df_row[col_idx] + other_value = compare_df_row[col_idx + 1] + other_value = compare_df_row[col_idx + 1] + + # output differences + if self_value != other_value: + + if not self.current_analysis.false_indicators.__contains__(col_name[0]): + self.current_analysis.false_indicators.append(col_name[0]) + print(f"=> found look ahead bias in indicator {col_name[0]}. 
" + + f"{str(self_value)} != {str(other_value)}") + # return compare_df + + def report_signal(self, result, column_name, checked_timestamp): + df = result['results'] + row_count = df[column_name].shape[0] + + if row_count == 0: + return False + else: + + df_cut = df[(df[column_name] == checked_timestamp)] + if df_cut[column_name].shape[0] == 0: + # print("did NOT find the same signal in column " + column_name + + # " at timestamp " + str(checked_timestamp)) + return False + else: + return True + return False + + def prepare_data(self, varholder, var_pairs): + self.config['timerange'] = \ + str(int(self.dt_to_timestamp(varholder.from_dt))) + "-" + \ + str(int(self.dt_to_timestamp(varholder.to_dt))) + self.backtesting = Backtesting(self.config) + self.backtesting._set_strategy(self.backtesting.strategylist[0]) + varholder.data, varholder.timerange = self.backtesting.load_bt_data() + varholder.indicators = self.backtesting.strategy.advise_all_indicators(varholder.data) + varholder.result = self.get_result(self.backtesting, varholder.indicators) + + def start(self, config, strategy_obj: dict) -> None: + self.strategy_obj = strategy_obj + self.config = config + self.current_analysis = backtest_lookahead_bias_checker.analysis() + + max_try_signals: int = 3 + found_signals: int = 0 + continue_with_strategy = True + + # first we need to get the necessary entry/exit signals + # so we start by 14 days and increase in 1 month steps + # until we have the desired trade amount. + for try_buysignals in range(max_try_signals): # range(3) = 0..2 + # re-initialize backtesting-variable + self.full_varHolder = backtest_lookahead_bias_checker.varHolder() + + # define datetimes in human readable format + self.full_varHolder.from_dt = datetime(2022, 9, 1) + self.full_varHolder.to_dt = datetime(2022, 9, 15) + timedelta(days=30 * try_buysignals) + + self.prepare_data(self.full_varHolder, self.config['pairs']) + + found_signals = self.full_varHolder.result['results'].shape[0] + 1 + if try_buysignals == max_try_signals - 1: + if found_signals < self.signals_to_check / 2: + print(f"... only found {str(int(found_signals / 2))} " + f"buy signals for {self.strategy_obj['name']}. " + f"Cancelling...") + continue_with_strategy = False + else: + print( + f"Found {str(found_signals)} buy signals. " + f"Going with max {str(self.signals_to_check)} " + f" buy signals in the full timerange from " + f"{str(self.full_varHolder.from_dt)} to {str(self.full_varHolder.to_dt)}") + break + elif found_signals < self.signals_to_check: + print( + f"Only found {str(found_signals)} buy signals in the full timerange from " + f"{str(self.full_varHolder.from_dt)} to " + f"{str(self.full_varHolder.to_dt)}. " + f"will increase timerange trying to get at least " + f"{str(self.signals_to_check)} signals.") + else: + print( + f"Found {str(found_signals)} buy signals, more than necessary. " + f"Reducing to {str(self.signals_to_check)} " + f"checked buy signals in the full timerange from " + f"{str(self.full_varHolder.from_dt)} to {str(self.full_varHolder.to_dt)}") + break + if not continue_with_strategy: + return + + for idx, result_row in self.full_varHolder.result['results'].iterrows(): + if self.current_analysis.total_signals == self.signals_to_check: + break + + # if force-sold, ignore this signal since here it will unconditionally exit. 
+ if result_row.close_date == self.dt_to_timestamp(self.full_varHolder.to_dt): + continue + + self.current_analysis.total_signals += 1 + + self.entry_varholder = backtest_lookahead_bias_checker.varHolder() + self.exit_varholder = backtest_lookahead_bias_checker.varHolder() + + self.entry_varholder.from_dt = self.full_varHolder.from_dt # result_row['open_date'] + self.entry_varholder.compared_dt = result_row['open_date'] + + # to_dt needs +1 candle since it won't buy on the last candle + self.entry_varholder.to_dt = result_row['open_date'] + \ + timedelta(minutes=timeframe_to_minutes(self.config['timeframe']) * 2) + + self.prepare_data(self.entry_varholder, [result_row['pair']]) + + # --- + # print("analyzing the sell signal") + # to_dt needs +1 candle since it will always sell all trades on the last candle + self.exit_varholder.from_dt = self.full_varHolder.from_dt # result_row['open_date'] + self.exit_varholder.to_dt = \ + result_row['close_date'] + \ + timedelta(minutes=timeframe_to_minutes(self.config['timeframe'])) + self.exit_varholder.compared_dt = result_row['close_date'] + + self.prepare_data(self.exit_varholder, [result_row['pair']]) + + # register if buy signal is broken + if not self.report_signal( + self.entry_varholder.result, + "open_date", self.entry_varholder.compared_dt): + self.current_analysis.false_entry_signals += 1 + + # register if buy or sell signal is broken + if not self.report_signal(self.entry_varholder.result, + "open_date", self.entry_varholder.compared_dt) \ + or not self.report_signal(self.exit_varholder.result, + "close_date", self.exit_varholder.compared_dt): + self.current_analysis.false_exit_signals += 1 + + self.analyze_indicators(self.full_varHolder, self.entry_varholder, result_row['pair']) + self.analyze_indicators(self.full_varHolder, self.exit_varholder, result_row['pair']) + + if self.current_analysis.false_entry_signals > 0 or \ + self.current_analysis.false_exit_signals > 0 or \ + len(self.current_analysis.false_indicators) > 0: + print(" => " + self.strategy_obj['name'] + ": bias detected!") + self.current_analysis.has_bias = True + else: + print(self.strategy_obj['name'] + ": no bias detected") From 7bd55971dc8e1ab8a447c12952885c5975ddaae0 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Tue, 28 Mar 2023 22:20:00 +0200 Subject: [PATCH 002/130] strategy_updater: removed args_common_optimize for strategy-updater backtest_lookahead_bias_checker: added args and cli-options for minimum and target trade amounts fixed code according to best-practice coding requests of matthias (CamelCase etc) --- freqtrade/commands/arguments.py | 7 +- freqtrade/commands/cli_options.py | 14 ++ freqtrade/commands/strategy_utils_commands.py | 27 ++- .../backtest_lookahead_bias_checker.py | 229 ++++++++---------- 4 files changed, 146 insertions(+), 131 deletions(-) diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index d79216b21..6cb727eaf 100644 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -116,9 +116,10 @@ NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list NO_CONF_ALLOWED = ["create-userdir", "list-exchanges", "new-strategy"] -ARGS_STRATEGY_UPDATER = ARGS_COMMON_OPTIMIZE + ["strategy_list"] +ARGS_STRATEGY_UPDATER = ["strategy_list"] -ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER = ARGS_BACKTEST +ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER = ARGS_BACKTEST + ["minimum_trade_amount", + "targeted_trade_amount"] # + ["target_trades", "minimum_trades", @@ -461,7 +462,7 @@ class Arguments: # Add 
backtest lookahead bias checker subcommand backtest_lookahead_bias_checker_cmd = \ - subparsers.add_parser('backtest_lookahead_bias_checker', + subparsers.add_parser('backtest-lookahead-bias-checker', help="checks for potential look ahead bias", parents=[_common_parser]) backtest_lookahead_bias_checker_cmd.set_defaults(func=start_backtest_lookahead_bias_checker) diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index f1474ec69..5d2af934f 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -675,4 +675,18 @@ AVAILABLE_CLI_OPTIONS = { help='Run backtest with ready models.', action='store_true' ), + "minimum_trade_amount": Arg( + '--minimum-trade-amount', + help='set INT minimum trade amount', + type=check_int_positive, + metavar='INT', + default=10, + ), + "targeted_trade_amount": Arg( + '--targeted-trade-amount', + help='set INT targeted trade amount', + type=check_int_positive, + metavar='INT', + default=20, + ) } diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py index 8bce9d4f9..663ea571a 100644 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -7,7 +7,7 @@ from typing import Any, Dict from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.resolvers import StrategyResolver -from freqtrade.strategy.backtest_lookahead_bias_checker import backtest_lookahead_bias_checker +from freqtrade.strategy.backtest_lookahead_bias_checker import BacktestLookaheadBiasChecker from freqtrade.strategy.strategyupdater import StrategyUpdater @@ -67,9 +67,16 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: """ config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) + if args['targeted_trade_amount'] < args['minimum_trade_amount']: + # add logic that tells the user to check the configuration + # since this combo doesn't make any sense. 
+ pass + strategy_objs = StrategyResolver.search_all_objects( config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) + bias_checker_instances = [] + filtered_strategy_objs = [] if 'strategy_list' in args and args['strategy_list'] is not None: for args_strategy in args['strategy_list']: @@ -80,21 +87,29 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: break for filtered_strategy_obj in filtered_strategy_objs: - initialize_single_lookahead_bias_checker(filtered_strategy_obj, config) + bias_checker_instances = initialize_single_lookahead_bias_checker( + filtered_strategy_obj, config, args) else: processed_locations = set() for strategy_obj in strategy_objs: if strategy_obj['location'] not in processed_locations: processed_locations.add(strategy_obj['location']) - initialize_single_lookahead_bias_checker(strategy_obj, config) + bias_checker_instances = initialize_single_lookahead_bias_checker( + strategy_obj, config, args) + create_result_list(bias_checker_instances) -def initialize_single_lookahead_bias_checker(strategy_obj, config): +def create_result_list(bias_checker_instances): + pass + + +def initialize_single_lookahead_bias_checker(strategy_obj, config, args): # try: print(f"Bias test of {Path(strategy_obj['location']).name} started.") - instance_backtest_lookahead_bias_checker = backtest_lookahead_bias_checker() + instance_backtest_lookahead_bias_checker = BacktestLookaheadBiasChecker() start = time.perf_counter() - instance_backtest_lookahead_bias_checker.start(config, strategy_obj) + current_instance = instance_backtest_lookahead_bias_checker.start(config, strategy_obj, args) elapsed = time.perf_counter() - start print(f"checking look ahead bias via backtests of {Path(strategy_obj['location']).name} " f"took {elapsed:.1f} seconds.") + return current_instance diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py index 288786c19..c4a321a4a 100644 --- a/freqtrade/strategy/backtest_lookahead_bias_checker.py +++ b/freqtrade/strategy/backtest_lookahead_bias_checker.py @@ -1,4 +1,4 @@ -# pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument +import copy from copy import deepcopy from datetime import datetime, timedelta, timezone @@ -10,8 +10,8 @@ from freqtrade.exchange import timeframe_to_minutes from freqtrade.optimize.backtesting import Backtesting -class backtest_lookahead_bias_checker: - class varHolder: +class BacktestLookaheadBiasChecker: + class VarHolder: timerange: TimeRange data: pandas.DataFrame indicators: pandas.DataFrame @@ -21,7 +21,7 @@ class backtest_lookahead_bias_checker: to_dt: datetime compared_dt: datetime - class analysis: + class Analysis: def __init__(self): self.total_signals = 0 self.false_entry_signals = 0 @@ -37,24 +37,24 @@ class backtest_lookahead_bias_checker: has_bias: bool def __init__(self): - self.strategy_obj - self.current_analysis - self.config - self.full_varHolder - self.entry_varholder - self.exit_varholder - self.backtesting - self.signals_to_check: int = 20 - self.current_analysis - self.full_varHolder.from_dt - self.full_varHolder.to_dt + self.strategy_obj = None + self.current_analysis = None + self.local_config = None + self.full_varHolder = None + self.entry_varHolder = None + self.exit_varHolder = None + self.backtesting = None + self.current_analysis = None + self.minimum_trade_amount = None + self.targeted_trade_amount = None @staticmethod def dt_to_timestamp(dt): timestamp = 
int(dt.replace(tzinfo=timezone.utc).timestamp()) return timestamp - def get_result(self, backtesting, processed): + @staticmethod + def get_result(backtesting, processed): min_date, max_date = get_timerange(processed) result = backtesting.backtest( @@ -64,6 +64,24 @@ class backtest_lookahead_bias_checker: ) return result + @staticmethod + def report_signal(result, column_name, checked_timestamp): + df = result['results'] + row_count = df[column_name].shape[0] + + if row_count == 0: + return False + else: + + df_cut = df[(df[column_name] == checked_timestamp)] + if df_cut[column_name].shape[0] == 0: + # print("did NOT find the same signal in column " + column_name + + # " at timestamp " + str(checked_timestamp)) + return False + else: + return True + return False + # analyzes two data frames with processed indicators and shows differences between them. def analyze_indicators(self, full_vars, cut_vars, current_pair): # extract dataframes @@ -87,12 +105,11 @@ class backtest_lookahead_bias_checker: for col_name, values in compare_df.items(): col_idx = compare_df.columns.get_loc(col_name) compare_df_row = compare_df.iloc[0] - # compare_df now is comprised of tuples with [1] having either 'self' or 'other' + # compare_df now comprises tuples with [1] having either 'self' or 'other' if 'other' in col_name[1]: continue self_value = compare_df_row[col_idx] other_value = compare_df_row[col_idx + 1] - other_value = compare_df_row[col_idx + 1] # output differences if self_value != other_value: @@ -101,90 +118,62 @@ class backtest_lookahead_bias_checker: self.current_analysis.false_indicators.append(col_name[0]) print(f"=> found look ahead bias in indicator {col_name[0]}. " + f"{str(self_value)} != {str(other_value)}") - # return compare_df - def report_signal(self, result, column_name, checked_timestamp): - df = result['results'] - row_count = df[column_name].shape[0] - - if row_count == 0: - return False - else: - - df_cut = df[(df[column_name] == checked_timestamp)] - if df_cut[column_name].shape[0] == 0: - # print("did NOT find the same signal in column " + column_name + - # " at timestamp " + str(checked_timestamp)) - return False - else: - return True - return False - - def prepare_data(self, varholder, var_pairs): - self.config['timerange'] = \ - str(int(self.dt_to_timestamp(varholder.from_dt))) + "-" + \ - str(int(self.dt_to_timestamp(varholder.to_dt))) - self.backtesting = Backtesting(self.config) + def prepare_data(self, varHolder, pairs_to_load): + prepare_data_config = copy.deepcopy(self.local_config) + prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varHolder.from_dt)) + "-" + + str(self.dt_to_timestamp(varHolder.to_dt))) + prepare_data_config['pairs'] = pairs_to_load + self.backtesting = Backtesting(prepare_data_config) self.backtesting._set_strategy(self.backtesting.strategylist[0]) - varholder.data, varholder.timerange = self.backtesting.load_bt_data() - varholder.indicators = self.backtesting.strategy.advise_all_indicators(varholder.data) - varholder.result = self.get_result(self.backtesting, varholder.indicators) + varHolder.data, varHolder.timerange = self.backtesting.load_bt_data() + varHolder.indicators = self.backtesting.strategy.advise_all_indicators(varHolder.data) + varHolder.result = self.get_result(self.backtesting, varHolder.indicators) - def start(self, config, strategy_obj: dict) -> None: - self.strategy_obj = strategy_obj - self.config = config - self.current_analysis = backtest_lookahead_bias_checker.analysis() + def update_output_file(self): + pass - 
max_try_signals: int = 3 - found_signals: int = 0 - continue_with_strategy = True + def start(self, config, strategy_obj: dict, args) -> None: - # first we need to get the necessary entry/exit signals - # so we start by 14 days and increase in 1 month steps - # until we have the desired trade amount. - for try_buysignals in range(max_try_signals): # range(3) = 0..2 - # re-initialize backtesting-variable - self.full_varHolder = backtest_lookahead_bias_checker.varHolder() + # deepcopy so we can change the pairs for the 2ndary runs + # and not worry about another strategy to check after. + self.local_config = deepcopy(config) + self.local_config['strategy_list'] = [strategy_obj['name']] + self.current_analysis = BacktestLookaheadBiasChecker.Analysis() + self.minimum_trade_amount = args['minimum_trade_amount'] + self.targeted_trade_amount = args['targeted_trade_amount'] - # define datetimes in human readable format - self.full_varHolder.from_dt = datetime(2022, 9, 1) - self.full_varHolder.to_dt = datetime(2022, 9, 15) + timedelta(days=30 * try_buysignals) + # first make a single backtest + self.full_varHolder = BacktestLookaheadBiasChecker.VarHolder() - self.prepare_data(self.full_varHolder, self.config['pairs']) - - found_signals = self.full_varHolder.result['results'].shape[0] + 1 - if try_buysignals == max_try_signals - 1: - if found_signals < self.signals_to_check / 2: - print(f"... only found {str(int(found_signals / 2))} " - f"buy signals for {self.strategy_obj['name']}. " - f"Cancelling...") - continue_with_strategy = False - else: - print( - f"Found {str(found_signals)} buy signals. " - f"Going with max {str(self.signals_to_check)} " - f" buy signals in the full timerange from " - f"{str(self.full_varHolder.from_dt)} to {str(self.full_varHolder.to_dt)}") - break - elif found_signals < self.signals_to_check: - print( - f"Only found {str(found_signals)} buy signals in the full timerange from " - f"{str(self.full_varHolder.from_dt)} to " - f"{str(self.full_varHolder.to_dt)}. " - f"will increase timerange trying to get at least " - f"{str(self.signals_to_check)} signals.") - else: - print( - f"Found {str(found_signals)} buy signals, more than necessary. " - f"Reducing to {str(self.signals_to_check)} " - f"checked buy signals in the full timerange from " - f"{str(self.full_varHolder.from_dt)} to {str(self.full_varHolder.to_dt)}") - break - if not continue_with_strategy: + # define datetime in human-readable format + parsed_timerange = TimeRange.parse_timerange(config['timerange']) + if (parsed_timerange is not None and + parsed_timerange.startdt is not None and + parsed_timerange.stopdt is not None): + self.full_varHolder.from_dt = parsed_timerange.startdt + self.full_varHolder.to_dt = parsed_timerange.stopdt + else: + print("Parsing of parsed_timerange failed. exiting!") return + self.prepare_data(self.full_varHolder, self.local_config['pairs']) + + found_signals: int = self.full_varHolder.result['results'].shape[0] + 1 + if found_signals >= self.targeted_trade_amount: + print(f"Found {found_signals} trades, calculating {self.targeted_trade_amount} trades.") + elif self.targeted_trade_amount >= found_signals >= self.minimum_trade_amount: + print(f"Only found {found_signals} trades. Calculating all available trades.") + else: + print(f"found {found_signals} trades " + f"which is less than minimum_trade_amount {self.minimum_trade_amount}. 
" + f"Cancelling this backtest lookahead bias test.") + return + + # now we loop through all entry signals + # starting from the same datetime to avoid miss-reports of bias for idx, result_row in self.full_varHolder.result['results'].iterrows(): - if self.current_analysis.total_signals == self.signals_to_check: + if self.current_analysis.total_signals == self.targeted_trade_amount: break # if force-sold, ignore this signal since here it will unconditionally exit. @@ -193,49 +182,45 @@ class backtest_lookahead_bias_checker: self.current_analysis.total_signals += 1 - self.entry_varholder = backtest_lookahead_bias_checker.varHolder() - self.exit_varholder = backtest_lookahead_bias_checker.varHolder() - - self.entry_varholder.from_dt = self.full_varHolder.from_dt # result_row['open_date'] - self.entry_varholder.compared_dt = result_row['open_date'] + self.entry_varHolder = BacktestLookaheadBiasChecker.VarHolder() + self.exit_varHolder = BacktestLookaheadBiasChecker.VarHolder() + self.entry_varHolder.from_dt = self.full_varHolder.from_dt + self.entry_varHolder.compared_dt = result_row['open_date'] # to_dt needs +1 candle since it won't buy on the last candle - self.entry_varholder.to_dt = result_row['open_date'] + \ - timedelta(minutes=timeframe_to_minutes(self.config['timeframe']) * 2) + self.entry_varHolder.to_dt = (result_row['open_date'] + + timedelta(minutes=timeframe_to_minutes( + self.local_config['timeframe']))) - self.prepare_data(self.entry_varholder, [result_row['pair']]) + self.prepare_data(self.entry_varHolder, [result_row['pair']]) - # --- - # print("analyzing the sell signal") - # to_dt needs +1 candle since it will always sell all trades on the last candle - self.exit_varholder.from_dt = self.full_varHolder.from_dt # result_row['open_date'] - self.exit_varholder.to_dt = \ - result_row['close_date'] + \ - timedelta(minutes=timeframe_to_minutes(self.config['timeframe'])) - self.exit_varholder.compared_dt = result_row['close_date'] + # to_dt needs +1 candle since it will always exit/force-exit trades on the last candle + self.exit_varHolder.from_dt = self.full_varHolder.from_dt + self.exit_varHolder.to_dt = (result_row['close_date'] + + timedelta(minutes=timeframe_to_minutes( + self.local_config['timeframe']))) + self.exit_varHolder.compared_dt = result_row['close_date'] - self.prepare_data(self.exit_varholder, [result_row['pair']]) + self.prepare_data(self.exit_varHolder, [result_row['pair']]) # register if buy signal is broken if not self.report_signal( - self.entry_varholder.result, - "open_date", self.entry_varholder.compared_dt): + self.entry_varHolder.result, "open_date", self.entry_varHolder.compared_dt): self.current_analysis.false_entry_signals += 1 # register if buy or sell signal is broken - if not self.report_signal(self.entry_varholder.result, - "open_date", self.entry_varholder.compared_dt) \ - or not self.report_signal(self.exit_varholder.result, - "close_date", self.exit_varholder.compared_dt): + if not self.report_signal( + self.exit_varHolder.result, "close_date", self.exit_varHolder.compared_dt): self.current_analysis.false_exit_signals += 1 - self.analyze_indicators(self.full_varHolder, self.entry_varholder, result_row['pair']) - self.analyze_indicators(self.full_varHolder, self.exit_varholder, result_row['pair']) + # check if the indicators themselves contain biased data + self.analyze_indicators(self.full_varHolder, self.entry_varHolder, result_row['pair']) + self.analyze_indicators(self.full_varHolder, self.exit_varHolder, result_row['pair']) - if 
self.current_analysis.false_entry_signals > 0 or \ - self.current_analysis.false_exit_signals > 0 or \ - len(self.current_analysis.false_indicators) > 0: - print(" => " + self.strategy_obj['name'] + ": bias detected!") + if (self.current_analysis.false_entry_signals > 0 or + self.current_analysis.false_exit_signals > 0 or + len(self.current_analysis.false_indicators) > 0): + print(" => " + self.local_config['strategy_list'][0] + ": bias detected!") self.current_analysis.has_bias = True else: - print(self.strategy_obj['name'] + ": no bias detected") + print(self.local_config['strategy_list'][0] + ": no bias detected") From a9ef4c3ab013b6e7a6f953788ce221b8f5301ea7 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Wed, 12 Apr 2023 21:03:59 +0200 Subject: [PATCH 003/130] partial progress commit: added terminal tabulate-output added yet non-working csv output using pandas --- freqtrade/commands/arguments.py | 10 ++- freqtrade/commands/cli_options.py | 5 ++ freqtrade/commands/strategy_utils_commands.py | 78 ++++++++++++++++--- .../backtest_lookahead_bias_checker.py | 68 +++++++++------- 4 files changed, 116 insertions(+), 45 deletions(-) mode change 100644 => 100755 freqtrade/commands/arguments.py mode change 100644 => 100755 freqtrade/commands/cli_options.py mode change 100644 => 100755 freqtrade/commands/strategy_utils_commands.py mode change 100644 => 100755 freqtrade/strategy/backtest_lookahead_bias_checker.py diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py old mode 100644 new mode 100755 index 6cb727eaf..ac5c33ad1 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -116,10 +116,11 @@ NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list NO_CONF_ALLOWED = ["create-userdir", "list-exchanges", "new-strategy"] -ARGS_STRATEGY_UPDATER = ["strategy_list"] +ARGS_STRATEGY_UPDATER = ["strategy_list", "strategy_path", "recursive_strategy_search"] ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER = ARGS_BACKTEST + ["minimum_trade_amount", - "targeted_trade_amount"] + "targeted_trade_amount", + "overwrite_existing_exportfilename_content"] # + ["target_trades", "minimum_trades", @@ -458,13 +459,14 @@ class Arguments: 'files to the current version', parents=[_common_parser]) strategy_updater_cmd.set_defaults(func=start_strategy_update) - self._build_args(optionlist=ARGS_STRATEGY_UPDATER, parser=strategy_updater_cmd) + self._build_args(optionlist=ARGS_STRATEGY_UPDATER, + parser=strategy_updater_cmd) # Add backtest lookahead bias checker subcommand backtest_lookahead_bias_checker_cmd = \ subparsers.add_parser('backtest-lookahead-bias-checker', help="checks for potential look ahead bias", - parents=[_common_parser]) + parents=[_common_parser, _strategy_parser]) backtest_lookahead_bias_checker_cmd.set_defaults(func=start_backtest_lookahead_bias_checker) self._build_args(optionlist=ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER, diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py old mode 100644 new mode 100755 index 5d2af934f..e0709fc31 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -688,5 +688,10 @@ AVAILABLE_CLI_OPTIONS = { type=check_int_positive, metavar='INT', default=20, + ), + "overwrite_existing_exportfilename_content": Arg( + '--overwrite-existing-exportfilename-content', + help='overwrites existing contents if existent with exportfilename given', + action='store_true' ) } diff --git a/freqtrade/commands/strategy_utils_commands.py 
b/freqtrade/commands/strategy_utils_commands.py old mode 100644 new mode 100755 index 663ea571a..b46481734 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -4,6 +4,9 @@ import time from pathlib import Path from typing import Any, Dict +import pandas as pd +from tabulate import tabulate + from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.resolvers import StrategyResolver @@ -76,7 +79,6 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) bias_checker_instances = [] - filtered_strategy_objs = [] if 'strategy_list' in args and args['strategy_list'] is not None: for args_strategy in args['strategy_list']: @@ -87,28 +89,82 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: break for filtered_strategy_obj in filtered_strategy_objs: - bias_checker_instances = initialize_single_lookahead_bias_checker( - filtered_strategy_obj, config, args) + bias_checker_instances.append( + initialize_single_lookahead_bias_checker(filtered_strategy_obj, config, args)) else: processed_locations = set() for strategy_obj in strategy_objs: if strategy_obj['location'] not in processed_locations: processed_locations.add(strategy_obj['location']) - bias_checker_instances = initialize_single_lookahead_bias_checker( - strategy_obj, config, args) - create_result_list(bias_checker_instances) + bias_checker_instances.append( + initialize_single_lookahead_bias_checker(strategy_obj, config, args)) + text_table_bias_checker_instances(bias_checker_instances) + export_to_csv(args, bias_checker_instances) -def create_result_list(bias_checker_instances): - pass +def text_table_bias_checker_instances(bias_checker_instances): + headers = ['strategy', 'has_bias', + 'total_signals', 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] + data = [] + for current_instance in bias_checker_instances: + data.append( + [current_instance.strategy_obj['name'], + current_instance.current_analysis.has_bias, + current_instance.current_analysis.total_signals, + current_instance.current_analysis.false_entry_signals, + current_instance.current_analysis.false_exit_signals, + ", ".join(current_instance.current_analysis.false_indicators)] + ) + table = tabulate(data, headers=headers, tablefmt="orgtbl") + print(table) + + +def export_to_csv(args, bias_checker_instances): + def add_or_update_row(df, row_data): + strategy_col_name = 'strategy' + if row_data[strategy_col_name] in df[strategy_col_name].values: + # create temporary dataframe with a single row + # and use that to replace the previous data in there. 
+ index = (df.index[df[strategy_col_name] == + row_data[strategy_col_name]][0]) + df.loc[index] = pd.Series(row_data, index='strategy') + + else: + df = df.concat(row_data, ignore_index=True) + return df + + csv_df = None + + if not Path.exists(args['exportfilename']): + # If the file doesn't exist, create a new DataFrame from scratch + csv_df = pd.DataFrame(columns=['filename', 'strategy', 'has_bias', + 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', + 'biased_indicators'], + index='filename') + else: + # Read CSV file into a pandas dataframe + csv_df = pd.read_csv(args['exportfilename']) + + for inst in bias_checker_instances: + new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], + 'strategy': inst.strategy_obj['name'], + 'has_bias': inst.current_analysis.has_bias, + 'total_signals': inst.current_analysis.total_signals, + 'biased_entry_signals': inst.current_analysis.false_entry_signals, + 'biased_exit_signals': inst.current_analysis.false_exit_signals, + 'biased_indicators': ", ".join(inst.current_analysis.false_indicators)} + csv_df = add_or_update_row(csv_df, new_row_data) + if len(bias_checker_instances) > 0: + print(f"saving {args['exportfilename']}") + csv_df.to_csv(args['exportfilename']) def initialize_single_lookahead_bias_checker(strategy_obj, config, args): - # try: print(f"Bias test of {Path(strategy_obj['location']).name} started.") - instance_backtest_lookahead_bias_checker = BacktestLookaheadBiasChecker() start = time.perf_counter() - current_instance = instance_backtest_lookahead_bias_checker.start(config, strategy_obj, args) + current_instance = BacktestLookaheadBiasChecker() + current_instance.start(config, strategy_obj, args) elapsed = time.perf_counter() - start print(f"checking look ahead bias via backtests of {Path(strategy_obj['location']).name} " f"took {elapsed:.1f} seconds.") diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py old mode 100644 new mode 100755 index c4a321a4a..c48c3a826 --- a/freqtrade/strategy/backtest_lookahead_bias_checker.py +++ b/freqtrade/strategy/backtest_lookahead_bias_checker.py @@ -2,7 +2,7 @@ import copy from copy import deepcopy from datetime import datetime, timedelta, timezone -import pandas +from pandas import DataFrame from freqtrade.configuration import TimeRange from freqtrade.data.history import get_timerange @@ -10,33 +10,37 @@ from freqtrade.exchange import timeframe_to_minutes from freqtrade.optimize.backtesting import Backtesting +class VarHolder: + timerange: TimeRange + data: DataFrame + indicators: DataFrame + result: DataFrame + compared: DataFrame + from_dt: datetime + to_dt: datetime + compared_dt: datetime + + +class Analysis: + def __init__(self): + self.total_signals = 0 + self.false_entry_signals = 0 + self.false_exit_signals = 0 + self.false_indicators = [] + self.has_bias = False + + total_signals: int + false_entry_signals: int + false_exit_signals: int + + false_indicators: list + has_bias: bool + + class BacktestLookaheadBiasChecker: - class VarHolder: - timerange: TimeRange - data: pandas.DataFrame - indicators: pandas.DataFrame - result: pandas.DataFrame - compared: pandas.DataFrame - from_dt: datetime - to_dt: datetime - compared_dt: datetime - - class Analysis: - def __init__(self): - self.total_signals = 0 - self.false_entry_signals = 0 - self.false_exit_signals = 0 - self.false_indicators = [] - self.has_bias = False - - total_signals: int - false_entry_signals: int - false_exit_signals: int - - 
false_indicators: list - has_bias: bool def __init__(self): + self.exportfilename = None self.strategy_obj = None self.current_analysis = None self.local_config = None @@ -44,7 +48,6 @@ class BacktestLookaheadBiasChecker: self.entry_varHolder = None self.exit_varHolder = None self.backtesting = None - self.current_analysis = None self.minimum_trade_amount = None self.targeted_trade_amount = None @@ -124,9 +127,12 @@ class BacktestLookaheadBiasChecker: prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varHolder.from_dt)) + "-" + str(self.dt_to_timestamp(varHolder.to_dt))) prepare_data_config['pairs'] = pairs_to_load + self.backtesting = Backtesting(prepare_data_config) self.backtesting._set_strategy(self.backtesting.strategylist[0]) varHolder.data, varHolder.timerange = self.backtesting.load_bt_data() + self.backtesting.load_bt_data_detail() + varHolder.indicators = self.backtesting.strategy.advise_all_indicators(varHolder.data) varHolder.result = self.get_result(self.backtesting, varHolder.indicators) @@ -139,12 +145,14 @@ class BacktestLookaheadBiasChecker: # and not worry about another strategy to check after. self.local_config = deepcopy(config) self.local_config['strategy_list'] = [strategy_obj['name']] - self.current_analysis = BacktestLookaheadBiasChecker.Analysis() + self.current_analysis = Analysis() self.minimum_trade_amount = args['minimum_trade_amount'] self.targeted_trade_amount = args['targeted_trade_amount'] + self.exportfilename = args['exportfilename'] + self.strategy_obj = strategy_obj # first make a single backtest - self.full_varHolder = BacktestLookaheadBiasChecker.VarHolder() + self.full_varHolder = VarHolder() # define datetime in human-readable format parsed_timerange = TimeRange.parse_timerange(config['timerange']) @@ -182,8 +190,8 @@ class BacktestLookaheadBiasChecker: self.current_analysis.total_signals += 1 - self.entry_varHolder = BacktestLookaheadBiasChecker.VarHolder() - self.exit_varHolder = BacktestLookaheadBiasChecker.VarHolder() + self.entry_varHolder = VarHolder() + self.exit_varHolder = VarHolder() self.entry_varHolder.from_dt = self.full_varHolder.from_dt self.entry_varHolder.compared_dt = result_row['open_date'] From 767442198ef2c64d27cb73291aeda4669817cb48 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 15 Apr 2023 14:29:52 +0200 Subject: [PATCH 004/130] saving and updating the csv file now works open ended timeranges now work if a file fails then it will not report as non-bias, but report in the table as error and the csv file will not have it listed. 
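
An illustrative sketch of the resulting csv layout (the columns match what
export_to_csv writes below; the file, strategy and indicator names are made up):

filename,strategy,has_bias,total_signals,biased_entry_signals,biased_exit_signals,biased_indicators
MyStrategy.py,MyStrategy,True,20,3,1,"rsi_fast,ema_cross"
MyOtherStrategy.py,MyOtherStrategy,False,20,0,0,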
--- freqtrade/commands/strategy_utils_commands.py | 78 +++++++++++-------- .../backtest_lookahead_bias_checker.py | 19 +++-- 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py index b46481734..417d98ad5 100755 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -103,48 +103,62 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: def text_table_bias_checker_instances(bias_checker_instances): - headers = ['strategy', 'has_bias', + headers = ['filename', 'strategy', 'has_bias', 'total_signals', 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] data = [] for current_instance in bias_checker_instances: - data.append( - [current_instance.strategy_obj['name'], - current_instance.current_analysis.has_bias, - current_instance.current_analysis.total_signals, - current_instance.current_analysis.false_entry_signals, - current_instance.current_analysis.false_exit_signals, - ", ".join(current_instance.current_analysis.false_indicators)] - ) + if current_instance.failed_bias_check: + data.append( + [ + current_instance.strategy_obj['location'].parts[-1], + current_instance.strategy_obj['name'], + 'error while checking' + ] + ) + else: + data.append( + [ + current_instance.strategy_obj['location'].parts[-1], + current_instance.strategy_obj['name'], + current_instance.current_analysis.has_bias, + current_instance.current_analysis.total_signals, + current_instance.current_analysis.false_entry_signals, + current_instance.current_analysis.false_exit_signals, + ", ".join(current_instance.current_analysis.false_indicators) + ] + ) table = tabulate(data, headers=headers, tablefmt="orgtbl") print(table) def export_to_csv(args, bias_checker_instances): def add_or_update_row(df, row_data): - strategy_col_name = 'strategy' - if row_data[strategy_col_name] in df[strategy_col_name].values: - # create temporary dataframe with a single row - # and use that to replace the previous data in there. 
- index = (df.index[df[strategy_col_name] == - row_data[strategy_col_name]][0]) - df.loc[index] = pd.Series(row_data, index='strategy') - + if ( + (df['filename'] == row_data['filename']) & + (df['strategy'] == row_data['strategy']) + ).any(): + # Update existing row + pd_series = pd.DataFrame([row_data]) + df.loc[ + (df['filename'] == row_data['filename']) & + (df['strategy'] == row_data['strategy']) + ] = pd_series else: - df = df.concat(row_data, ignore_index=True) + # Add new row + df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)]) + return df - csv_df = None - - if not Path.exists(args['exportfilename']): - # If the file doesn't exist, create a new DataFrame from scratch - csv_df = pd.DataFrame(columns=['filename', 'strategy', 'has_bias', - 'total_signals', - 'biased_entry_signals', 'biased_exit_signals', - 'biased_indicators'], - index='filename') - else: + if Path(args['exportfilename']).exists(): # Read CSV file into a pandas dataframe csv_df = pd.read_csv(args['exportfilename']) + else: + # Create a new empty DataFrame with the desired column names and set the index + csv_df = pd.DataFrame(columns=[ + 'filename', 'strategy', 'has_bias', 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators' + ], + index=None) for inst in bias_checker_instances: new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], @@ -153,11 +167,11 @@ def export_to_csv(args, bias_checker_instances): 'total_signals': inst.current_analysis.total_signals, 'biased_entry_signals': inst.current_analysis.false_entry_signals, 'biased_exit_signals': inst.current_analysis.false_exit_signals, - 'biased_indicators': ", ".join(inst.current_analysis.false_indicators)} + 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} csv_df = add_or_update_row(csv_df, new_row_data) - if len(bias_checker_instances) > 0: - print(f"saving {args['exportfilename']}") - csv_df.to_csv(args['exportfilename']) + + print(f"saving {args['exportfilename']}") + csv_df.to_csv(args['exportfilename'], index=False) def initialize_single_lookahead_bias_checker(strategy_obj, config, args): diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py index c48c3a826..98b82e209 100755 --- a/freqtrade/strategy/backtest_lookahead_bias_checker.py +++ b/freqtrade/strategy/backtest_lookahead_bias_checker.py @@ -50,6 +50,7 @@ class BacktestLookaheadBiasChecker: self.backtesting = None self.minimum_trade_amount = None self.targeted_trade_amount = None + self.failed_bias_check = True @staticmethod def dt_to_timestamp(dt): @@ -156,14 +157,16 @@ class BacktestLookaheadBiasChecker: # define datetime in human-readable format parsed_timerange = TimeRange.parse_timerange(config['timerange']) - if (parsed_timerange is not None and - parsed_timerange.startdt is not None and - parsed_timerange.stopdt is not None): - self.full_varHolder.from_dt = parsed_timerange.startdt - self.full_varHolder.to_dt = parsed_timerange.stopdt + + if parsed_timerange.startdt is None: + self.full_varHolder.from_dt = datetime.utcfromtimestamp(0) else: - print("Parsing of parsed_timerange failed. 
exiting!")
-            return
+            self.full_varHolder.from_dt = parsed_timerange.startdt
+
+        if parsed_timerange.stopdt is None:
+            self.full_varHolder.to_dt = datetime.now()
+        else:
+            self.full_varHolder.to_dt = parsed_timerange.stopdt

         self.prepare_data(self.full_varHolder, self.local_config['pairs'])

@@ -232,3 +235,5 @@
             self.current_analysis.has_bias = True
         else:
             print(self.local_config['strategy_list'][0] + ": no bias detected")
+
+        self.failed_bias_check = False

From 2b416d3b62159a4e789d5bf5124d3641b651405a Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Sun, 16 Apr 2023 23:47:10 +0200
Subject: [PATCH 005/130] - Added a first version of docs (needs checking)
 - optimized pairs for entry_varholder and exit_varholder to only check a
 single pair instead of all pairs.
 - bias-check of freqai strategies now possible
 - added condition to not crash when compared_df is empty (meaning no
 differences have been found)

---
 docs/utils.md                                 | 33 ++++++++++++
 freqtrade/commands/strategy_utils_commands.py |  6 +++
 .../backtest_lookahead_bias_checker.py        | 51 ++++++++++++-------
 3 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/docs/utils.md b/docs/utils.md
index eb675442f..cb77ca449 100644
--- a/docs/utils.md
+++ b/docs/utils.md
@@ -999,3 +999,36 @@ Common arguments:
     Path to userdata directory.

 ```
+### Backtest lookahead bias checker
+#### Summary
+Checks a given strategy for look ahead bias
+Look ahead bias means that the backtest uses data from future candles thereby not making it viable beyond backtesting
+and producing false hopes for the one backtesting.
+
+#### Introduction
+Many strategies - without the programmer knowing - have fallen prey to look ahead bias.
+
+Any backtest will populate the full dataframe, including all timestamps, right at the beginning.
+If the programmer is not careful, or is oblivious to how things work internally
+(which can sometimes be really hard to find out), the strategy will simply look into the future,
+making it amazing in backtesting but not realistic.
+
+This tool tries to verify a strategy's validity by detecting the aforementioned look ahead bias.
+
+#### How does the command work?
+It does not look at the strategy code itself; instead it runs multiple backtests
+with precisely cut timeranges and analyzes the results each time, comparing them to the full timerange.
+
+At first, it starts a backtest over the whole duration,
+and then repeats backtests from the same starting point up to each point of interest.
+In addition, it compares the dataframes from the overall backtest with those of the cut backtests.
+
+At the end it prints a results table in the terminal.
+
+Hint:
+If an entry or exit condition is only triggered rarely, or the timerange was chosen
+so that only a few entry conditions are met,
+then the bias checker is unable to catch the biased entry or exit condition -
+in the end it only checks entry and exit signals that have actually been triggered.
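+
+#### Example
+A minimal invocation could look like the following sketch - the strategy name
+is made up, while `--minimum-trade-amount` and `--targeted-trade-amount` are
+the options introduced earlier in this patch series (defaulting to 10 and 20):
+
+```bash
+freqtrade backtest-lookahead-bias-checker --strategy MyAwesomeStrategy \
+    --timerange 20220901-20221001 --minimum-trade-amount 10 \
+    --targeted-trade-amount 20
+```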
+ +---Flow chart here for better understanding--- diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py index 417d98ad5..ab31cfa82 100755 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -91,6 +91,12 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None: for filtered_strategy_obj in filtered_strategy_objs: bias_checker_instances.append( initialize_single_lookahead_bias_checker(filtered_strategy_obj, config, args)) + elif 'strategy' in args and args['strategy'] is not None: + for strategy_obj in strategy_objs: + if strategy_obj['name'] == args['strategy']: + bias_checker_instances.append( + initialize_single_lookahead_bias_checker(strategy_obj, config, args)) + break else: processed_locations = set() for strategy_obj in strategy_objs: diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py index 98b82e209..2e5ef4165 100755 --- a/freqtrade/strategy/backtest_lookahead_bias_checker.py +++ b/freqtrade/strategy/backtest_lookahead_bias_checker.py @@ -1,4 +1,6 @@ import copy +import pathlib +import shutil from copy import deepcopy from datetime import datetime, timedelta, timezone @@ -45,8 +47,11 @@ class BacktestLookaheadBiasChecker: self.current_analysis = None self.local_config = None self.full_varHolder = None + self.entry_varHolder = None self.exit_varHolder = None + self.entry_varHolders = [] + self.exit_varHolders = [] self.backtesting = None self.minimum_trade_amount = None self.targeted_trade_amount = None @@ -105,29 +110,36 @@ class BacktestLookaheadBiasChecker: if cut_df_cut.shape[0] != 0: compare_df = full_df_cut.compare(cut_df_cut) - # skippedColumns = ["date", "open", "high", "low", "close", "volume"] - for col_name, values in compare_df.items(): - col_idx = compare_df.columns.get_loc(col_name) - compare_df_row = compare_df.iloc[0] - # compare_df now comprises tuples with [1] having either 'self' or 'other' - if 'other' in col_name[1]: - continue - self_value = compare_df_row[col_idx] - other_value = compare_df_row[col_idx + 1] + if compare_df.shape[0] > 0: + for col_name, values in compare_df.items(): + col_idx = compare_df.columns.get_loc(col_name) + compare_df_row = compare_df.iloc[0] + # compare_df now comprises tuples with [1] having either 'self' or 'other' + if 'other' in col_name[1]: + continue + self_value = compare_df_row[col_idx] + other_value = compare_df_row[col_idx + 1] - # output differences - if self_value != other_value: + # output differences + if self_value != other_value: - if not self.current_analysis.false_indicators.__contains__(col_name[0]): - self.current_analysis.false_indicators.append(col_name[0]) - print(f"=> found look ahead bias in indicator {col_name[0]}. " + - f"{str(self_value)} != {str(other_value)}") + if not self.current_analysis.false_indicators.__contains__(col_name[0]): + self.current_analysis.false_indicators.append(col_name[0]) + print(f"=> found look ahead bias in indicator {col_name[0]}. 
" + + f"{str(self_value)} != {str(other_value)}") def prepare_data(self, varHolder, pairs_to_load): + + # purge previous data + abs_folder_path = pathlib.Path("user_data/models/uniqe-id").resolve() + # remove folder and its contents + if pathlib.Path.exists(abs_folder_path): + shutil.rmtree(abs_folder_path) + prepare_data_config = copy.deepcopy(self.local_config) prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varHolder.from_dt)) + "-" + str(self.dt_to_timestamp(varHolder.to_dt))) - prepare_data_config['pairs'] = pairs_to_load + prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load self.backtesting = Backtesting(prepare_data_config) self.backtesting._set_strategy(self.backtesting.strategylist[0]) @@ -137,9 +149,6 @@ class BacktestLookaheadBiasChecker: varHolder.indicators = self.backtesting.strategy.advise_all_indicators(varHolder.data) varHolder.result = self.get_result(self.backtesting, varHolder.indicators) - def update_output_file(self): - pass - def start(self, config, strategy_obj: dict, args) -> None: # deepcopy so we can change the pairs for the 2ndary runs @@ -195,6 +204,8 @@ class BacktestLookaheadBiasChecker: self.entry_varHolder = VarHolder() self.exit_varHolder = VarHolder() + self.entry_varHolders.append(self.entry_varHolder) + self.exit_varHolders.append(self.exit_varHolder) self.entry_varHolder.from_dt = self.full_varHolder.from_dt self.entry_varHolder.compared_dt = result_row['open_date'] @@ -224,6 +235,8 @@ class BacktestLookaheadBiasChecker: self.exit_varHolder.result, "close_date", self.exit_varHolder.compared_dt): self.current_analysis.false_exit_signals += 1 + if len(self.entry_varHolders) >= 10: + pass # check if the indicators themselves contain biased data self.analyze_indicators(self.full_varHolder, self.entry_varHolder, result_row['pair']) self.analyze_indicators(self.full_varHolder, self.exit_varHolder, result_row['pair']) From 2306c74dc19c6c726c0e5137da13d9ecf9ffbd82 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 6 May 2023 21:56:11 +0200 Subject: [PATCH 006/130] adjusted code to matthias' specifications did not change the code so that it only loads data once yet. --- docs/utils.md | 4 +- freqtrade/commands/__init__.py | 6 +- freqtrade/commands/arguments.py | 26 +- freqtrade/commands/optimize_commands.py | 50 +++ freqtrade/commands/strategy_utils_commands.py | 136 ------- freqtrade/optimize/lookahead_analysis.py | 347 ++++++++++++++++++ 6 files changed, 415 insertions(+), 154 deletions(-) create mode 100755 freqtrade/optimize/lookahead_analysis.py diff --git a/docs/utils.md b/docs/utils.md index cb77ca449..cf8d23865 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -999,9 +999,9 @@ Common arguments: Path to userdata directory. ``` -### Backtest lookahead bias checker +### Lookahead - analysis #### Summary -Checks a given strategy for look ahead bias +Checks a given strategy for look ahead bias via backtest-analysis Look ahead bias means that the backtest uses data from future candles thereby not making it viable beyond backtesting and producing false hopes for the one backtesting. 
diff --git a/freqtrade/commands/__init__.py b/freqtrade/commands/__init__.py index 8add45241..b9346fd5f 100644 --- a/freqtrade/commands/__init__.py +++ b/freqtrade/commands/__init__.py @@ -19,10 +19,10 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_f start_list_markets, start_list_strategies, start_list_timeframes, start_show_trades) from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show, - start_edge, start_hyperopt) + start_edge, start_hyperopt, + start_lookahead_analysis) from freqtrade.commands.pairlist_commands import start_test_pairlist from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit -from freqtrade.commands.strategy_utils_commands import (start_backtest_lookahead_bias_checker, - start_strategy_update) +from freqtrade.commands.strategy_utils_commands import start_strategy_update from freqtrade.commands.trade_commands import start_trading from freqtrade.commands.webserver_commands import start_webserver diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index ac5c33ad1..59ba0bedb 100755 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -118,9 +118,9 @@ NO_CONF_ALLOWED = ["create-userdir", "list-exchanges", "new-strategy"] ARGS_STRATEGY_UPDATER = ["strategy_list", "strategy_path", "recursive_strategy_search"] -ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER = ARGS_BACKTEST + ["minimum_trade_amount", - "targeted_trade_amount", - "overwrite_existing_exportfilename_content"] +ARGS_LOOKAHEAD_ANALYSIS = ARGS_BACKTEST + ["minimum_trade_amount", + "targeted_trade_amount", + "overwrite_existing_exportfilename_content"] # + ["target_trades", "minimum_trades", @@ -200,8 +200,7 @@ class Arguments: self.parser = argparse.ArgumentParser(description='Free, open source crypto trading bot') self._build_args(optionlist=['version'], parser=self.parser) - from freqtrade.commands import (start_analysis_entries_exits, - start_backtest_lookahead_bias_checker, start_backtesting, + from freqtrade.commands import (start_analysis_entries_exits, start_backtesting, start_backtesting_show, start_convert_data, start_convert_db, start_convert_trades, start_create_userdir, start_download_data, start_edge, @@ -209,8 +208,9 @@ class Arguments: start_install_ui, start_list_data, start_list_exchanges, start_list_freqAI_models, start_list_markets, start_list_strategies, start_list_timeframes, - start_new_config, start_new_strategy, start_plot_dataframe, - start_plot_profit, start_show_trades, start_strategy_update, + start_lookahead_analysis, start_new_config, + start_new_strategy, start_plot_dataframe, start_plot_profit, + start_show_trades, start_strategy_update, start_test_pairlist, start_trading, start_webserver) subparsers = self.parser.add_subparsers(dest='command', @@ -462,12 +462,12 @@ class Arguments: self._build_args(optionlist=ARGS_STRATEGY_UPDATER, parser=strategy_updater_cmd) - # Add backtest lookahead bias checker subcommand - backtest_lookahead_bias_checker_cmd = \ - subparsers.add_parser('backtest-lookahead-bias-checker', + # Add lookahead_analysis subcommand + lookahead_analayis_cmd = \ + subparsers.add_parser('lookahead-analysis', help="checks for potential look ahead bias", parents=[_common_parser, _strategy_parser]) - backtest_lookahead_bias_checker_cmd.set_defaults(func=start_backtest_lookahead_bias_checker) + lookahead_analayis_cmd.set_defaults(func=start_lookahead_analysis) - self._build_args(optionlist=ARGS_BACKTEST_LOOKAHEAD_BIAS_CHECKER, - 
parser=backtest_lookahead_bias_checker_cmd) + self._build_args(optionlist=ARGS_LOOKAHEAD_ANALYSIS, + parser=lookahead_analayis_cmd) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 1bfd384fc..765f2caf2 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -6,6 +6,8 @@ from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.exceptions import OperationalException from freqtrade.misc import round_coin_value +from freqtrade.optimize.lookahead_analysis import LookaheadAnalysisSubFunctions +from freqtrade.resolvers import StrategyResolver logger = logging.getLogger(__name__) @@ -132,3 +134,51 @@ def start_edge(args: Dict[str, Any]) -> None: # Initialize Edge object edge_cli = EdgeCli(config) edge_cli.start() + + +def start_lookahead_analysis(args: Dict[str, Any]) -> None: + """ + Start the backtest bias tester script + :param args: Cli args from Arguments() + :return: None + """ + config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) + + if args['targeted_trade_amount'] < args['minimum_trade_amount']: + # add logic that tells the user to check the configuration + # since this combo doesn't make any sense. + pass + + strategy_objs = StrategyResolver.search_all_objects( + config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) + + lookaheadAnalysis_instances = [] + strategy_list = [] + + # unify --strategy and --strategy_list to one list + if 'strategy' in args and args['strategy'] is not None: + strategy_list = [args['strategy']] + else: + strategy_list = args['strategy_list'] + + # check if strategies can be properly loaded, only check them if they can be. + if strategy_list is not None: + for strat in strategy_list: + for strategy_obj in strategy_objs: + if strategy_obj['name'] == strat and strategy_obj not in strategy_list: + lookaheadAnalysis_instances.append( + LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( + strategy_obj, config, args)) + break + + # report the results + if lookaheadAnalysis_instances: + LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( + lookaheadAnalysis_instances) + if args['exportfilename'] is not None: + LookaheadAnalysisSubFunctions.export_to_csv(args, lookaheadAnalysis_instances) + else: + logger.error("There were no strategies specified neither through " + "--strategy nor through " + "--strategy_list " + "or timeframe was not specified.") diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py index ab31cfa82..e579ec475 100755 --- a/freqtrade/commands/strategy_utils_commands.py +++ b/freqtrade/commands/strategy_utils_commands.py @@ -4,13 +4,9 @@ import time from pathlib import Path from typing import Any, Dict -import pandas as pd -from tabulate import tabulate - from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.resolvers import StrategyResolver -from freqtrade.strategy.backtest_lookahead_bias_checker import BacktestLookaheadBiasChecker from freqtrade.strategy.strategyupdater import StrategyUpdater @@ -57,135 +53,3 @@ def start_conversion(strategy_obj, config): instance_strategy_updater.start(config, strategy_obj) elapsed = time.perf_counter() - start print(f"Conversion of {Path(strategy_obj['location']).name} took {elapsed:.1f} seconds.") - - # except: - # pass - - -def start_backtest_lookahead_bias_checker(args: 
Dict[str, Any]) -> None: - """ - Start the backtest bias tester script - :param args: Cli args from Arguments() - :return: None - """ - config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) - - if args['targeted_trade_amount'] < args['minimum_trade_amount']: - # add logic that tells the user to check the configuration - # since this combo doesn't make any sense. - pass - - strategy_objs = StrategyResolver.search_all_objects( - config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) - - bias_checker_instances = [] - filtered_strategy_objs = [] - if 'strategy_list' in args and args['strategy_list'] is not None: - for args_strategy in args['strategy_list']: - for strategy_obj in strategy_objs: - if (strategy_obj['name'] == args_strategy - and strategy_obj not in filtered_strategy_objs): - filtered_strategy_objs.append(strategy_obj) - break - - for filtered_strategy_obj in filtered_strategy_objs: - bias_checker_instances.append( - initialize_single_lookahead_bias_checker(filtered_strategy_obj, config, args)) - elif 'strategy' in args and args['strategy'] is not None: - for strategy_obj in strategy_objs: - if strategy_obj['name'] == args['strategy']: - bias_checker_instances.append( - initialize_single_lookahead_bias_checker(strategy_obj, config, args)) - break - else: - processed_locations = set() - for strategy_obj in strategy_objs: - if strategy_obj['location'] not in processed_locations: - processed_locations.add(strategy_obj['location']) - bias_checker_instances.append( - initialize_single_lookahead_bias_checker(strategy_obj, config, args)) - text_table_bias_checker_instances(bias_checker_instances) - export_to_csv(args, bias_checker_instances) - - -def text_table_bias_checker_instances(bias_checker_instances): - headers = ['filename', 'strategy', 'has_bias', - 'total_signals', 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] - data = [] - for current_instance in bias_checker_instances: - if current_instance.failed_bias_check: - data.append( - [ - current_instance.strategy_obj['location'].parts[-1], - current_instance.strategy_obj['name'], - 'error while checking' - ] - ) - else: - data.append( - [ - current_instance.strategy_obj['location'].parts[-1], - current_instance.strategy_obj['name'], - current_instance.current_analysis.has_bias, - current_instance.current_analysis.total_signals, - current_instance.current_analysis.false_entry_signals, - current_instance.current_analysis.false_exit_signals, - ", ".join(current_instance.current_analysis.false_indicators) - ] - ) - table = tabulate(data, headers=headers, tablefmt="orgtbl") - print(table) - - -def export_to_csv(args, bias_checker_instances): - def add_or_update_row(df, row_data): - if ( - (df['filename'] == row_data['filename']) & - (df['strategy'] == row_data['strategy']) - ).any(): - # Update existing row - pd_series = pd.DataFrame([row_data]) - df.loc[ - (df['filename'] == row_data['filename']) & - (df['strategy'] == row_data['strategy']) - ] = pd_series - else: - # Add new row - df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)]) - - return df - - if Path(args['exportfilename']).exists(): - # Read CSV file into a pandas dataframe - csv_df = pd.read_csv(args['exportfilename']) - else: - # Create a new empty DataFrame with the desired column names and set the index - csv_df = pd.DataFrame(columns=[ - 'filename', 'strategy', 'has_bias', 'total_signals', - 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators' - ], - index=None) - - for inst in 
bias_checker_instances: - new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], - 'strategy': inst.strategy_obj['name'], - 'has_bias': inst.current_analysis.has_bias, - 'total_signals': inst.current_analysis.total_signals, - 'biased_entry_signals': inst.current_analysis.false_entry_signals, - 'biased_exit_signals': inst.current_analysis.false_exit_signals, - 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} - csv_df = add_or_update_row(csv_df, new_row_data) - - print(f"saving {args['exportfilename']}") - csv_df.to_csv(args['exportfilename'], index=False) - - -def initialize_single_lookahead_bias_checker(strategy_obj, config, args): - print(f"Bias test of {Path(strategy_obj['location']).name} started.") - start = time.perf_counter() - current_instance = BacktestLookaheadBiasChecker() - current_instance.start(config, strategy_obj, args) - elapsed = time.perf_counter() - start - print(f"checking look ahead bias via backtests of {Path(strategy_obj['location']).name} " - f"took {elapsed:.1f} seconds.") - return current_instance diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py new file mode 100755 index 000000000..fa8cd5822 --- /dev/null +++ b/freqtrade/optimize/lookahead_analysis.py @@ -0,0 +1,347 @@ +import copy +import logging +import pathlib +import shutil +import time +from copy import deepcopy +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Dict, List + +import pandas as pd + +from freqtrade.configuration import TimeRange +from freqtrade.data.history import get_timerange +from freqtrade.exchange import timeframe_to_minutes +from freqtrade.optimize.backtesting import Backtesting + + +logger = logging.getLogger(__name__) + + +class VarHolder: + timerange: TimeRange + data: pd.DataFrame + indicators: pd.DataFrame + result: pd.DataFrame + compared: pd.DataFrame + from_dt: datetime + to_dt: datetime + compared_dt: datetime + timeframe: str + + +class Analysis: + def __init__(self) -> None: + self.total_signals = 0 + self.false_entry_signals = 0 + self.false_exit_signals = 0 + self.false_indicators: List[str] = [] + self.has_bias = False + + +class LookaheadAnalysis: + + def __init__(self, config: Dict[str, Any], strategy_obj: dict, args: Dict[str, Any]): + self.failed_bias_check = True + self.full_varHolder = VarHolder + + self.entry_varHolders: List[VarHolder] = [] + self.exit_varHolders: List[VarHolder] = [] + + # pull variables the scope of the lookahead_analysis-instance + self.local_config = deepcopy(config) + self.local_config['strategy'] = strategy_obj['name'] + self.current_analysis = Analysis() + self.minimum_trade_amount = args['minimum_trade_amount'] + self.targeted_trade_amount = args['targeted_trade_amount'] + self.exportfilename = args['exportfilename'] + self.strategy_obj = strategy_obj + + @staticmethod + def dt_to_timestamp(dt: datetime): + timestamp = int(dt.replace(tzinfo=timezone.utc).timestamp()) + return timestamp + + @staticmethod + def get_result(backtesting, processed: pd.DataFrame): + min_date, max_date = get_timerange(processed) + + result = backtesting.backtest( + processed=deepcopy(processed), + start_date=min_date, + end_date=max_date + ) + return result + + @staticmethod + def report_signal(result: dict, column_name: str, checked_timestamp: datetime): + df = result['results'] + row_count = df[column_name].shape[0] + + if row_count == 0: + return False + else: + + df_cut = df[(df[column_name] == checked_timestamp)] + if 
df_cut[column_name].shape[0] == 0: + return False + else: + return True + return False + + # analyzes two data frames with processed indicators and shows differences between them. + def analyze_indicators(self, full_vars: VarHolder, cut_vars: VarHolder, current_pair): + # extract dataframes + cut_df = cut_vars.indicators[current_pair] + full_df = full_vars.indicators[current_pair] + + # cut longer dataframe to length of the shorter + full_df_cut = full_df[ + (full_df.date == cut_vars.compared_dt) + ].reset_index(drop=True) + cut_df_cut = cut_df[ + (cut_df.date == cut_vars.compared_dt) + ].reset_index(drop=True) + + # compare dataframes + if full_df_cut.shape[0] != 0: + if cut_df_cut.shape[0] != 0: + compare_df = full_df_cut.compare(cut_df_cut) + + if compare_df.shape[0] > 0: + for col_name, values in compare_df.items(): + col_idx = compare_df.columns.get_loc(col_name) + compare_df_row = compare_df.iloc[0] + # compare_df now comprises tuples with [1] having either 'self' or 'other' + if 'other' in col_name[1]: + continue + self_value = compare_df_row[col_idx] + other_value = compare_df_row[col_idx + 1] + + # output differences + if self_value != other_value: + + if not self.current_analysis.false_indicators.__contains__(col_name[0]): + self.current_analysis.false_indicators.append(col_name[0]) + logging.info(f"=> found look ahead bias in indicator " + f"{col_name[0]}. " + f"{str(self_value)} != {str(other_value)}") + + def prepare_data(self, varholder: VarHolder, pairs_to_load: List[pd.DataFrame]): + + # purge previous data + abs_folder_path = pathlib.Path("user_data/models/uniqe-id").resolve() + # remove folder and its contents + if pathlib.Path.exists(abs_folder_path): + shutil.rmtree(abs_folder_path) + + prepare_data_config = copy.deepcopy(self.local_config) + prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varholder.from_dt)) + "-" + + str(self.dt_to_timestamp(varholder.to_dt))) + prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load + + self.backtesting = Backtesting(prepare_data_config) + self.backtesting._set_strategy(self.backtesting.strategylist[0]) + varholder.data, varholder.timerange = self.backtesting.load_bt_data() + self.backtesting.load_bt_data_detail() + varholder.timeframe = self.backtesting.timeframe + + varholder.indicators = self.backtesting.strategy.advise_all_indicators(varholder.data) + varholder.result = self.get_result(self.backtesting, varholder.indicators) + + def fill_full_varholder(self): + self.full_varHolder = VarHolder() + + # define datetime in human-readable format + parsed_timerange = TimeRange.parse_timerange(self.local_config['timerange']) + + if parsed_timerange.startdt is None: + self.full_varHolder.from_dt = datetime.fromtimestamp(0, tz=timezone.utc) + else: + self.full_varHolder.from_dt = parsed_timerange.startdt + + if parsed_timerange.stopdt is None: + self.full_varHolder.to_dt = datetime.utcnow() + else: + self.full_varHolder.to_dt = parsed_timerange.stopdt + + self.prepare_data(self.full_varHolder, self.local_config['pairs']) + + def fill_entry_and_exit_varHolders(self, idx, result_row): + # entry_varHolder + entry_varHolder = VarHolder() + self.entry_varHolders.append(entry_varHolder) + entry_varHolder.from_dt = self.full_varHolder.from_dt + entry_varHolder.compared_dt = result_row['open_date'] + # to_dt needs +1 candle since it won't buy on the last candle + entry_varHolder.to_dt = ( + result_row['open_date'] + + timedelta(minutes=timeframe_to_minutes(self.full_varHolder.timeframe))) + 
self.prepare_data(entry_varHolder, [result_row['pair']])
+
+        # exit_varHolder
+        exit_varHolder = VarHolder()
+        self.exit_varHolders.append(exit_varHolder)
+        # to_dt needs +1 candle since it will always exit/force-exit trades on the last candle
+        exit_varHolder.from_dt = self.full_varHolder.from_dt
+        exit_varHolder.to_dt = (
+            result_row['close_date'] +
+            timedelta(minutes=timeframe_to_minutes(self.full_varHolder.timeframe)))
+        exit_varHolder.compared_dt = result_row['close_date']
+        self.prepare_data(exit_varHolder, [result_row['pair']])
+
+    # analyze a full trade of full_varHolder and check it for bias
+    def analyze_row(self, idx, result_row):
+        # if force-sold, ignore this signal since here it will unconditionally exit.
+        if result_row.close_date == self.dt_to_timestamp(self.full_varHolder.to_dt):
+            return
+
+        # keep track of how many signals are processed in total
+        self.current_analysis.total_signals += 1
+
+        # fill entry_varHolder and exit_varHolder
+        self.fill_entry_and_exit_varHolders(idx, result_row)
+
+        # register if buy signal is broken
+        if not self.report_signal(
+                self.entry_varHolders[idx].result,
+                "open_date",
+                self.entry_varHolders[idx].compared_dt):
+            self.current_analysis.false_entry_signals += 1
+
+        # register if sell signal is broken
+        if not self.report_signal(
+                self.exit_varHolders[idx].result,
+                "close_date",
+                self.exit_varHolders[idx].compared_dt):
+            self.current_analysis.false_exit_signals += 1
+
+        # check if the indicators themselves contain biased data
+        self.analyze_indicators(self.full_varHolder, self.entry_varHolders[idx], result_row['pair'])
+        self.analyze_indicators(self.full_varHolder, self.exit_varHolders[idx], result_row['pair'])
+
+    def start(self) -> None:
+
+        # first make a single backtest
+        self.fill_full_varholder()
+
+        # check if the requirements of full_varHolder have been met
+        found_signals: int = self.full_varHolder.result['results'].shape[0] + 1
+        if found_signals >= self.targeted_trade_amount:
+            logging.info(f"Found {found_signals} trades, "
+                         f"calculating {self.targeted_trade_amount} trades.")
+        elif self.targeted_trade_amount >= found_signals >= self.minimum_trade_amount:
+            logging.info(f"Only found {found_signals} trades. Calculating all available trades.")
+        else:
+            logging.info(f"Found {found_signals} trades "
+                         f"which is less than minimum_trade_amount {self.minimum_trade_amount}. 
" + f"Cancelling this backtest lookahead bias test.") + return + + # now we loop through all signals + # starting from the same datetime to avoid miss-reports of bias + for idx, result_row in self.full_varHolder.result['results'].iterrows(): + if self.current_analysis.total_signals == self.targeted_trade_amount: + break + self.analyze_row(idx, result_row) + + # check and report signals + if (self.current_analysis.false_entry_signals > 0 or + self.current_analysis.false_exit_signals > 0 or + len(self.current_analysis.false_indicators) > 0): + logging.info(f" => {self.local_config['strategy']} + : bias detected!") + self.current_analysis.has_bias = True + else: + logging.info(self.local_config['strategy'] + ": no bias detected") + + self.failed_bias_check = False + + +class LookaheadAnalysisSubFunctions: + @staticmethod + def text_table_lookahead_analysis_instances(lookahead_instances: List[LookaheadAnalysis]): + headers = ['filename', 'strategy', 'has_bias', 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] + data = [] + for inst in lookahead_instances: + if inst.failed_bias_check: + data.append( + [ + inst.strategy_obj['location'].parts[-1], + inst.strategy_obj['name'], + 'error while checking' + ] + ) + else: + data.append( + [ + inst.strategy_obj['location'].parts[-1], + inst.strategy_obj['name'], + inst.current_analysis.has_bias, + inst.current_analysis.total_signals, + inst.current_analysis.false_entry_signals, + inst.current_analysis.false_exit_signals, + ", ".join(inst.current_analysis.false_indicators) + ] + ) + from tabulate import tabulate + table = tabulate(data, headers=headers, tablefmt="orgtbl") + print(table) + + @staticmethod + def export_to_csv(args: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): + def add_or_update_row(df, row_data): + if ( + (df['filename'] == row_data['filename']) & + (df['strategy'] == row_data['strategy']) + ).any(): + # Update existing row + pd_series = pd.DataFrame([row_data]) + df.loc[ + (df['filename'] == row_data['filename']) & + (df['strategy'] == row_data['strategy']) + ] = pd_series + else: + # Add new row + df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)]) + + return df + + if Path(args['exportfilename']).exists(): + # Read CSV file into a pandas dataframe + csv_df = pd.read_csv(args['exportfilename']) + else: + # Create a new empty DataFrame with the desired column names and set the index + csv_df = pd.DataFrame(columns=[ + 'filename', 'strategy', 'has_bias', 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators' + ], + index=None) + + for inst in lookahead_analysis: + new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], + 'strategy': inst.strategy_obj['name'], + 'has_bias': inst.current_analysis.has_bias, + 'total_signals': inst.current_analysis.total_signals, + 'biased_entry_signals': inst.current_analysis.false_entry_signals, + 'biased_exit_signals': inst.current_analysis.false_exit_signals, + 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} + csv_df = add_or_update_row(csv_df, new_row_data) + + logger.info(f"saving {args['exportfilename']}") + csv_df.to_csv(args['exportfilename'], index=False) + + @staticmethod + def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any], + args: Dict[str, Any]): + + logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.") + start = time.perf_counter() + current_instance = LookaheadAnalysis(config, strategy_obj, 
args)
+        current_instance.start()
+        elapsed = time.perf_counter() - start
+        logger.info(f"checking look ahead bias via backtests "
+                    f"of {Path(strategy_obj['location']).name} "
+                    f"took {elapsed:.0f} seconds.")
+        return current_instance

From b252bdd3c7e15956920ff7b308265cdfb4c45afc Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Mon, 8 May 2023 22:35:13 +0200
Subject: [PATCH 007/130] made purging of the models folder variable, driven
 by config.freqai.identifier

---
 freqtrade/optimize/lookahead_analysis.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py
index fa8cd5822..3ca1b71a0 100755
--- a/freqtrade/optimize/lookahead_analysis.py
+++ b/freqtrade/optimize/lookahead_analysis.py
@@ -130,11 +130,16 @@ class LookaheadAnalysis:
 
     def prepare_data(self, varholder: VarHolder, pairs_to_load: List[pd.DataFrame]):
 
-        # purge previous data
-        abs_folder_path = pathlib.Path("user_data/models/uniqe-id").resolve()
-        # remove folder and its contents
-        if pathlib.Path.exists(abs_folder_path):
-            shutil.rmtree(abs_folder_path)
+        if 'freqai' in self.local_config and 'identifier' in self.local_config['freqai']:
+            # purge previous data if the freqai model is defined
+            # (to be sure nothing is carried over from older backtests)
+            path_to_current_identifier = (
+                pathlib.Path(f"{self.local_config['user_data_dir']}"
+                             "/models/"
+                             f"{self.local_config['freqai']['identifier']}").resolve())
+            # remove folder and its contents
+            if pathlib.Path.exists(path_to_current_identifier):
+                shutil.rmtree(path_to_current_identifier)
 
         prepare_data_config = copy.deepcopy(self.local_config)
         prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varholder.from_dt)) + "-" +
@@ -143,6 +148,7 @@ class LookaheadAnalysis:
 
         self.backtesting = Backtesting(prepare_data_config)
         self.backtesting._set_strategy(self.backtesting.strategylist[0])
+
         varholder.data, varholder.timerange = self.backtesting.load_bt_data()
         self.backtesting.load_bt_data_detail()
         varholder.timeframe = self.backtesting.timeframe
@@ -168,7 +174,7 @@ class LookaheadAnalysis:
 
         self.prepare_data(self.full_varHolder, self.local_config['pairs'])
 
-    def fill_entry_and_exit_varHolders(self, idx, result_row):
+    def fill_entry_and_exit_varHolders(self, result_row):
         # entry_varHolder
         entry_varHolder = VarHolder()
         self.entry_varHolders.append(entry_varHolder)
@@ -201,7 +207,7 @@ class LookaheadAnalysis:
         self.current_analysis.total_signals += 1
 
         # fill entry_varHolder and exit_varHolder
-        self.fill_entry_and_exit_varHolders(idx, result_row)
+        self.fill_entry_and_exit_varHolders(result_row)
 
         # register if buy signal is broken
         if not self.report_signal(

From 91ce1cb2aeab0ff332bb2a0b3a6823c426c571cf Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Wed, 10 May 2023 22:41:27 +0200
Subject: [PATCH 008/130] removed overwrite_existing_exportfilename_content,
 switched from args to config, renamed exportfilename to
 lookahead_analysis_exportfilename

Removed overwrite_existing_exportfilename_content: I won't use it myself, and
it wouldn't make sense for others not to overwrite something they
re-calculated.
Switched from args to config (args still work).
Renamed exportfilename to lookahead_analysis_exportfilename so that, if users
decide to put something into it, it won't compete with other configurations.

---
 freqtrade/commands/arguments.py          |  2 +-
 freqtrade/commands/cli_options.py        | 11 ++++++-----
 freqtrade/commands/optimize_commands.py  |  4 ++--
 freqtrade/configuration/configuration.py |  6 +++++-
 freqtrade/optimize/lookahead_analysis.py | 10 +++++-----
 5 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/freqtrade/commands/arguments.py 
b/freqtrade/commands/arguments.py index f0419a7d9..af5f0a470 100755 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -121,7 +121,7 @@ ARGS_STRATEGY_UPDATER = ["strategy_list", "strategy_path", "recursive_strategy_s ARGS_LOOKAHEAD_ANALYSIS = ARGS_BACKTEST + ["minimum_trade_amount", "targeted_trade_amount", - "overwrite_existing_exportfilename_content"] + "lookahead_analysis_exportfilename"] # + ["target_trades", "minimum_trades", diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index 77f8f2005..5dd587559 100755 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -704,9 +704,10 @@ AVAILABLE_CLI_OPTIONS = { metavar='INT', default=20, ), - "overwrite_existing_exportfilename_content": Arg( - '--overwrite-existing-exportfilename-content', - help='overwrites existing contents if existent with exportfilename given', - action='store_true' - ) + "lookahead_analysis_exportfilename": Arg( + '--lookahead-analysis-exportfilename', + help="Use this filename to store lookahead-analysis-results", + default=None, + type=str + ), } diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 765f2caf2..78ad140de 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -175,8 +175,8 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: if lookaheadAnalysis_instances: LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( lookaheadAnalysis_instances) - if args['exportfilename'] is not None: - LookaheadAnalysisSubFunctions.export_to_csv(args, lookaheadAnalysis_instances) + if config['lookahead_analysis_exportfilename'] is not None: + LookaheadAnalysisSubFunctions.export_to_csv(config, lookaheadAnalysis_instances) else: logger.error("There were no strategies specified neither through " "--strategy nor through " diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index 8e9a7fd7c..defb76b4b 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -203,7 +203,7 @@ class Configuration: # This will override the strategy configuration self._args_to_config(config, argname='timeframe', logstring='Parameter -i/--timeframe detected ... ' - 'Using timeframe: {} ...') + 'Using timeframe: {} ...') self._args_to_config(config, argname='position_stacking', logstring='Parameter --enable-position-stacking detected ...') @@ -300,6 +300,10 @@ class Configuration: self._args_to_config(config, argname='hyperoptexportfilename', logstring='Using hyperopt file: {}') + if self.args["lookahead_analysis_exportfilename"] is not None: + self._args_to_config(config, argname='lookahead_analysis_exportfilename', + logstring='saving lookahead analysis results into {} ...') + self._args_to_config(config, argname='epochs', logstring='Parameter --epochs detected ... ' 'Will run Hyperopt with for {} epochs ...' 
diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 3ca1b71a0..8e6771b4b 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -296,7 +296,7 @@ class LookaheadAnalysisSubFunctions: print(table) @staticmethod - def export_to_csv(args: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): + def export_to_csv(config: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): def add_or_update_row(df, row_data): if ( (df['filename'] == row_data['filename']) & @@ -314,9 +314,9 @@ class LookaheadAnalysisSubFunctions: return df - if Path(args['exportfilename']).exists(): + if Path(config['lookahead_analysis_exportfilename']).exists(): # Read CSV file into a pandas dataframe - csv_df = pd.read_csv(args['exportfilename']) + csv_df = pd.read_csv(config['lookahead_analysis_exportfilename']) else: # Create a new empty DataFrame with the desired column names and set the index csv_df = pd.DataFrame(columns=[ @@ -335,8 +335,8 @@ class LookaheadAnalysisSubFunctions: 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} csv_df = add_or_update_row(csv_df, new_row_data) - logger.info(f"saving {args['exportfilename']}") - csv_df.to_csv(args['exportfilename'], index=False) + logger.info(f"saving {config['lookahead_analysis_exportfilename']}") + csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) @staticmethod def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any], From 7d871faf04d5c03fa9aa213db93934e74f9576c1 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 13 May 2023 22:40:11 +0200 Subject: [PATCH 009/130] added exportfilename to args_to_config introduced strategy_test_v3_with_lookahead_bias.py for checking lookahead_bias# introduced test_lookahead_analysis which currently is broken --- freqtrade/configuration/configuration.py | 7 +- .../strategy_test_v3_with_lookahead_bias.py | 50 ++++++++++++++ tests/test_lookahead_analysis.py | 69 +++++++++++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py create mode 100644 tests/test_lookahead_analysis.py diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index defb76b4b..c763d791a 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -300,9 +300,10 @@ class Configuration: self._args_to_config(config, argname='hyperoptexportfilename', logstring='Using hyperopt file: {}') - if self.args["lookahead_analysis_exportfilename"] is not None: - self._args_to_config(config, argname='lookahead_analysis_exportfilename', - logstring='saving lookahead analysis results into {} ...') + if self.args.get('lookahead_analysis_exportfilename'): + if self.args["lookahead_analysis_exportfilename"] is not None: + self._args_to_config(config, argname='lookahead_analysis_exportfilename', + logstring='saving lookahead analysis results into {} ...') self._args_to_config(config, argname='epochs', logstring='Parameter --epochs detected ... 
' diff --git a/tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py b/tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py new file mode 100644 index 000000000..6cf894586 --- /dev/null +++ b/tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py @@ -0,0 +1,50 @@ +# pragma pylint: disable=missing-docstring, invalid-name, pointless-string-statement +from pandas import DataFrame +from technical.indicators import ichimoku + +from freqtrade.strategy import IStrategy + + +class strategy_test_v3_with_lookahead_bias(IStrategy): + INTERFACE_VERSION = 3 + + # Minimal ROI designed for the strategy + minimal_roi = { + "40": 0.0, + "30": 0.01, + "20": 0.02, + "0": 0.04 + } + + # Optimal stoploss designed for the strategy + stoploss = -0.10 + + # Optimal timeframe for the strategy + timeframe = '5m' + + # Number of candles the strategy requires before producing valid signals + startup_candle_count: int = 20 + + def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + # bias is introduced here + ichi = ichimoku(dataframe, + conversion_line_period=20, + base_line_periods=60, + laggin_span=120, + displacement=30) + dataframe['chikou_span'] = ichi['chikou_span'] + + return dataframe + + def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + dataframe.loc[ + dataframe['close'].shift(-10) > dataframe['close'], + 'enter_long'] = 1 + + return dataframe + + def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + dataframe.loc[ + dataframe['close'].shift(-10) > dataframe['close'], 'exit'] = 1 + + return dataframe diff --git a/tests/test_lookahead_analysis.py b/tests/test_lookahead_analysis.py new file mode 100644 index 000000000..24290798b --- /dev/null +++ b/tests/test_lookahead_analysis.py @@ -0,0 +1,69 @@ +# pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument + +from unittest.mock import PropertyMock + +import numpy as np + +import freqtrade.commands.arguments +import freqtrade.optimize.lookahead_analysis +from freqtrade.configuration import TimeRange +from freqtrade.data import history +from freqtrade.data.converter import clean_ohlcv_dataframe +from freqtrade.data.history import get_timerange +from tests.conftest import patch_exchange + + +def trim_dictlist(dict_list, num): + new = {} + for pair, pair_data in dict_list.items(): + new[pair] = pair_data[num:].reset_index() + return new + + +def load_data_test(what, testdatadir): + timerange = TimeRange.parse_timerange('1510694220-1510700340') + data = history.load_pair_history(pair='UNITTEST/BTC', datadir=testdatadir, + timeframe='1m', timerange=timerange, + drop_incomplete=False, + fill_up_missing=False) + + base = 0.001 + if what == 'raise': + data.loc[:, 'open'] = data.index * base + data.loc[:, 'high'] = data.index * base + 0.0001 + data.loc[:, 'low'] = data.index * base - 0.0001 + data.loc[:, 'close'] = data.index * base + + if what == 'lower': + data.loc[:, 'open'] = 1 - data.index * base + data.loc[:, 'high'] = 1 - data.index * base + 0.0001 + data.loc[:, 'low'] = 1 - data.index * base - 0.0001 + data.loc[:, 'close'] = 1 - data.index * base + + if what == 'sine': + hz = 0.1 # frequency + data.loc[:, 'open'] = np.sin(data.index * hz) / 1000 + base + data.loc[:, 'high'] = np.sin(data.index * hz) / 1000 + base + 0.0001 + data.loc[:, 'low'] = np.sin(data.index * hz) / 1000 + base - 0.0001 + data.loc[:, 'close'] = np.sin(data.index * hz) / 1000 + base + + return {'UNITTEST/BTC': clean_ohlcv_dataframe(data, 
timeframe='1m', pair='UNITTEST/BTC', + fill_missing=True, drop_incomplete=True)} + + +def test_biased_strategy(default_conf, mocker, caplog) -> None: + + mocker.patch('freqtrade.data.history.get_timerange', get_timerange) + patch_exchange(mocker) + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['UNITTEST/BTC'])) + + default_conf['timeframe'] = '5m' + default_conf['timerange'] = '-1510694220' + default_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' + default_conf['strategy_path'] = 'tests/strategy/strats' + + strategy_obj = {} + strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" + freqtrade.optimize.lookahead_analysis.LookaheadAnalysis(default_conf, strategy_obj, {}) + pass From 5488789bc467e262e23116df5480691a2c064330 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:01:42 +0200 Subject: [PATCH 010/130] Arguments should be in the configuration. --- freqtrade/commands/cli_options.py | 2 -- freqtrade/constants.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index 5dd587559..33de428db 100755 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -695,14 +695,12 @@ AVAILABLE_CLI_OPTIONS = { help='set INT minimum trade amount', type=check_int_positive, metavar='INT', - default=10, ), "targeted_trade_amount": Arg( '--targeted-trade-amount', help='set INT targeted trade amount', type=check_int_positive, metavar='INT', - default=20, ), "lookahead_analysis_exportfilename": Arg( '--lookahead-analysis-exportfilename', diff --git a/freqtrade/constants.py b/freqtrade/constants.py index b8e240419..ef59d8999 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -164,6 +164,8 @@ CONF_SCHEMA = { 'trading_mode': {'type': 'string', 'enum': TRADING_MODES}, 'margin_mode': {'type': 'string', 'enum': MARGIN_MODES}, 'reduce_df_footprint': {'type': 'boolean', 'default': False}, + 'minimum_trade_amount': {'type': 'number', 'default': 10}, + 'targeted_trade_amount': {'type': 'number', 'default': 20}, 'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99}, 'backtest_breakdown': { 'type': 'array', From 2e79aaae0023635d24ec6016978dcf6aca113d9d Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:02:13 +0200 Subject: [PATCH 011/130] Remove usage of args. It's clumsy to use and prevents specifying settings in the configuration. --- freqtrade/commands/optimize_commands.py | 11 ++++------- freqtrade/optimize/lookahead_analysis.py | 13 ++++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 78ad140de..866bf8e61 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -144,7 +144,7 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: """ config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) - if args['targeted_trade_amount'] < args['minimum_trade_amount']: + if config['targeted_trade_amount'] < config['minimum_trade_amount']: # add logic that tells the user to check the configuration # since this combo doesn't make any sense. 
pass @@ -153,13 +153,10 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) lookaheadAnalysis_instances = [] - strategy_list = [] # unify --strategy and --strategy_list to one list - if 'strategy' in args and args['strategy'] is not None: - strategy_list = [args['strategy']] - else: - strategy_list = args['strategy_list'] + if not (strategy_list := config.get('strategy_list', [])): + strategy_list = [config['strategy']] # check if strategies can be properly loaded, only check them if they can be. if strategy_list is not None: @@ -168,7 +165,7 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: if strategy_obj['name'] == strat and strategy_obj not in strategy_list: lookaheadAnalysis_instances.append( LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( - strategy_obj, config, args)) + strategy_obj, config)) break # report the results diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 8e6771b4b..90aa934a6 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -42,7 +42,7 @@ class Analysis: class LookaheadAnalysis: - def __init__(self, config: Dict[str, Any], strategy_obj: dict, args: Dict[str, Any]): + def __init__(self, config: Dict[str, Any], strategy_obj: Dict): self.failed_bias_check = True self.full_varHolder = VarHolder @@ -53,9 +53,9 @@ class LookaheadAnalysis: self.local_config = deepcopy(config) self.local_config['strategy'] = strategy_obj['name'] self.current_analysis = Analysis() - self.minimum_trade_amount = args['minimum_trade_amount'] - self.targeted_trade_amount = args['targeted_trade_amount'] - self.exportfilename = args['exportfilename'] + self.minimum_trade_amount = config['minimum_trade_amount'] + self.targeted_trade_amount = config['targeted_trade_amount'] + self.exportfilename = config['exportfilename'] self.strategy_obj = strategy_obj @staticmethod @@ -339,12 +339,11 @@ class LookaheadAnalysisSubFunctions: csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) @staticmethod - def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any], - args: Dict[str, Any]): + def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]): logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.") start = time.perf_counter() - current_instance = LookaheadAnalysis(config, strategy_obj, args) + current_instance = LookaheadAnalysis(config, strategy_obj) current_instance.start() elapsed = time.perf_counter() - start logger.info(f"checking look ahead bias via backtests " From a0edbe4797e7edbdf287d86a61c113bbbf3bea0d Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:06:50 +0200 Subject: [PATCH 012/130] Switch to using config instead of args. 
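
For illustration, the pattern this refactor moves towards boils down to the following (a simplified sketch with plain dicts - hypothetical, not freqtrade's actual `_args_to_config` implementation): a CLI argument only overrides the configuration when it was explicitly supplied, so every option can also live in the configuration file or fall back to the schema default.

```python
from typing import Any, Dict

def args_to_config(config: Dict[str, Any], args: Dict[str, Any], argname: str) -> None:
    # Copy a CLI argument into the config only if it was actually given,
    # so values from the config file (or schema defaults) survive.
    if args.get(argname) is not None:
        config[argname] = args[argname]

config = {'minimum_trade_amount': 10}  # from the config file / CONF_SCHEMA default
args = {'minimum_trade_amount': None, 'targeted_trade_amount': 20}  # CLI input
for key in ('minimum_trade_amount', 'targeted_trade_amount'):
    args_to_config(config, args, key)
assert config == {'minimum_trade_amount': 10, 'targeted_trade_amount': 20}
```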
--- freqtrade/commands/cli_options.py | 5 ++--- freqtrade/commands/optimize_commands.py | 2 +- freqtrade/configuration/configuration.py | 13 +++++++++++++ freqtrade/constants.py | 1 + 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index 33de428db..e4a864ea0 100755 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -692,20 +692,19 @@ AVAILABLE_CLI_OPTIONS = { ), "minimum_trade_amount": Arg( '--minimum-trade-amount', - help='set INT minimum trade amount', + help='Minimum trade amount for lookahead-analysis', type=check_int_positive, metavar='INT', ), "targeted_trade_amount": Arg( '--targeted-trade-amount', - help='set INT targeted trade amount', + help='Targeted trade amount for lookahead analysis', type=check_int_positive, metavar='INT', ), "lookahead_analysis_exportfilename": Arg( '--lookahead-analysis-exportfilename', help="Use this filename to store lookahead-analysis-results", - default=None, type=str ), } diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 866bf8e61..d5d4a0625 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -172,7 +172,7 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: if lookaheadAnalysis_instances: LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( lookaheadAnalysis_instances) - if config['lookahead_analysis_exportfilename'] is not None: + if config.get('lookahead_analysis_exportfilename') is not None: LookaheadAnalysisSubFunctions.export_to_csv(config, lookaheadAnalysis_instances) else: logger.error("There were no strategies specified neither through " diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index c763d791a..5bbbf301d 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -479,6 +479,19 @@ class Configuration: self._args_to_config(config, argname='analysis_csv_path', logstring='Path to store analysis CSVs: {}') + self._args_to_config(config, argname='analysis_csv_path', + logstring='Path to store analysis CSVs: {}') + + # Lookahead analysis results + self._args_to_config(config, argname='targeted_trade_amount', + logstring='Targeted Trade amount: {}') + + self._args_to_config(config, argname='minimum_trade_amount', + logstring='Minimum Trade amount: {}') + + self._args_to_config(config, argname='lookahead_analysis_exportfilename', + logstring='Path to store lookahead-analysis-results: {}') + def _process_runmode(self, config: Config) -> None: self._args_to_config(config, argname='dry_run', diff --git a/freqtrade/constants.py b/freqtrade/constants.py index ef59d8999..30484e560 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -166,6 +166,7 @@ CONF_SCHEMA = { 'reduce_df_footprint': {'type': 'boolean', 'default': False}, 'minimum_trade_amount': {'type': 'number', 'default': 10}, 'targeted_trade_amount': {'type': 'number', 'default': 20}, + 'lookahead_analysis_exportfilename': {'type': 'string'}, 'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99}, 'backtest_breakdown': { 'type': 'array', From 073dac8d5f2956a2f04383fd420fb91b61b0546f Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:08:22 +0200 Subject: [PATCH 013/130] Move lookahead analysis tests to optimize subdir --- tests/{ => optimize}/test_lookahead_analysis.py | 0 1 file changed, 0 insertions(+), 0 
deletions(-) rename tests/{ => optimize}/test_lookahead_analysis.py (100%) diff --git a/tests/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py similarity index 100% rename from tests/test_lookahead_analysis.py rename to tests/optimize/test_lookahead_analysis.py From 2e675efa13e86d7a3c73097bab50fe57d1ca4545 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:14:13 +0200 Subject: [PATCH 014/130] Initial fix - test --- freqtrade/optimize/lookahead_analysis.py | 1 - tests/optimize/test_lookahead_analysis.py | 53 +++++++---------------- 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 90aa934a6..4de9d755e 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -55,7 +55,6 @@ class LookaheadAnalysis: self.current_analysis = Analysis() self.minimum_trade_amount = config['minimum_trade_amount'] self.targeted_trade_amount = config['targeted_trade_amount'] - self.exportfilename = config['exportfilename'] self.strategy_obj = strategy_obj @staticmethod diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 24290798b..945b3893b 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -3,6 +3,7 @@ from unittest.mock import PropertyMock import numpy as np +import pytest import freqtrade.commands.arguments import freqtrade.optimize.lookahead_analysis @@ -10,7 +11,15 @@ from freqtrade.configuration import TimeRange from freqtrade.data import history from freqtrade.data.converter import clean_ohlcv_dataframe from freqtrade.data.history import get_timerange -from tests.conftest import patch_exchange +from tests.conftest import generate_test_data, patch_exchange + + +@pytest.fixture +def lookahead_conf(default_conf_usdt): + default_conf_usdt['minimum_trade_amount'] = 10 + default_conf_usdt['targeted_trade_amount'] = 20 + + return default_conf_usdt def trim_dictlist(dict_list, num): @@ -20,50 +29,18 @@ def trim_dictlist(dict_list, num): return new -def load_data_test(what, testdatadir): - timerange = TimeRange.parse_timerange('1510694220-1510700340') - data = history.load_pair_history(pair='UNITTEST/BTC', datadir=testdatadir, - timeframe='1m', timerange=timerange, - drop_incomplete=False, - fill_up_missing=False) - - base = 0.001 - if what == 'raise': - data.loc[:, 'open'] = data.index * base - data.loc[:, 'high'] = data.index * base + 0.0001 - data.loc[:, 'low'] = data.index * base - 0.0001 - data.loc[:, 'close'] = data.index * base - - if what == 'lower': - data.loc[:, 'open'] = 1 - data.index * base - data.loc[:, 'high'] = 1 - data.index * base + 0.0001 - data.loc[:, 'low'] = 1 - data.index * base - 0.0001 - data.loc[:, 'close'] = 1 - data.index * base - - if what == 'sine': - hz = 0.1 # frequency - data.loc[:, 'open'] = np.sin(data.index * hz) / 1000 + base - data.loc[:, 'high'] = np.sin(data.index * hz) / 1000 + base + 0.0001 - data.loc[:, 'low'] = np.sin(data.index * hz) / 1000 + base - 0.0001 - data.loc[:, 'close'] = np.sin(data.index * hz) / 1000 + base - - return {'UNITTEST/BTC': clean_ohlcv_dataframe(data, timeframe='1m', pair='UNITTEST/BTC', - fill_missing=True, drop_incomplete=True)} - - -def test_biased_strategy(default_conf, mocker, caplog) -> None: +def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) patch_exchange(mocker) 
mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', PropertyMock(return_value=['UNITTEST/BTC'])) - default_conf['timeframe'] = '5m' - default_conf['timerange'] = '-1510694220' - default_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' - default_conf['strategy_path'] = 'tests/strategy/strats' + lookahead_conf['timeframe'] = '5m' + lookahead_conf['timerange'] = '-1510694220' + lookahead_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" - freqtrade.optimize.lookahead_analysis.LookaheadAnalysis(default_conf, strategy_obj, {}) + freqtrade.optimize.lookahead_analysis.LookaheadAnalysis(lookahead_conf, strategy_obj) pass From 209eb63edebe16810da8c6e9be4ecc0c09f74607 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:28:52 +0200 Subject: [PATCH 015/130] Add startup test case --- freqtrade/commands/optimize_commands.py | 7 ++- tests/optimize/test_lookahead_analysis.py | 56 ++++++++++++++++++----- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index d5d4a0625..fb2d5ff21 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -6,7 +6,6 @@ from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.exceptions import OperationalException from freqtrade.misc import round_coin_value -from freqtrade.optimize.lookahead_analysis import LookaheadAnalysisSubFunctions from freqtrade.resolvers import StrategyResolver @@ -142,12 +141,16 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: :param args: Cli args from Arguments() :return: None """ + from freqtrade.optimize.lookahead_analysis import LookaheadAnalysisSubFunctions + config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) if config['targeted_trade_amount'] < config['minimum_trade_amount']: # add logic that tells the user to check the configuration # since this combo doesn't make any sense. - pass + raise OperationalException( + "targeted trade amount can't be smaller than minimum trade amount." 
+ ) strategy_objs = StrategyResolver.search_all_objects( config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 945b3893b..ff6d9c7da 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -1,17 +1,16 @@ # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument -from unittest.mock import PropertyMock +from pathlib import Path +from unittest.mock import MagicMock, PropertyMock -import numpy as np import pytest import freqtrade.commands.arguments import freqtrade.optimize.lookahead_analysis -from freqtrade.configuration import TimeRange -from freqtrade.data import history -from freqtrade.data.converter import clean_ohlcv_dataframe +from freqtrade.commands.optimize_commands import start_lookahead_analysis from freqtrade.data.history import get_timerange -from tests.conftest import generate_test_data, patch_exchange +from freqtrade.exceptions import OperationalException +from tests.conftest import CURRENT_TEST_STRATEGY, get_args, patch_exchange @pytest.fixture @@ -22,11 +21,46 @@ def lookahead_conf(default_conf_usdt): return default_conf_usdt -def trim_dictlist(dict_list, num): - new = {} - for pair, pair_data in dict_list.items(): - new[pair] = pair_data[num:].reset_index() - return new +def test_start_start_lookahead_analysis(mocker): + single_mock = MagicMock() + mocker.patch.multiple( + 'freqtrade.optimize.lookahead_analysis.LookaheadAnalysisSubFunctions', + initialize_single_lookahead_analysis=single_mock, + text_table_lookahead_analysis_instances=MagicMock(), + ) + args = [ + "lookahead-analysis", + "--strategy", + CURRENT_TEST_STRATEGY, + "--strategy-path", + str(Path(__file__).parent.parent / "strategy" / "strats"), + ] + pargs = get_args(args) + pargs['config'] = None + + start_lookahead_analysis(pargs) + assert single_mock.call_count == 1 + + single_mock.reset_mock() + + # Test invalid config + args = [ + "lookahead-analysis", + "--strategy", + CURRENT_TEST_STRATEGY, + "--strategy-path", + str(Path(__file__).parent.parent / "strategy" / "strats"), + "--targeted-trade-amount", + "10", + "--minimum-trade-amount", + "20", + ] + pargs = get_args(args) + pargs['config'] = None + with pytest.raises(OperationalException, + match=r"targeted trade amount can't be smaller than .*"): + start_lookahead_analysis(pargs) + def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: From 7b9f82c71a960b7314ccffea1d2c6c23e9872da8 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:30:51 +0200 Subject: [PATCH 016/130] Remove needless check for "None" list --- freqtrade/commands/optimize_commands.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index fb2d5ff21..9c753dcb9 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -162,14 +162,13 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: strategy_list = [config['strategy']] # check if strategies can be properly loaded, only check them if they can be. 
- if strategy_list is not None: - for strat in strategy_list: - for strategy_obj in strategy_objs: - if strategy_obj['name'] == strat and strategy_obj not in strategy_list: - lookaheadAnalysis_instances.append( - LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( - strategy_obj, config)) - break + for strat in strategy_list: + for strategy_obj in strategy_objs: + if strategy_obj['name'] == strat and strategy_obj not in strategy_list: + lookaheadAnalysis_instances.append( + LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( + strategy_obj, config)) + break # report the results if lookaheadAnalysis_instances: From 1c4a7c7a05a7619e24b2c9af64518d999379e42b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:35:17 +0200 Subject: [PATCH 017/130] Split Lookahead helper to separate file --- freqtrade/commands/optimize_commands.py | 2 +- freqtrade/optimize/lookahead_analysis.py | 87 ----------------- .../optimize/lookahead_analysis_helpers.py | 95 +++++++++++++++++++ 3 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 freqtrade/optimize/lookahead_analysis_helpers.py diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 9c753dcb9..06e9b7adb 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -141,7 +141,7 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: :param args: Cli args from Arguments() :return: None """ - from freqtrade.optimize.lookahead_analysis import LookaheadAnalysisSubFunctions + from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 4de9d755e..e8631e8b6 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -2,10 +2,8 @@ import copy import logging import pathlib import shutil -import time from copy import deepcopy from datetime import datetime, timedelta, timezone -from pathlib import Path from typing import Any, Dict, List import pandas as pd @@ -263,89 +261,4 @@ class LookaheadAnalysis: self.failed_bias_check = False -class LookaheadAnalysisSubFunctions: - @staticmethod - def text_table_lookahead_analysis_instances(lookahead_instances: List[LookaheadAnalysis]): - headers = ['filename', 'strategy', 'has_bias', 'total_signals', - 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] - data = [] - for inst in lookahead_instances: - if inst.failed_bias_check: - data.append( - [ - inst.strategy_obj['location'].parts[-1], - inst.strategy_obj['name'], - 'error while checking' - ] - ) - else: - data.append( - [ - inst.strategy_obj['location'].parts[-1], - inst.strategy_obj['name'], - inst.current_analysis.has_bias, - inst.current_analysis.total_signals, - inst.current_analysis.false_entry_signals, - inst.current_analysis.false_exit_signals, - ", ".join(inst.current_analysis.false_indicators) - ] - ) - from tabulate import tabulate - table = tabulate(data, headers=headers, tablefmt="orgtbl") - print(table) - @staticmethod - def export_to_csv(config: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): - def add_or_update_row(df, row_data): - if ( - (df['filename'] == row_data['filename']) & - (df['strategy'] == row_data['strategy']) - ).any(): - # Update existing row - pd_series = pd.DataFrame([row_data]) - df.loc[ - (df['filename'] == 
row_data['filename']) & - (df['strategy'] == row_data['strategy']) - ] = pd_series - else: - # Add new row - df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)]) - - return df - - if Path(config['lookahead_analysis_exportfilename']).exists(): - # Read CSV file into a pandas dataframe - csv_df = pd.read_csv(config['lookahead_analysis_exportfilename']) - else: - # Create a new empty DataFrame with the desired column names and set the index - csv_df = pd.DataFrame(columns=[ - 'filename', 'strategy', 'has_bias', 'total_signals', - 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators' - ], - index=None) - - for inst in lookahead_analysis: - new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], - 'strategy': inst.strategy_obj['name'], - 'has_bias': inst.current_analysis.has_bias, - 'total_signals': inst.current_analysis.total_signals, - 'biased_entry_signals': inst.current_analysis.false_entry_signals, - 'biased_exit_signals': inst.current_analysis.false_exit_signals, - 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} - csv_df = add_or_update_row(csv_df, new_row_data) - - logger.info(f"saving {config['lookahead_analysis_exportfilename']}") - csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) - - @staticmethod - def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]): - - logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.") - start = time.perf_counter() - current_instance = LookaheadAnalysis(config, strategy_obj) - current_instance.start() - elapsed = time.perf_counter() - start - logger.info(f"checking look ahead bias via backtests " - f"of {Path(strategy_obj['location']).name} " - f"took {elapsed:.0f} seconds.") - return current_instance diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py new file mode 100644 index 000000000..987a55b24 --- /dev/null +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -0,0 +1,95 @@ +import time +from pathlib import Path +from typing import Any, Dict, List + +import pandas as pd + +from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis, logger + + +class LookaheadAnalysisSubFunctions: + @staticmethod + def text_table_lookahead_analysis_instances(lookahead_instances: List[LookaheadAnalysis]): + headers = ['filename', 'strategy', 'has_bias', 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] + data = [] + for inst in lookahead_instances: + if inst.failed_bias_check: + data.append( + [ + inst.strategy_obj['location'].parts[-1], + inst.strategy_obj['name'], + 'error while checking' + ] + ) + else: + data.append( + [ + inst.strategy_obj['location'].parts[-1], + inst.strategy_obj['name'], + inst.current_analysis.has_bias, + inst.current_analysis.total_signals, + inst.current_analysis.false_entry_signals, + inst.current_analysis.false_exit_signals, + ", ".join(inst.current_analysis.false_indicators) + ] + ) + from tabulate import tabulate + table = tabulate(data, headers=headers, tablefmt="orgtbl") + print(table) + + @staticmethod + def export_to_csv(config: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): + def add_or_update_row(df, row_data): + if ( + (df['filename'] == row_data['filename']) & + (df['strategy'] == row_data['strategy']) + ).any(): + # Update existing row + pd_series = pd.DataFrame([row_data]) + df.loc[ + (df['filename'] == row_data['filename']) & + (df['strategy'] == 
row_data['strategy']) + ] = pd_series + else: + # Add new row + df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)]) + + return df + + if Path(config['lookahead_analysis_exportfilename']).exists(): + # Read CSV file into a pandas dataframe + csv_df = pd.read_csv(config['lookahead_analysis_exportfilename']) + else: + # Create a new empty DataFrame with the desired column names and set the index + csv_df = pd.DataFrame(columns=[ + 'filename', 'strategy', 'has_bias', 'total_signals', + 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators' + ], + index=None) + + for inst in lookahead_analysis: + new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], + 'strategy': inst.strategy_obj['name'], + 'has_bias': inst.current_analysis.has_bias, + 'total_signals': inst.current_analysis.total_signals, + 'biased_entry_signals': inst.current_analysis.false_entry_signals, + 'biased_exit_signals': inst.current_analysis.false_exit_signals, + 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} + csv_df = add_or_update_row(csv_df, new_row_data) + + logger.info(f"saving {config['lookahead_analysis_exportfilename']}") + csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) + + @staticmethod + def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]): + + logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.") + start = time.perf_counter() + current_instance = LookaheadAnalysis(config, strategy_obj) + current_instance.start() + elapsed = time.perf_counter() - start + logger.info(f"checking look ahead bias via backtests " + f"of {Path(strategy_obj['location']).name} " + f"took {elapsed:.0f} seconds.") + return current_instance From d8af0dc9c4e1c2dad1844d611e78d3b1b86b997b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:36:58 +0200 Subject: [PATCH 018/130] Slightly improve testcase --- tests/optimize/test_lookahead_analysis.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index ff6d9c7da..6872cb73a 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -5,11 +5,10 @@ from unittest.mock import MagicMock, PropertyMock import pytest -import freqtrade.commands.arguments -import freqtrade.optimize.lookahead_analysis from freqtrade.commands.optimize_commands import start_lookahead_analysis from freqtrade.data.history import get_timerange from freqtrade.exceptions import OperationalException +from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis from tests.conftest import CURRENT_TEST_STRATEGY, get_args, patch_exchange @@ -21,12 +20,13 @@ def lookahead_conf(default_conf_usdt): return default_conf_usdt -def test_start_start_lookahead_analysis(mocker): +def test_start_lookahead_analysis(mocker): single_mock = MagicMock() + text_table_mock = MagicMock() mocker.patch.multiple( - 'freqtrade.optimize.lookahead_analysis.LookaheadAnalysisSubFunctions', + 'freqtrade.optimize.lookahead_analysis_helpers.LookaheadAnalysisSubFunctions', initialize_single_lookahead_analysis=single_mock, - text_table_lookahead_analysis_instances=MagicMock(), + text_table_lookahead_analysis_instances=text_table_mock, ) args = [ "lookahead-analysis", @@ -40,6 +40,7 @@ def test_start_start_lookahead_analysis(mocker): start_lookahead_analysis(pargs) assert single_mock.call_count == 1 + assert text_table_mock.call_count == 1 
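
For reference, the table printer introduced in the helpers module above hands its rows to `tabulate`, and `tablefmt="orgtbl"` renders an org-mode style table. A self-contained sketch of the same call (the row values here are invented; the headers and call signature are the ones used above):

from tabulate import tabulate

headers = ['filename', 'strategy', 'has_bias', 'total_signals',
           'biased_entry_signals', 'biased_exit_signals', 'biased_indicators']
# hypothetical single-row result, shaped like the helper's `data` list
data = [['MyStrategy.py', 'MyStrategy', True, 5, 4, 3, 'rsi, ema_50']]

print(tabulate(data, headers=headers, tablefmt="orgtbl"))
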
single_mock.reset_mock() @@ -62,7 +63,6 @@ def test_start_start_lookahead_analysis(mocker): start_lookahead_analysis(pargs) - def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) @@ -76,5 +76,5 @@ def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" - freqtrade.optimize.lookahead_analysis.LookaheadAnalysis(lookahead_conf, strategy_obj) + LookaheadAnalysis(lookahead_conf, strategy_obj) pass From ceddcd9242209211153e30c728df77f9e907b4a1 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:45:06 +0200 Subject: [PATCH 019/130] Move most of the logic to lookahead_analysis helper --- freqtrade/commands/optimize_commands.py | 37 +-------------- .../optimize/lookahead_analysis_helpers.py | 47 ++++++++++++++++++- tests/optimize/test_lookahead_analysis.py | 1 + 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 06e9b7adb..4b8763737 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -6,7 +6,6 @@ from freqtrade.configuration import setup_utils_configuration from freqtrade.enums import RunMode from freqtrade.exceptions import OperationalException from freqtrade.misc import round_coin_value -from freqtrade.resolvers import StrategyResolver logger = logging.getLogger(__name__) @@ -144,40 +143,6 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) + LookaheadAnalysisSubFunctions.start(config) - if config['targeted_trade_amount'] < config['minimum_trade_amount']: - # add logic that tells the user to check the configuration - # since this combo doesn't make any sense. - raise OperationalException( - "targeted trade amount can't be smaller than minimum trade amount." - ) - strategy_objs = StrategyResolver.search_all_objects( - config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) - - lookaheadAnalysis_instances = [] - - # unify --strategy and --strategy_list to one list - if not (strategy_list := config.get('strategy_list', [])): - strategy_list = [config['strategy']] - - # check if strategies can be properly loaded, only check them if they can be. 
- for strat in strategy_list: - for strategy_obj in strategy_objs: - if strategy_obj['name'] == strat and strategy_obj not in strategy_list: - lookaheadAnalysis_instances.append( - LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( - strategy_obj, config)) - break - - # report the results - if lookaheadAnalysis_instances: - LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( - lookaheadAnalysis_instances) - if config.get('lookahead_analysis_exportfilename') is not None: - LookaheadAnalysisSubFunctions.export_to_csv(config, lookaheadAnalysis_instances) - else: - logger.error("There were no strategies specified neither through " - "--strategy nor through " - "--strategy_list " - "or timeframe was not specified.") diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 987a55b24..e2e9ffb42 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -1,10 +1,17 @@ +import logging import time from pathlib import Path from typing import Any, Dict, List import pandas as pd -from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis, logger +from freqtrade.constants import Config +from freqtrade.exceptions import OperationalException +from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis +from freqtrade.resolvers import StrategyResolver + + +logger = logging.getLogger(__name__) class LookaheadAnalysisSubFunctions: @@ -93,3 +100,41 @@ class LookaheadAnalysisSubFunctions: f"of {Path(strategy_obj['location']).name} " f"took {elapsed:.0f} seconds.") return current_instance + + @staticmethod + def start(config: Config): + if config['targeted_trade_amount'] < config['minimum_trade_amount']: + # this combo doesn't make any sense. + raise OperationalException( + "targeted trade amount can't be smaller than minimum trade amount." + ) + + strategy_objs = StrategyResolver.search_all_objects( + config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) + + lookaheadAnalysis_instances = [] + + # unify --strategy and --strategy_list to one list + if not (strategy_list := config.get('strategy_list', [])): + strategy_list = [config['strategy']] + + # check if strategies can be properly loaded, only check them if they can be. 
+ for strat in strategy_list: + for strategy_obj in strategy_objs: + if strategy_obj['name'] == strat and strategy_obj not in strategy_list: + lookaheadAnalysis_instances.append( + LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( + strategy_obj, config)) + break + + # report the results + if lookaheadAnalysis_instances: + LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( + lookaheadAnalysis_instances) + if config.get('lookahead_analysis_exportfilename') is not None: + LookaheadAnalysisSubFunctions.export_to_csv(config, lookaheadAnalysis_instances) + else: + logger.error("There were no strategies specified neither through " + "--strategy nor through " + "--strategy_list " + "or timeframe was not specified.") diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 6872cb73a..3136d6a16 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -77,4 +77,5 @@ def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" LookaheadAnalysis(lookahead_conf, strategy_obj) + pass From e183707979b5fb9ec94eb022c82de8f7a2a170ef Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 11:51:46 +0200 Subject: [PATCH 020/130] Further test lookahead_helpers --- .../optimize/lookahead_analysis_helpers.py | 5 +++ tests/optimize/test_lookahead_analysis.py | 35 ++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index e2e9ffb42..53d8c8a59 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -116,6 +116,11 @@ class LookaheadAnalysisSubFunctions: # unify --strategy and --strategy_list to one list if not (strategy_list := config.get('strategy_list', [])): + if config.get('strategy') is None: + raise OperationalException( + "No Strategy specified. Please specify a strategy via --strategy or " + "--strategy_list" + ) strategy_list = [config['strategy']] # check if strategies can be properly loaded, only check them if they can be. 
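
After this refactor the whole utility can be driven through `LookaheadAnalysisSubFunctions.start(config)`, which now owns both sanity checks shown above. A hedged sketch of the failure path (the config is deliberately minimal and would still need the usual freqtrade keys to run a real analysis):

from freqtrade.exceptions import OperationalException
from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions

config = {'targeted_trade_amount': 10, 'minimum_trade_amount': 20}  # invalid on purpose

try:
    LookaheadAnalysisSubFunctions.start(config)
except OperationalException as e:
    print(e)  # "targeted trade amount can't be smaller than minimum trade amount."
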
diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 3136d6a16..2eb0b2657 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -1,5 +1,6 @@ # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument +from copy import deepcopy from pathlib import Path from unittest.mock import MagicMock, PropertyMock @@ -9,7 +10,8 @@ from freqtrade.commands.optimize_commands import start_lookahead_analysis from freqtrade.data.history import get_timerange from freqtrade.exceptions import OperationalException from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis -from tests.conftest import CURRENT_TEST_STRATEGY, get_args, patch_exchange +from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions +from tests.conftest import CURRENT_TEST_STRATEGY, get_args, log_has_re, patch_exchange @pytest.fixture @@ -63,6 +65,37 @@ def test_start_lookahead_analysis(mocker): start_lookahead_analysis(pargs) +def test_lookahead_helper_invalid_config(lookahead_conf, mocker, caplog) -> None: + conf = deepcopy(lookahead_conf) + conf['targeted_trade_amount'] = 10 + conf['minimum_trade_amount'] = 40 + with pytest.raises(OperationalException, + match=r"targeted trade amount can't be smaller than .*"): + LookaheadAnalysisSubFunctions.start(conf) + + conf = deepcopy(lookahead_conf) + del conf['strategy'] + with pytest.raises(OperationalException, + match=r"No Strategy specified"): + LookaheadAnalysisSubFunctions.start(conf) + + +def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: + single_mock = MagicMock() + text_table_mock = MagicMock() + mocker.patch.multiple( + 'freqtrade.optimize.lookahead_analysis_helpers.LookaheadAnalysisSubFunctions', + initialize_single_lookahead_analysis=single_mock, + text_table_lookahead_analysis_instances=text_table_mock, + ) + LookaheadAnalysisSubFunctions.start(lookahead_conf) + assert single_mock.call_count == 1 + assert text_table_mock.call_count == 1 + + single_mock.reset_mock() + text_table_mock.reset_mock() + + def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) From 3f5c18a035a311dd01fb60d5040a8c0b5689b3df Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 12:03:06 +0200 Subject: [PATCH 021/130] Add some tests as todo --- tests/optimize/test_lookahead_analysis.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 2eb0b2657..0092b6074 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -96,6 +96,21 @@ def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: text_table_mock.reset_mock() +def test_lookahead_helper_text_table_lookahead_analysis_instances(): + # TODO + pytest.skip("TODO") + + +def test_lookahead_helper_export_to_csv(): + # TODO + pytest.skip("TODO") + + +def test_initialize_single_lookahead_analysis(): + # TODO + pytest.skip("TODO") + + def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) From 9869a21951c4947f6d1f070ff40c432e78515a36 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 15:39:21 +0200 Subject: [PATCH 022/130] Move strategy to it's own directory to avoid having other --- freqtrade/optimize/lookahead_analysis.py | 3 --- 
tests/optimize/test_lookahead_analysis.py | 19 +++++++++++++------ .../strategy_test_v3_with_lookahead_bias.py | 0 3 files changed, 13 insertions(+), 9 deletions(-) rename tests/strategy/strats/{ => lookahead_bias}/strategy_test_v3_with_lookahead_bias.py (100%) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index e8631e8b6..dfb1a9ea2 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -259,6 +259,3 @@ class LookaheadAnalysis: logging.info(self.local_config['strategy'] + ": no bias detected") self.failed_bias_check = False - - - diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 0092b6074..944892685 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -11,13 +11,15 @@ from freqtrade.data.history import get_timerange from freqtrade.exceptions import OperationalException from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions -from tests.conftest import CURRENT_TEST_STRATEGY, get_args, log_has_re, patch_exchange +from tests.conftest import EXMS, get_args, patch_exchange @pytest.fixture def lookahead_conf(default_conf_usdt): default_conf_usdt['minimum_trade_amount'] = 10 default_conf_usdt['targeted_trade_amount'] = 20 + default_conf_usdt['strategy_path'] = str( + Path(__file__).parent.parent / "strategy/strats/lookahead_bias") return default_conf_usdt @@ -33,7 +35,7 @@ def test_start_lookahead_analysis(mocker): args = [ "lookahead-analysis", "--strategy", - CURRENT_TEST_STRATEGY, + "strategy_test_v3_with_lookahead_bias", "--strategy-path", str(Path(__file__).parent.parent / "strategy" / "strats"), ] @@ -50,7 +52,7 @@ def test_start_lookahead_analysis(mocker): args = [ "lookahead-analysis", "--strategy", - CURRENT_TEST_STRATEGY, + "strategy_test_v3_with_lookahead_bias", "--strategy-path", str(Path(__file__).parent.parent / "strategy" / "strats"), "--targeted-trade-amount", @@ -114,16 +116,21 @@ def test_initialize_single_lookahead_analysis(): def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) + mocker.patch(f'{EXMS}.get_fee', return_value=0.0) + mocker.patch(f'{EXMS}.get_min_pair_stake_amount', return_value=0.00001) + mocker.patch(f'{EXMS}.get_max_pair_stake_amount', return_value=float('inf')) patch_exchange(mocker) mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', PropertyMock(return_value=['UNITTEST/BTC'])) + lookahead_conf['pairs'] = ['UNITTEST/USDT'] lookahead_conf['timeframe'] = '5m' - lookahead_conf['timerange'] = '-1510694220' + lookahead_conf['timerange'] = '1516406400-1517270400' lookahead_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" - LookaheadAnalysis(lookahead_conf, strategy_obj) + instance = LookaheadAnalysis(lookahead_conf, strategy_obj) + instance.start() - pass + # TODO: assert something ... most likely output (?) or instance state? 
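
One way the TODO above could be settled is through the state the instance accumulates: `start()` fills in the counters on `current_analysis`, including `has_bias`. A sketch of such an assertion (assuming the run found enough trades to complete the check; the very next patch opts for log matching instead):

# hypothetical follow-up to instance.start() in the test above:
# the deliberately biased strategy should trip the bias flag
assert instance.current_analysis.has_bias
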
diff --git a/tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py b/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py similarity index 100% rename from tests/strategy/strats/strategy_test_v3_with_lookahead_bias.py rename to tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py From e73cd1487e61825614206ed4019b79e41aea9795 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 19:57:26 +0200 Subject: [PATCH 023/130] Add somewhat sensible assert --- tests/optimize/test_lookahead_analysis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 944892685..ef4f8809c 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -11,7 +11,7 @@ from freqtrade.data.history import get_timerange from freqtrade.exceptions import OperationalException from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions -from tests.conftest import EXMS, get_args, patch_exchange +from tests.conftest import EXMS, get_args, log_has_re, patch_exchange @pytest.fixture @@ -125,12 +125,13 @@ def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: lookahead_conf['pairs'] = ['UNITTEST/USDT'] lookahead_conf['timeframe'] = '5m' - lookahead_conf['timerange'] = '1516406400-1517270400' + lookahead_conf['timerange'] = '20180119-20180122' lookahead_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" instance = LookaheadAnalysis(lookahead_conf, strategy_obj) instance.start() + assert log_has_re(r".*bias detected.*", caplog) # TODO: assert something ... most likely output (?) or instance state? From 104fa9e32db3dc69d5fdf43ffffdcc03dbd58d5b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 19:58:14 +0200 Subject: [PATCH 024/130] Use logger, not the logging module --- freqtrade/optimize/lookahead_analysis.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index dfb1a9ea2..e40322c88 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -121,9 +121,9 @@ class LookaheadAnalysis: if not self.current_analysis.false_indicators.__contains__(col_name[0]): self.current_analysis.false_indicators.append(col_name[0]) - logging.info(f"=> found look ahead bias in indicator " - f"{col_name[0]}. " - f"{str(self_value)} != {str(other_value)}") + logger.info(f"=> found look ahead bias in indicator " + f"{col_name[0]}. " + f"{str(self_value)} != {str(other_value)}") def prepare_data(self, varholder: VarHolder, pairs_to_load: List[pd.DataFrame]): @@ -232,14 +232,14 @@ class LookaheadAnalysis: # check if requirements have been met of full_varholder found_signals: int = self.full_varHolder.result['results'].shape[0] + 1 if found_signals >= self.targeted_trade_amount: - logging.info(f"Found {found_signals} trades, " - f"calculating {self.targeted_trade_amount} trades.") + logger.info(f"Found {found_signals} trades, " + f"calculating {self.targeted_trade_amount} trades.") elif self.targeted_trade_amount >= found_signals >= self.minimum_trade_amount: - logging.info(f"Only found {found_signals} trades. 
Calculating all available trades.") + logger.info(f"Only found {found_signals} trades. Calculating all available trades.") else: - logging.info(f"found {found_signals} trades " - f"which is less than minimum_trade_amount {self.minimum_trade_amount}. " - f"Cancelling this backtest lookahead bias test.") + logger.info(f"found {found_signals} trades " + f"which is less than minimum_trade_amount {self.minimum_trade_amount}. " + f"Cancelling this backtest lookahead bias test.") return # now we loop through all signals @@ -253,9 +253,9 @@ class LookaheadAnalysis: if (self.current_analysis.false_entry_signals > 0 or self.current_analysis.false_exit_signals > 0 or len(self.current_analysis.false_indicators) > 0): - logging.info(f" => {self.local_config['strategy']} + : bias detected!") + logger.info(f" => {self.local_config['strategy']} + : bias detected!") self.current_analysis.has_bias = True else: - logging.info(self.local_config['strategy'] + ": no bias detected") + logger.info(self.local_config['strategy'] + ": no bias detected") self.failed_bias_check = False From 3e6a2bf9b04d0c140350b0f78baa274dc7bae11d Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 20 May 2023 20:12:04 +0200 Subject: [PATCH 025/130] Add parameters for analysis tests ... --- tests/optimize/test_lookahead_analysis.py | 23 +++++++++++++++++-- .../strategy_test_v3_with_lookahead_bias.py | 17 ++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index ef4f8809c..0706750ec 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -113,7 +113,10 @@ def test_initialize_single_lookahead_analysis(): pytest.skip("TODO") -def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: +@pytest.mark.parametrize('scenario', [ + 'no_bias', 'bias1' +]) +def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: mocker.patch('freqtrade.data.history.get_timerange', get_timerange) mocker.patch(f'{EXMS}.get_fee', return_value=0.0) @@ -128,10 +131,26 @@ def test_biased_strategy(lookahead_conf, mocker, caplog) -> None: lookahead_conf['timerange'] = '20180119-20180122' lookahead_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' + # Patch scenario Parameter to allow for easy selection + mocker.patch('freqtrade.strategy.hyper.HyperStrategyMixin.load_params_from_file', + return_value={ + 'params': { + "buy": { + "scenario": scenario + } + } + }) + strategy_obj = {} strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" instance = LookaheadAnalysis(lookahead_conf, strategy_obj) instance.start() + # Assert init correct + assert log_has_re(f"Strategy Parameter: scenario = {scenario}", caplog) + # Assert bias detected assert log_has_re(r".*bias detected.*", caplog) - # TODO: assert something ... most likely output (?) or instance state? 
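
The scenario switch in the strategy file below is the crux of the fixture: `shift(10)` references closes 10 candles in the past (available live), while `shift(-10)` reads closes 10 candles into the future, which is precisely the look-ahead this tool hunts. A tiny pandas illustration (shift of 1 here for brevity, 10 in the strategy):

import pandas as pd

close = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])

print(close.shift(1))   # past values: [NaN, 1, 2, 3, 4] -> usable live
print(close.shift(-1))  # future values: [2, 3, 4, 5, NaN] -> look-ahead bias
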
+ + # Assert False to see full logs in output + # assert False + # Run with `pytest tests/optimize/test_lookahead_analysis.py -k test_biased_strategy` diff --git a/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py b/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py index 6cf894586..d35b85b2d 100644 --- a/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py +++ b/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py @@ -3,6 +3,7 @@ from pandas import DataFrame from technical.indicators import ichimoku from freqtrade.strategy import IStrategy +from freqtrade.strategy.parameters import CategoricalParameter class strategy_test_v3_with_lookahead_bias(IStrategy): @@ -21,6 +22,7 @@ class strategy_test_v3_with_lookahead_bias(IStrategy): # Optimal timeframe for the strategy timeframe = '5m' + scenario = CategoricalParameter(['no_bias', 'bias1'], default='bias1', space="buy") # Number of candles the strategy requires before producing valid signals startup_candle_count: int = 20 @@ -37,14 +39,19 @@ class strategy_test_v3_with_lookahead_bias(IStrategy): return dataframe def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - dataframe.loc[ - dataframe['close'].shift(-10) > dataframe['close'], - 'enter_long'] = 1 + if self.scenario.value == 'no_bias': + dataframe.loc[dataframe['close'].shift(10) < dataframe['close'], 'enter_long'] = 1 + else: + dataframe.loc[dataframe['close'].shift(-10) > dataframe['close'], 'enter_long'] = 1 return dataframe def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - dataframe.loc[ - dataframe['close'].shift(-10) > dataframe['close'], 'exit'] = 1 + if self.scenario.value == 'no_bias': + dataframe.loc[ + dataframe['close'].shift(10) < dataframe['close'], 'exit'] = 1 + else: + dataframe.loc[ + dataframe['close'].shift(-10) > dataframe['close'], 'exit'] = 1 return dataframe From 70a0c2e62527184d8b1af08e09ac57abdde291e2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 21 May 2023 08:21:08 +0200 Subject: [PATCH 026/130] Fix test mishap --- tests/optimize/test_lookahead_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 0706750ec..8ee92e6fc 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -20,6 +20,7 @@ def lookahead_conf(default_conf_usdt): default_conf_usdt['targeted_trade_amount'] = 20 default_conf_usdt['strategy_path'] = str( Path(__file__).parent.parent / "strategy/strats/lookahead_bias") + default_conf_usdt['strategy'] = 'strategy_test_v3_with_lookahead_bias' return default_conf_usdt @@ -37,7 +38,7 @@ def test_start_lookahead_analysis(mocker): "--strategy", "strategy_test_v3_with_lookahead_bias", "--strategy-path", - str(Path(__file__).parent.parent / "strategy" / "strats"), + str(Path(__file__).parent.parent / "strategy/strats/lookahead_bias"), ] pargs = get_args(args) pargs['config'] = None @@ -54,7 +55,7 @@ def test_start_lookahead_analysis(mocker): "--strategy", "strategy_test_v3_with_lookahead_bias", "--strategy-path", - str(Path(__file__).parent.parent / "strategy" / "strats"), + str(Path(__file__).parent.parent / "strategy/strats/lookahead_bias"), "--targeted-trade-amount", "10", "--minimum-trade-amount", @@ -129,7 +130,6 @@ def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: lookahead_conf['timeframe'] = 
'5m' lookahead_conf['timerange'] = '20180119-20180122' - lookahead_conf['strategy'] = 'strategy_test_v3_with_lookahead_bias' # Patch scenario Parameter to allow for easy selection mocker.patch('freqtrade.strategy.hyper.HyperStrategyMixin.load_params_from_file', From eb31b574c1f362b7a9914275a8e19ba0f0cf9c80 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Fri, 26 May 2023 12:55:54 +0200 Subject: [PATCH 027/130] added returns to text_table_lookahead_analysis_instances filled in test_lookahead_helper_text_table_lookahead_analysis_instances --- .../optimize/lookahead_analysis_helpers.py | 1 + tests/optimize/test_lookahead_analysis.py | 68 +++++++++++++++---- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 53d8c8a59..54c63b78c 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -44,6 +44,7 @@ class LookaheadAnalysisSubFunctions: from tabulate import tabulate table = tabulate(data, headers=headers, tablefmt="orgtbl") print(table) + return table, headers, data @staticmethod def export_to_csv(config: Dict[str, Any], lookahead_analysis: List[LookaheadAnalysis]): diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 8ee92e6fc..e9c5f0f85 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -1,7 +1,7 @@ # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument from copy import deepcopy -from pathlib import Path +from pathlib import Path, PurePosixPath from unittest.mock import MagicMock, PropertyMock import pytest @@ -9,7 +9,7 @@ import pytest from freqtrade.commands.optimize_commands import start_lookahead_analysis from freqtrade.data.history import get_timerange from freqtrade.exceptions import OperationalException -from freqtrade.optimize.lookahead_analysis import LookaheadAnalysis +from freqtrade.optimize.lookahead_analysis import Analysis, LookaheadAnalysis from freqtrade.optimize.lookahead_analysis_helpers import LookaheadAnalysisSubFunctions from tests.conftest import EXMS, get_args, log_has_re, patch_exchange @@ -32,7 +32,7 @@ def test_start_lookahead_analysis(mocker): 'freqtrade.optimize.lookahead_analysis_helpers.LookaheadAnalysisSubFunctions', initialize_single_lookahead_analysis=single_mock, text_table_lookahead_analysis_instances=text_table_mock, - ) + ) args = [ "lookahead-analysis", "--strategy", @@ -60,7 +60,7 @@ def test_start_lookahead_analysis(mocker): "10", "--minimum-trade-amount", "20", - ] + ] pargs = get_args(args) pargs['config'] = None with pytest.raises(OperationalException, @@ -87,10 +87,10 @@ def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: single_mock = MagicMock() text_table_mock = MagicMock() mocker.patch.multiple( - 'freqtrade.optimize.lookahead_analysis_helpers.LookaheadAnalysisSubFunctions', - initialize_single_lookahead_analysis=single_mock, - text_table_lookahead_analysis_instances=text_table_mock, - ) + 'freqtrade.optimize.lookahead_analysis_helpers.LookaheadAnalysisSubFunctions', + initialize_single_lookahead_analysis=single_mock, + text_table_lookahead_analysis_instances=text_table_mock, + ) LookaheadAnalysisSubFunctions.start(lookahead_conf) assert single_mock.call_count == 1 assert text_table_mock.call_count == 1 @@ -99,9 +99,52 @@ def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: 
text_table_mock.reset_mock() -def test_lookahead_helper_text_table_lookahead_analysis_instances(): - # TODO - pytest.skip("TODO") +def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf, caplog): + analysis = Analysis() + analysis.total_signals = 5 + analysis.has_bias = True + analysis.false_entry_signals = 4 + analysis.false_exit_signals = 3 + + strategy_obj = \ + { + 'name': "strategy_test_v3_with_lookahead_bias", + 'location': PurePosixPath(lookahead_conf['strategy_path'], + f"{lookahead_conf['strategy']}.py") + } + + instance = LookaheadAnalysis(lookahead_conf, strategy_obj) + instance.current_analysis = analysis + table, headers, data = (LookaheadAnalysisSubFunctions. + text_table_lookahead_analysis_instances([instance])) + + # check amount of returning rows + assert len(data) == 1 + + # check row contents for a try that errored out + assert data[0][0] == 'strategy_test_v3_with_lookahead_bias.py' + assert data[0][1] == 'strategy_test_v3_with_lookahead_bias' + assert data[0][2].__contains__('error') + assert len(data[0]) == 3 + + # edit it into not showing an error + instance.failed_bias_check = False + table, headers, data = (LookaheadAnalysisSubFunctions. + text_table_lookahead_analysis_instances([instance])) + assert data[0][0] == 'strategy_test_v3_with_lookahead_bias.py' + assert data[0][1] == 'strategy_test_v3_with_lookahead_bias' + assert data[0][2] # True + assert data[0][3] == 5 + assert data[0][4] == 4 + assert data[0][5] == 3 + assert data[0][6] == '' + + analysis.false_indicators.append('falseIndicator1') + analysis.false_indicators.append('falseIndicator2') + table, headers, data = (LookaheadAnalysisSubFunctions. + text_table_lookahead_analysis_instances([instance])) + + assert data[0][6] == 'falseIndicator1, falseIndicator2' def test_lookahead_helper_export_to_csv(): @@ -118,7 +161,6 @@ def test_initialize_single_lookahead_analysis(): 'no_bias', 'bias1' ]) def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: - mocker.patch('freqtrade.data.history.get_timerange', get_timerange) mocker.patch(f'{EXMS}.get_fee', return_value=0.0) mocker.patch(f'{EXMS}.get_min_pair_stake_amount', return_value=0.00001) @@ -136,7 +178,7 @@ def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: return_value={ 'params': { "buy": { - "scenario": scenario + "scenario": scenario } } }) From 31e19add2745ec90c66adbd8a5adedbcb1ac7cf3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 26 May 2023 18:40:14 +0200 Subject: [PATCH 028/130] start transition toward outsourcing the data pipeline with objective of improving pipeline flexibility --- .../freqai/base_models/BasePyTorchModel.py | 75 +- .../freqai/base_models/BaseRegressionModel.py | 37 +- .../freqai/base_models/BaseTensorFlowModel.py | 70 -- freqtrade/freqai/data_drawer.py | 15 + freqtrade/freqai/data_kitchen.py | 843 +++++++++--------- freqtrade/freqai/freqai_interface.py | 28 + requirements-freqai.txt | 1 + tests/freqai/test_freqai_datakitchen.py | 96 +- 8 files changed, 579 insertions(+), 586 deletions(-) delete mode 100644 freqtrade/freqai/base_models/BaseTensorFlowModel.py diff --git a/freqtrade/freqai/base_models/BasePyTorchModel.py b/freqtrade/freqai/base_models/BasePyTorchModel.py index 82042d24c..21dc4e894 100644 --- a/freqtrade/freqai/base_models/BasePyTorchModel.py +++ b/freqtrade/freqai/base_models/BasePyTorchModel.py @@ -7,14 +7,15 @@ import torch from pandas import DataFrame from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from 
freqtrade.freqai.freqai_interface import IFreqaiModel +# from freqtrade.freqai.freqai_interface import IFreqaiModel +from freqtrade.freqai.base_models import BaseRegressionModel from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor logger = logging.getLogger(__name__) -class BasePyTorchModel(IFreqaiModel, ABC): +class BasePyTorchModel(BaseRegressionModel): """ Base class for PyTorch type models. User *must* inherit from this class and set fit() and predict() and @@ -29,50 +30,50 @@ class BasePyTorchModel(IFreqaiModel, ABC): self.splits = ["train", "test"] if test_size != 0 else ["train"] self.window_size = self.freqai_info.get("conv_width", 1) - def train( - self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs - ) -> Any: - """ - Filter the training data and train a model to it. Train makes heavy use of the datakitchen - for storing, saving, loading, and analyzing the data. - :param unfiltered_df: Full dataframe for the current training period - :return: - :model: Trained model which can be used to inference (self.predict) - """ + # def train( + # self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + # ) -> Any: + # """ + # Filter the training data and train a model to it. Train makes heavy use of the datakitchen + # for storing, saving, loading, and analyzing the data. + # :param unfiltered_df: Full dataframe for the current training period + # :return: + # :model: Trained model which can be used to inference (self.predict) + # """ - logger.info(f"-------------------- Starting training {pair} --------------------") + # logger.info(f"-------------------- Starting training {pair} --------------------") - start_time = time() + # start_time = time() - features_filtered, labels_filtered = dk.filter_features( - unfiltered_df, - dk.training_features_list, - dk.label_list, - training_filter=True, - ) + # features_filtered, labels_filtered = dk.filter_features( + # unfiltered_df, + # dk.training_features_list, + # dk.label_list, + # training_filter=True, + # ) - # split data into train/test data. - data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) - if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: - dk.fit_labels() - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) + # # split data into train/test data. 
+ # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + # if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: + # dk.fit_labels() + # # normalize all data based on train_dataset only + # data_dictionary = dk.normalize_data(data_dictionary) - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) + # # optional additional data cleaning/analysis + # self.data_cleaning_train(dk) - logger.info( - f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" - ) - logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") + # logger.info( + # f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" + # ) + # logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") - model = self.fit(data_dictionary, dk) - end_time = time() + # model = self.fit(data_dictionary, dk) + # end_time = time() - logger.info(f"-------------------- Done training {pair} " - f"({end_time - start_time:.2f} secs) --------------------") + # logger.info(f"-------------------- Done training {pair} " + # f"({end_time - start_time:.2f} secs) --------------------") - return model + # return model @property @abstractmethod diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index 1f9b4f5a6..45660253e 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -49,21 +49,34 @@ class BaseRegressionModel(IFreqaiModel): logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date} --------------------") # split data into train/test data. - data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + d = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) + self.define_data_pipeline(dk) + self.define_label_pipeline(dk) + + d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) + d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + + (d["train_features"], + d["train_labels"], + d["train_weights"]) = dk.pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) + + (d["test_features"], + d["test_labels"], + d["test_weights"]) = dk.pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") + logger.info(f"Training model on {len(d['train_features'])} data points") - model = self.fit(data_dictionary, dk) + model = self.fit(d, dk) end_time = time() @@ -88,11 +101,11 @@ class BaseRegressionModel(IFreqaiModel): filtered_df, _ = dk.filter_features( unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_df = dk.normalize_data_from_metadata(filtered_df) + # filtered_df = dk.normalize_data_from_metadata(filtered_df) dk.data_dictionary["prediction_features"] = filtered_df - # optional additional data cleaning/analysis - self.data_cleaning_predict(dk) + dk.data_dictionary["prediction_features"], outliers, _ = 
dk.pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) if self.CONV_WIDTH == 1: @@ -100,6 +113,8 @@ class BaseRegressionModel(IFreqaiModel): pred_df = DataFrame(predictions, columns=dk.label_list) - pred_df = dk.denormalize_labels_from_metadata(pred_df) + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + dk.DI_values = dk.label_pipeline.get_step("di").di_values + dk.do_predict = outliers.to_numpy() return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/base_models/BaseTensorFlowModel.py b/freqtrade/freqai/base_models/BaseTensorFlowModel.py deleted file mode 100644 index b41ee0175..000000000 --- a/freqtrade/freqai/base_models/BaseTensorFlowModel.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging -from time import time -from typing import Any - -from pandas import DataFrame - -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.freqai_interface import IFreqaiModel - - -logger = logging.getLogger(__name__) - - -class BaseTensorFlowModel(IFreqaiModel): - """ - Base class for TensorFlow type models. - User *must* inherit from this class and set fit() and predict(). - """ - - def train( - self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs - ) -> Any: - """ - Filter the training data and train a model to it. Train makes heavy use of the datakitchen - for storing, saving, loading, and analyzing the data. - :param unfiltered_df: Full dataframe for the current training period - :param metadata: pair metadata from strategy. - :return: - :model: Trained model which can be used to inference (self.predict) - """ - - logger.info(f"-------------------- Starting training {pair} --------------------") - - start_time = time() - - # filter the features requested by user in the configuration file and elegantly handle NaNs - features_filtered, labels_filtered = dk.filter_features( - unfiltered_df, - dk.training_features_list, - dk.label_list, - training_filter=True, - ) - - start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") - logger.info(f"-------------------- Training on data from {start_date} to " - f"{end_date} --------------------") - # split data into train/test data. 
- data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) - if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: - dk.fit_labels() - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) - - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) - - logger.info( - f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" - ) - logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") - - model = self.fit(data_dictionary, dk) - - end_time = time() - - logger.info(f"-------------------- Done training {pair} " - f"({end_time - start_time:.2f} secs) --------------------") - - return model diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index b68a9dcad..9fdcc2d41 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -460,6 +460,13 @@ class FreqaiDataDrawer: with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) + # save the pipelines to pickle files + with (save_path / f"{dk.model_filename}_pipeline.pkl").open("wb") as fp: + cloudpickle.dump(dk.pipeline, fp) + + with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp: + cloudpickle.dump(dk.label_pipeline, fp) + # save the train data to file so we can check preds for area of applicability later dk.data_dictionary["train_features"].to_pickle( save_path / f"{dk.model_filename}_trained_df.pkl" @@ -482,6 +489,8 @@ class FreqaiDataDrawer: self.meta_data_dictionary[coin] = {} self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"] self.meta_data_dictionary[coin]["meta_data"] = dk.data + self.meta_data_dictionary[coin]["pipeline"] = dk.pipeline + self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline self.save_drawer_to_disk() return @@ -513,6 +522,8 @@ class FreqaiDataDrawer: if coin in self.meta_data_dictionary: dk.data = self.meta_data_dictionary[coin]["meta_data"] dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"] + dk.pipeline = self.meta_data_dictionary[coin]["pipeline"] + dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"] else: with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) @@ -520,6 +531,10 @@ class FreqaiDataDrawer: dk.data_dictionary["train_features"] = pd.read_pickle( dk.data_path / f"{dk.model_filename}_trained_df.pkl" ) + with (dk.data_path / f"{dk.model_filename}_pipeline.pkl").open("rb") as fp: + dk.pipeline = cloudpickle.load(fp) + with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp: + dk.label_pipeline = cloudpickle.load(fp) dk.training_features_list = dk.data["training_features_list"] dk.label_list = dk.data["label_list"] diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 21b41db2d..adfeb8dd5 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -27,6 +27,7 @@ from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.strategy import merge_informative_pair from freqtrade.strategy.interface import IStrategy +from datasieve.pipeline import Pipeline SECONDS_IN_DAY = 86400 @@ -86,6 +87,8 @@ class FreqaiDataKitchen: self.keras: bool = 
self.freqai_config.get("keras", False) self.set_all_pairs() self.backtest_live_models = config.get("freqai_backtest_live_models", False) + self.pipeline = Pipeline() + self.label_pipeline = Pipeline() if not self.live: self.full_path = self.get_full_models_path(self.config) @@ -307,106 +310,106 @@ class FreqaiDataKitchen: return self.data_dictionary - def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: - """ - Normalize all data in the data_dictionary according to the training dataset - :param data_dictionary: dictionary containing the cleaned and - split training/test data/labels - :returns: - :data_dictionary: updated dictionary with standardized values. - """ + # def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + # """ + # Normalize all data in the data_dictionary according to the training dataset + # :param data_dictionary: dictionary containing the cleaned and + # split training/test data/labels + # :returns: + # :data_dictionary: updated dictionary with standardized values. + # """ - # standardize the data by training stats - train_max = data_dictionary["train_features"].max() - train_min = data_dictionary["train_features"].min() - data_dictionary["train_features"] = ( - 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1 - ) - data_dictionary["test_features"] = ( - 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 - ) + # # standardize the data by training stats + # train_max = data_dictionary["train_features"].max() + # train_min = data_dictionary["train_features"].min() + # data_dictionary["train_features"] = ( + # 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1 + # ) + # data_dictionary["test_features"] = ( + # 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 + # ) - for item in train_max.keys(): - self.data[item + "_max"] = train_max[item] - self.data[item + "_min"] = train_min[item] + # for item in train_max.keys(): + # self.data[item + "_max"] = train_max[item] + # self.data[item + "_min"] = train_min[item] - for item in data_dictionary["train_labels"].keys(): - if data_dictionary["train_labels"][item].dtype == object: - continue - train_labels_max = data_dictionary["train_labels"][item].max() - train_labels_min = data_dictionary["train_labels"][item].min() - data_dictionary["train_labels"][item] = ( - 2 - * (data_dictionary["train_labels"][item] - train_labels_min) - / (train_labels_max - train_labels_min) - - 1 - ) - if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - data_dictionary["test_labels"][item] = ( - 2 - * (data_dictionary["test_labels"][item] - train_labels_min) - / (train_labels_max - train_labels_min) - - 1 - ) + # for item in data_dictionary["train_labels"].keys(): + # if data_dictionary["train_labels"][item].dtype == object: + # continue + # train_labels_max = data_dictionary["train_labels"][item].max() + # train_labels_min = data_dictionary["train_labels"][item].min() + # data_dictionary["train_labels"][item] = ( + # 2 + # * (data_dictionary["train_labels"][item] - train_labels_min) + # / (train_labels_max - train_labels_min) + # - 1 + # ) + # if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + # data_dictionary["test_labels"][item] = ( + # 2 + # * (data_dictionary["test_labels"][item] - train_labels_min) + # / (train_labels_max - train_labels_min) + # - 1 + # ) - self.data[f"{item}_max"] = train_labels_max - self.data[f"{item}_min"] = 
train_labels_min - return data_dictionary + # self.data[f"{item}_max"] = train_labels_max + # self.data[f"{item}_min"] = train_labels_min + # return data_dictionary - def normalize_single_dataframe(self, df: DataFrame) -> DataFrame: + # def normalize_single_dataframe(self, df: DataFrame) -> DataFrame: - train_max = df.max() - train_min = df.min() - df = ( - 2 * (df - train_min) / (train_max - train_min) - 1 - ) + # train_max = df.max() + # train_min = df.min() + # df = ( + # 2 * (df - train_min) / (train_max - train_min) - 1 + # ) - for item in train_max.keys(): - self.data[item + "_max"] = train_max[item] - self.data[item + "_min"] = train_min[item] + # for item in train_max.keys(): + # self.data[item + "_max"] = train_max[item] + # self.data[item + "_min"] = train_min[item] - return df + # return df - def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: - """ - Normalize a set of data using the mean and standard deviation from - the associated training data. - :param df: Dataframe to be standardized - """ + # def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: + # """ + # Normalize a set of data using the mean and standard deviation from + # the associated training data. + # :param df: Dataframe to be standardized + # """ - train_max = [None] * len(df.keys()) - train_min = [None] * len(df.keys()) + # train_max = [None] * len(df.keys()) + # train_min = [None] * len(df.keys()) - for i, item in enumerate(df.keys()): - train_max[i] = self.data[f"{item}_max"] - train_min[i] = self.data[f"{item}_min"] + # for i, item in enumerate(df.keys()): + # train_max[i] = self.data[f"{item}_max"] + # train_min[i] = self.data[f"{item}_min"] - train_max_series = pd.Series(train_max, index=df.keys()) - train_min_series = pd.Series(train_min, index=df.keys()) + # train_max_series = pd.Series(train_max, index=df.keys()) + # train_min_series = pd.Series(train_min, index=df.keys()) - df = ( - 2 * (df - train_min_series) / (train_max_series - train_min_series) - 1 - ) + # df = ( + # 2 * (df - train_min_series) / (train_max_series - train_min_series) - 1 + # ) - return df + # return df - def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: - """ - Denormalize a set of data using the mean and standard deviation from - the associated training data. - :param df: Dataframe of predictions to be denormalized - """ + # def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: + # """ + # Denormalize a set of data using the mean and standard deviation from + # the associated training data. 
+ # :param df: Dataframe of predictions to be denormalized + # """ - for label in df.columns: - if df[label].dtype == object or label in self.unique_class_list: - continue - df[label] = ( - (df[label] + 1) - * (self.data[f"{label}_max"] - self.data[f"{label}_min"]) - / 2 - ) + self.data[f"{label}_min"] + # for label in df.columns: + # if df[label].dtype == object or label in self.unique_class_list: + # continue + # df[label] = ( + # (df[label] + 1) + # * (self.data[f"{label}_max"] - self.data[f"{label}_min"]) + # / 2 + # ) + self.data[f"{label}_min"] - return df + # return df def split_timerange( self, tr: str, train_split: int = 28, bt_split: float = 7 @@ -501,398 +504,398 @@ class FreqaiDataKitchen: return df_predictions - def principal_component_analysis(self) -> None: - """ - Performs Principal Component Analysis on the data for dimensionality reduction - and outlier detection (see self.remove_outliers()) - No parameters or returns, it acts on the data_dictionary held by the DataHandler. - """ + # def principal_component_analysis(self) -> None: + # """ + # Performs Principal Component Analysis on the data for dimensionality reduction + # and outlier detection (see self.remove_outliers()) + # No parameters or returns, it acts on the data_dictionary held by the DataHandler. + # """ - from sklearn.decomposition import PCA # avoid importing if we dont need it + # from sklearn.decomposition import PCA # avoid importing if we dont need it - pca = PCA(0.999) - pca = pca.fit(self.data_dictionary["train_features"]) - n_keep_components = pca.n_components_ - self.data["n_kept_components"] = n_keep_components - n_components = self.data_dictionary["train_features"].shape[1] - logger.info("reduced feature dimension by %s", n_components - n_keep_components) - logger.info("explained variance %f", np.sum(pca.explained_variance_ratio_)) + # pca = PCA(0.999) + # pca = pca.fit(self.data_dictionary["train_features"]) + # n_keep_components = pca.n_components_ + # self.data["n_kept_components"] = n_keep_components + # n_components = self.data_dictionary["train_features"].shape[1] + # logger.info("reduced feature dimension by %s", n_components - n_keep_components) + # logger.info("explained variance %f", np.sum(pca.explained_variance_ratio_)) - train_components = pca.transform(self.data_dictionary["train_features"]) - self.data_dictionary["train_features"] = pd.DataFrame( - data=train_components, - columns=["PC" + str(i) for i in range(0, n_keep_components)], - index=self.data_dictionary["train_features"].index, - ) - # normalsing transformed training features - self.data_dictionary["train_features"] = self.normalize_single_dataframe( - self.data_dictionary["train_features"]) + # train_components = pca.transform(self.data_dictionary["train_features"]) + # self.data_dictionary["train_features"] = pd.DataFrame( + # data=train_components, + # columns=["PC" + str(i) for i in range(0, n_keep_components)], + # index=self.data_dictionary["train_features"].index, + # ) + # # normalsing transformed training features + # self.data_dictionary["train_features"] = self.normalize_single_dataframe( + # self.data_dictionary["train_features"]) - # keeping a copy of the non-transformed features so we can check for errors during - # model load from disk - self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list) - self.training_features_list = self.data_dictionary["train_features"].columns + # # keeping a copy of the non-transformed features so we can check for errors during + # # model load from disk 
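
For context on the block being carried along as comments here: `PCA(0.999)` uses scikit-learn's fractional `n_components`, i.e. keep as many components as are needed to explain 99.9% of the training variance. A small self-contained illustration:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(42)
X = rng.normal(size=(500, 10))
X[:, 5:] *= 0.01  # last five features carry almost no variance

pca = PCA(0.999).fit(X)               # fraction => keep 99.9% of variance
print(pca.n_components_)              # roughly 5 components survive
print(pca.explained_variance_ratio_.sum())
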
+ # self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list) + # self.training_features_list = self.data_dictionary["train_features"].columns - if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - test_components = pca.transform(self.data_dictionary["test_features"]) - self.data_dictionary["test_features"] = pd.DataFrame( - data=test_components, - columns=["PC" + str(i) for i in range(0, n_keep_components)], - index=self.data_dictionary["test_features"].index, - ) - # normalise transformed test feature to transformed training features - self.data_dictionary["test_features"] = self.normalize_data_from_metadata( - self.data_dictionary["test_features"]) + # if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + # test_components = pca.transform(self.data_dictionary["test_features"]) + # self.data_dictionary["test_features"] = pd.DataFrame( + # data=test_components, + # columns=["PC" + str(i) for i in range(0, n_keep_components)], + # index=self.data_dictionary["test_features"].index, + # ) + # # normalise transformed test feature to transformed training features + # self.data_dictionary["test_features"] = self.normalize_data_from_metadata( + # self.data_dictionary["test_features"]) - self.data["n_kept_components"] = n_keep_components - self.pca = pca + # self.data["n_kept_components"] = n_keep_components + # self.pca = pca - logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}") + # logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}") - if not self.data_path.is_dir(): - self.data_path.mkdir(parents=True, exist_ok=True) + # if not self.data_path.is_dir(): + # self.data_path.mkdir(parents=True, exist_ok=True) - return None + # return None - def pca_transform(self, filtered_dataframe: DataFrame) -> None: - """ - Use an existing pca transform to transform data into components - :param filtered_dataframe: DataFrame = the cleaned dataframe - """ - pca_components = self.pca.transform(filtered_dataframe) - self.data_dictionary["prediction_features"] = pd.DataFrame( - data=pca_components, - columns=["PC" + str(i) for i in range(0, self.data["n_kept_components"])], - index=filtered_dataframe.index, - ) - # normalise transformed predictions to transformed training features - self.data_dictionary["prediction_features"] = self.normalize_data_from_metadata( - self.data_dictionary["prediction_features"]) + # def pca_transform(self, filtered_dataframe: DataFrame) -> None: + # """ + # Use an existing pca transform to transform data into components + # :param filtered_dataframe: DataFrame = the cleaned dataframe + # """ + # pca_components = self.pca.transform(filtered_dataframe) + # self.data_dictionary["prediction_features"] = pd.DataFrame( + # data=pca_components, + # columns=["PC" + str(i) for i in range(0, self.data["n_kept_components"])], + # index=filtered_dataframe.index, + # ) + # # normalise transformed predictions to transformed training features + # self.data_dictionary["prediction_features"] = self.normalize_data_from_metadata( + # self.data_dictionary["prediction_features"]) - def compute_distances(self) -> float: - """ - Compute distances between each training point and every other training - point. 
This metric defines the neighborhood of trained data and is used - for prediction confidence in the Dissimilarity Index - """ - # logger.info("computing average mean distance for all training points") - pairwise = pairwise_distances( - self.data_dictionary["train_features"], n_jobs=self.thread_count) - # remove the diagonal distances which are itself distances ~0 - np.fill_diagonal(pairwise, np.NaN) - pairwise = pairwise.reshape(-1, 1) - avg_mean_dist = pairwise[~np.isnan(pairwise)].mean() + # def compute_distances(self) -> float: + # """ + # Compute distances between each training point and every other training + # point. This metric defines the neighborhood of trained data and is used + # for prediction confidence in the Dissimilarity Index + # """ + # # logger.info("computing average mean distance for all training points") + # pairwise = pairwise_distances( + # self.data_dictionary["train_features"], n_jobs=self.thread_count) + # # remove the diagonal distances which are itself distances ~0 + # np.fill_diagonal(pairwise, np.NaN) + # pairwise = pairwise.reshape(-1, 1) + # avg_mean_dist = pairwise[~np.isnan(pairwise)].mean() - return avg_mean_dist + # return avg_mean_dist - def get_outlier_percentage(self, dropped_pts: npt.NDArray) -> float: - """ - Check if more than X% of points werer dropped during outlier detection. - """ - outlier_protection_pct = self.freqai_config["feature_parameters"].get( - "outlier_protection_percentage", 30) - outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 - if outlier_pct >= outlier_protection_pct: - return outlier_pct - else: - return 0.0 + # def get_outlier_percentage(self, dropped_pts: npt.NDArray) -> float: + # """ + # Check if more than X% of points werer dropped during outlier detection. + # """ + # outlier_protection_pct = self.freqai_config["feature_parameters"].get( + # "outlier_protection_percentage", 30) + # outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 + # if outlier_pct >= outlier_protection_pct: + # return outlier_pct + # else: + # return 0.0 - def use_SVM_to_remove_outliers(self, predict: bool) -> None: - """ - Build/inference a Support Vector Machine to detect outliers - in training data and prediction - :param predict: bool = If true, inference an existing SVM model, else construct one - """ + # def use_SVM_to_remove_outliers(self, predict: bool) -> None: + # """ + # Build/inference a Support Vector Machine to detect outliers + # in training data and prediction + # :param predict: bool = If true, inference an existing SVM model, else construct one + # """ - if self.keras: - logger.warning( - "SVM outlier removal not currently supported for Keras based models. " - "Skipping user requested function." - ) - if predict: - self.do_predict = np.ones(len(self.data_dictionary["prediction_features"])) - return + # if self.keras: + # logger.warning( + # "SVM outlier removal not currently supported for Keras based models. " + # "Skipping user requested function." 
+ # ) + # if predict: + # self.do_predict = np.ones(len(self.data_dictionary["prediction_features"])) + # return - if predict: - if not self.svm_model: - logger.warning("No svm model available for outlier removal") - return - y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) - do_predict = np.where(y_pred == -1, 0, y_pred) + # if predict: + # if not self.svm_model: + # logger.warning("No svm model available for outlier removal") + # return + # y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) + # do_predict = np.where(y_pred == -1, 0, y_pred) - if (len(do_predict) - do_predict.sum()) > 0: - logger.info(f"SVM tossed {len(do_predict) - do_predict.sum()} predictions.") - self.do_predict += do_predict - self.do_predict -= 1 + # if (len(do_predict) - do_predict.sum()) > 0: + # logger.info(f"SVM tossed {len(do_predict) - do_predict.sum()} predictions.") + # self.do_predict += do_predict + # self.do_predict -= 1 - else: - # use SGDOneClassSVM to increase speed? - svm_params = self.freqai_config["feature_parameters"].get( - "svm_params", {"shuffle": False, "nu": 0.1}) - self.svm_model = linear_model.SGDOneClassSVM(**svm_params).fit( - self.data_dictionary["train_features"] - ) - y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) - kept_points = np.where(y_pred == -1, 0, y_pred) - # keep_index = np.where(y_pred == 1) - outlier_pct = self.get_outlier_percentage(1 - kept_points) - if outlier_pct: - logger.warning( - f"SVM detected {outlier_pct:.2f}% of the points as outliers. " - f"Keeping original dataset." - ) - self.svm_model = None - return + # else: + # # use SGDOneClassSVM to increase speed? + # svm_params = self.freqai_config["feature_parameters"].get( + # "svm_params", {"shuffle": False, "nu": 0.1}) + # self.svm_model = linear_model.SGDOneClassSVM(**svm_params).fit( + # self.data_dictionary["train_features"] + # ) + # y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) + # kept_points = np.where(y_pred == -1, 0, y_pred) + # # keep_index = np.where(y_pred == 1) + # outlier_pct = self.get_outlier_percentage(1 - kept_points) + # if outlier_pct: + # logger.warning( + # f"SVM detected {outlier_pct:.2f}% of the points as outliers. " + # f"Keeping original dataset." + # ) + # self.svm_model = None + # return - self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ - (y_pred == 1) - ] - self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ - (y_pred == 1) - ] - self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ - (y_pred == 1) - ] + # self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ + # (y_pred == 1) + # ] + # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ + # (y_pred == 1) + # ] + # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ + # (y_pred == 1) + # ] - logger.info( - f"SVM tossed {len(y_pred) - kept_points.sum()}" - f" train points from {len(y_pred)} total points." - ) + # logger.info( + # f"SVM tossed {len(y_pred) - kept_points.sum()}" + # f" train points from {len(y_pred)} total points." 
+ # ) - # same for test data - # TODO: This (and the part above) could be refactored into a separate function - # to reduce code duplication - if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: - y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) - kept_points = np.where(y_pred == -1, 0, y_pred) - self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ - (y_pred == 1) - ] - self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][( - y_pred == 1)] - self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ - (y_pred == 1) - ] + # # same for test data + # # TODO: This (and the part above) could be refactored into a separate function + # # to reduce code duplication + # if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: + # y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) + # kept_points = np.where(y_pred == -1, 0, y_pred) + # self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ + # (y_pred == 1) + # ] + # self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][( + # y_pred == 1)] + # self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ + # (y_pred == 1) + # ] - logger.info( - f"{self.pair}: SVM tossed {len(y_pred) - kept_points.sum()}" - f" test points from {len(y_pred)} total points." - ) + # logger.info( + # f"{self.pair}: SVM tossed {len(y_pred) - kept_points.sum()}" + # f" test points from {len(y_pred)} total points." + # ) - return + # return - def use_DBSCAN_to_remove_outliers(self, predict: bool, eps=None) -> None: - """ - Use DBSCAN to cluster training data and remove "noisy" data (read outliers). - User controls this via the config param `DBSCAN_outlier_pct` which indicates the - pct of training data that they want to be considered outliers. - :param predict: bool = If False (training), iterate to find the best hyper parameters - to match user requested outlier percent target. - If True (prediction), use the parameters determined from - the previous training to estimate if the current prediction point - is an outlier. - """ + # def use_DBSCAN_to_remove_outliers(self, predict: bool, eps=None) -> None: + # """ + # Use DBSCAN to cluster training data and remove "noisy" data (read outliers). + # User controls this via the config param `DBSCAN_outlier_pct` which indicates the + # pct of training data that they want to be considered outliers. + # :param predict: bool = If False (training), iterate to find the best hyper parameters + # to match user requested outlier percent target. + # If True (prediction), use the parameters determined from + # the previous training to estimate if the current prediction point + # is an outlier. 
+ # """ - if predict: - if not self.data['DBSCAN_eps']: - return - train_ft_df = self.data_dictionary['train_features'] - pred_ft_df = self.data_dictionary['prediction_features'] - num_preds = len(pred_ft_df) - df = pd.concat([train_ft_df, pred_ft_df], axis=0, ignore_index=True) - clustering = DBSCAN(eps=self.data['DBSCAN_eps'], - min_samples=self.data['DBSCAN_min_samples'], - n_jobs=self.thread_count - ).fit(df) - do_predict = np.where(clustering.labels_[-num_preds:] == -1, 0, 1) + # if predict: + # if not self.data['DBSCAN_eps']: + # return + # train_ft_df = self.data_dictionary['train_features'] + # pred_ft_df = self.data_dictionary['prediction_features'] + # num_preds = len(pred_ft_df) + # df = pd.concat([train_ft_df, pred_ft_df], axis=0, ignore_index=True) + # clustering = DBSCAN(eps=self.data['DBSCAN_eps'], + # min_samples=self.data['DBSCAN_min_samples'], + # n_jobs=self.thread_count + # ).fit(df) + # do_predict = np.where(clustering.labels_[-num_preds:] == -1, 0, 1) - if (len(do_predict) - do_predict.sum()) > 0: - logger.info(f"DBSCAN tossed {len(do_predict) - do_predict.sum()} predictions") - self.do_predict += do_predict - self.do_predict -= 1 + # if (len(do_predict) - do_predict.sum()) > 0: + # logger.info(f"DBSCAN tossed {len(do_predict) - do_predict.sum()} predictions") + # self.do_predict += do_predict + # self.do_predict -= 1 - else: + # else: - def normalise_distances(distances): - normalised_distances = (distances - distances.min()) / \ - (distances.max() - distances.min()) - return normalised_distances + # def normalise_distances(distances): + # normalised_distances = (distances - distances.min()) / \ + # (distances.max() - distances.min()) + # return normalised_distances - def rotate_point(origin, point, angle): - # rotate a point counterclockwise by a given angle (in radians) - # around a given origin - x = origin[0] + cos(angle) * (point[0] - origin[0]) - \ - sin(angle) * (point[1] - origin[1]) - y = origin[1] + sin(angle) * (point[0] - origin[0]) + \ - cos(angle) * (point[1] - origin[1]) - return (x, y) + # def rotate_point(origin, point, angle): + # # rotate a point counterclockwise by a given angle (in radians) + # # around a given origin + # x = origin[0] + cos(angle) * (point[0] - origin[0]) - \ + # sin(angle) * (point[1] - origin[1]) + # y = origin[1] + sin(angle) * (point[0] - origin[0]) + \ + # cos(angle) * (point[1] - origin[1]) + # return (x, y) - MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) - # measure pairwise distances to nearest neighbours - neighbors = NearestNeighbors( - n_neighbors=MinPts, n_jobs=self.thread_count) - neighbors_fit = neighbors.fit(self.data_dictionary['train_features']) - distances, _ = neighbors_fit.kneighbors(self.data_dictionary['train_features']) - distances = np.sort(distances, axis=0).mean(axis=1) + # MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) + # # measure pairwise distances to nearest neighbours + # neighbors = NearestNeighbors( + # n_neighbors=MinPts, n_jobs=self.thread_count) + # neighbors_fit = neighbors.fit(self.data_dictionary['train_features']) + # distances, _ = neighbors_fit.kneighbors(self.data_dictionary['train_features']) + # distances = np.sort(distances, axis=0).mean(axis=1) - normalised_distances = normalise_distances(distances) - x_range = np.linspace(0, 1, len(distances)) - line = np.linspace(normalised_distances[0], - normalised_distances[-1], len(normalised_distances)) - deflection = np.abs(normalised_distances - line) - max_deflection_loc = 
np.where(deflection == deflection.max())[0][0] - origin = x_range[max_deflection_loc], line[max_deflection_loc] - point = x_range[max_deflection_loc], normalised_distances[max_deflection_loc] - rot_angle = np.pi / 4 - elbow_loc = rotate_point(origin, point, rot_angle) + # normalised_distances = normalise_distances(distances) + # x_range = np.linspace(0, 1, len(distances)) + # line = np.linspace(normalised_distances[0], + # normalised_distances[-1], len(normalised_distances)) + # deflection = np.abs(normalised_distances - line) + # max_deflection_loc = np.where(deflection == deflection.max())[0][0] + # origin = x_range[max_deflection_loc], line[max_deflection_loc] + # point = x_range[max_deflection_loc], normalised_distances[max_deflection_loc] + # rot_angle = np.pi / 4 + # elbow_loc = rotate_point(origin, point, rot_angle) - epsilon = elbow_loc[1] * (distances[-1] - distances[0]) + distances[0] + # epsilon = elbow_loc[1] * (distances[-1] - distances[0]) + distances[0] - clustering = DBSCAN(eps=epsilon, min_samples=MinPts, - n_jobs=int(self.thread_count)).fit( - self.data_dictionary['train_features'] - ) + # clustering = DBSCAN(eps=epsilon, min_samples=MinPts, + # n_jobs=int(self.thread_count)).fit( + # self.data_dictionary['train_features'] + # ) - logger.info(f'DBSCAN found eps of {epsilon:.2f}.') + # logger.info(f'DBSCAN found eps of {epsilon:.2f}.') - self.data['DBSCAN_eps'] = epsilon - self.data['DBSCAN_min_samples'] = MinPts - dropped_points = np.where(clustering.labels_ == -1, 1, 0) + # self.data['DBSCAN_eps'] = epsilon + # self.data['DBSCAN_min_samples'] = MinPts + # dropped_points = np.where(clustering.labels_ == -1, 1, 0) - outlier_pct = self.get_outlier_percentage(dropped_points) - if outlier_pct: - logger.warning( - f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " - f"Keeping original dataset." - ) - self.data['DBSCAN_eps'] = 0 - return + # outlier_pct = self.get_outlier_percentage(dropped_points) + # if outlier_pct: + # logger.warning( + # f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " + # f"Keeping original dataset." + # ) + # self.data['DBSCAN_eps'] = 0 + # return - self.data_dictionary['train_features'] = self.data_dictionary['train_features'][ - (clustering.labels_ != -1) - ] - self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ - (clustering.labels_ != -1) - ] - self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ - (clustering.labels_ != -1) - ] + # self.data_dictionary['train_features'] = self.data_dictionary['train_features'][ + # (clustering.labels_ != -1) + # ] + # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ + # (clustering.labels_ != -1) + # ] + # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ + # (clustering.labels_ != -1) + # ] - logger.info( - f"DBSCAN tossed {dropped_points.sum()}" - f" train points from {len(clustering.labels_)}" - ) + # logger.info( + # f"DBSCAN tossed {dropped_points.sum()}" + # f" train points from {len(clustering.labels_)}" + # ) - return + # return - def compute_inlier_metric(self, set_='train') -> None: - """ - Compute inlier metric from backwards distance distributions. - This metric defines how well features from a timepoint fit - into previous timepoints. - """ + # def compute_inlier_metric(self, set_='train') -> None: + # """ + # Compute inlier metric from backwards distance distributions. + # This metric defines how well features from a timepoint fit + # into previous timepoints. 
+ # """ - def normalise(dataframe: DataFrame, key: str) -> DataFrame: - if set_ == 'train': - min_value = dataframe.min() - max_value = dataframe.max() - self.data[f'{key}_min'] = min_value - self.data[f'{key}_max'] = max_value - else: - min_value = self.data[f'{key}_min'] - max_value = self.data[f'{key}_max'] - return (dataframe - min_value) / (max_value - min_value) + # def normalise(dataframe: DataFrame, key: str) -> DataFrame: + # if set_ == 'train': + # min_value = dataframe.min() + # max_value = dataframe.max() + # self.data[f'{key}_min'] = min_value + # self.data[f'{key}_max'] = max_value + # else: + # min_value = self.data[f'{key}_min'] + # max_value = self.data[f'{key}_max'] + # return (dataframe - min_value) / (max_value - min_value) - no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] + # no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] - if set_ == 'train': - compute_df = copy.deepcopy(self.data_dictionary['train_features']) - elif set_ == 'test': - compute_df = copy.deepcopy(self.data_dictionary['test_features']) - else: - compute_df = copy.deepcopy(self.data_dictionary['prediction_features']) + # if set_ == 'train': + # compute_df = copy.deepcopy(self.data_dictionary['train_features']) + # elif set_ == 'test': + # compute_df = copy.deepcopy(self.data_dictionary['test_features']) + # else: + # compute_df = copy.deepcopy(self.data_dictionary['prediction_features']) - compute_df_reindexed = compute_df.reindex( - index=np.flip(compute_df.index) - ) + # compute_df_reindexed = compute_df.reindex( + # index=np.flip(compute_df.index) + # ) - pairwise = pd.DataFrame( - np.triu( - pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count) - ), - columns=compute_df_reindexed.index, - index=compute_df_reindexed.index - ) - pairwise = pairwise.round(5) + # pairwise = pd.DataFrame( + # np.triu( + # pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count) + # ), + # columns=compute_df_reindexed.index, + # index=compute_df_reindexed.index + # ) + # pairwise = pairwise.round(5) - column_labels = [ - '{}{}'.format('d', i) for i in range(1, no_prev_pts + 1) - ] - distances = pd.DataFrame( - columns=column_labels, index=compute_df.index - ) + # column_labels = [ + # '{}{}'.format('d', i) for i in range(1, no_prev_pts + 1) + # ] + # distances = pd.DataFrame( + # columns=column_labels, index=compute_df.index + # ) - for index in compute_df.index[no_prev_pts:]: - current_row = pairwise.loc[[index]] - current_row_no_zeros = current_row.loc[ - :, (current_row != 0).any(axis=0) - ] - distances.loc[[index]] = current_row_no_zeros.iloc[ - :, :no_prev_pts - ] - distances = distances.replace([np.inf, -np.inf], np.nan) - drop_index = pd.isnull(distances).any(axis=1) - distances = distances[drop_index == 0] + # for index in compute_df.index[no_prev_pts:]: + # current_row = pairwise.loc[[index]] + # current_row_no_zeros = current_row.loc[ + # :, (current_row != 0).any(axis=0) + # ] + # distances.loc[[index]] = current_row_no_zeros.iloc[ + # :, :no_prev_pts + # ] + # distances = distances.replace([np.inf, -np.inf], np.nan) + # drop_index = pd.isnull(distances).any(axis=1) + # distances = distances[drop_index == 0] - inliers = pd.DataFrame(index=distances.index) - for key in distances.keys(): - current_distances = distances[key].dropna() - current_distances = normalise(current_distances, key) - if set_ == 'train': - fit_params = stats.weibull_min.fit(current_distances) - self.data[f'{key}_fit_params'] = fit_params - else: - fit_params 
= self.data[f'{key}_fit_params'] - quantiles = stats.weibull_min.cdf(current_distances, *fit_params) + # inliers = pd.DataFrame(index=distances.index) + # for key in distances.keys(): + # current_distances = distances[key].dropna() + # current_distances = normalise(current_distances, key) + # if set_ == 'train': + # fit_params = stats.weibull_min.fit(current_distances) + # self.data[f'{key}_fit_params'] = fit_params + # else: + # fit_params = self.data[f'{key}_fit_params'] + # quantiles = stats.weibull_min.cdf(current_distances, *fit_params) - df_inlier = pd.DataFrame( - {key: quantiles}, index=distances.index - ) - inliers = pd.concat( - [inliers, df_inlier], axis=1 - ) + # df_inlier = pd.DataFrame( + # {key: quantiles}, index=distances.index + # ) + # inliers = pd.concat( + # [inliers, df_inlier], axis=1 + # ) - inlier_metric = pd.DataFrame( - data=inliers.sum(axis=1) / no_prev_pts, - columns=['%-inlier_metric'], - index=compute_df.index - ) + # inlier_metric = pd.DataFrame( + # data=inliers.sum(axis=1) / no_prev_pts, + # columns=['%-inlier_metric'], + # index=compute_df.index + # ) - inlier_metric = (2 * (inlier_metric - inlier_metric.min()) / - (inlier_metric.max() - inlier_metric.min()) - 1) + # inlier_metric = (2 * (inlier_metric - inlier_metric.min()) / + # (inlier_metric.max() - inlier_metric.min()) - 1) - if set_ in ('train', 'test'): - inlier_metric = inlier_metric.iloc[no_prev_pts:] - compute_df = compute_df.iloc[no_prev_pts:] - self.remove_beginning_points_from_data_dict(set_, no_prev_pts) - self.data_dictionary[f'{set_}_features'] = pd.concat( - [compute_df, inlier_metric], axis=1) - else: - self.data_dictionary['prediction_features'] = pd.concat( - [compute_df, inlier_metric], axis=1) - self.data_dictionary['prediction_features'].fillna(0, inplace=True) + # if set_ in ('train', 'test'): + # inlier_metric = inlier_metric.iloc[no_prev_pts:] + # compute_df = compute_df.iloc[no_prev_pts:] + # self.remove_beginning_points_from_data_dict(set_, no_prev_pts) + # self.data_dictionary[f'{set_}_features'] = pd.concat( + # [compute_df, inlier_metric], axis=1) + # else: + # self.data_dictionary['prediction_features'] = pd.concat( + # [compute_df, inlier_metric], axis=1) + # self.data_dictionary['prediction_features'].fillna(0, inplace=True) - logger.info('Inlier metric computed and added to features.') + # logger.info('Inlier metric computed and added to features.') - return None + # return None - def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): - features = self.data_dictionary[f'{set_}_features'] - weights = self.data_dictionary[f'{set_}_weights'] - labels = self.data_dictionary[f'{set_}_labels'] - self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:] - self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:] - self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:] + # def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): + # features = self.data_dictionary[f'{set_}_features'] + # weights = self.data_dictionary[f'{set_}_weights'] + # labels = self.data_dictionary[f'{set_}_labels'] + # self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:] + # self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:] + # self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:] def add_noise_to_training_features(self) -> None: """ diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 9cfda05ee..cacbfea67 100644 --- 
a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -23,6 +23,8 @@ from freqtrade.freqai.data_drawer import FreqaiDataDrawer from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.utils import get_tb_logger, plot_feature_importance, record_params from freqtrade.strategy.interface import IStrategy +from datasieve.pipeline import Pipeline +import datasieve.transforms as ds pd.options.mode.chained_assignment = None @@ -566,6 +568,32 @@ class IFreqaiModel(ABC): if ft_params.get("use_DBSCAN_to_remove_outliers", False): dk.use_DBSCAN_to_remove_outliers(predict=True) + def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: + ft_params = self.freqai_info["feature_parameters"] + dk.pipeline = Pipeline([('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) + + if ft_params.get("principal_component_analysis", False): + dk.pipeline.steps += [('pca', ds.DataSievePCA())] + dk.pipeline.steps += [('post-pca-scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))] + + if ft_params.get("use_SVM_to_remove_outliers", False): + dk.pipeline.steps += [('svm', ds.SVMOutlierExtractor())] + + if ft_params.get("DI_threshold", 0): + dk.pipeline.steps += [('di', ds.DissimilarityIndex())] + + if ft_params.get("use_DBSCAN_to_remove_outliers", False): + dk.pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())] + + dk.pipeline.fitparams = dk.pipeline._validate_fitparams({}, dk.pipeline.steps) + + # if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): + # dk.pipeline.extend(('noise', ds.Noise())) + + def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: + + dk.label_pipeline = Pipeline([('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) + def model_exists(self, dk: FreqaiDataKitchen) -> bool: """ Given a pair and path, check if a model already exists diff --git a/requirements-freqai.txt b/requirements-freqai.txt index ad069ade2..66da4e873 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,3 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 +datasieve==0.0.5 diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 13dc6b4b0..e3ef1612c 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -9,9 +9,9 @@ from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from tests.conftest import get_patched_exchange, log_has_re +from tests.conftest import get_patched_exchange # , log_has_re from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, - make_data_dictionary, make_unfiltered_dataframe) + make_unfiltered_dataframe) # make_data_dictionary, from tests.freqai.test_freqai_interface import is_mac @@ -72,66 +72,66 @@ def test_check_if_model_expired(mocker, freqai_conf): shutil.rmtree(Path(dk.full_path)) -def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog): - freqai = make_data_dictionary(mocker, freqai_conf) - # freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 1}) - freqai.dk.use_DBSCAN_to_remove_outliers(predict=False) - assert log_has_re(r"DBSCAN found eps of 1\.7\d\.", caplog) +# def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog): +# freqai = 
make_data_dictionary(mocker, freqai_conf) +# # freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 1}) +# freqai.dk.use_DBSCAN_to_remove_outliers(predict=False) +# assert log_has_re(r"DBSCAN found eps of 1\.7\d\.", caplog) -def test_compute_distances(mocker, freqai_conf): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) - avg_mean_dist = freqai.dk.compute_distances() - assert round(avg_mean_dist, 2) == 1.98 +# def test_compute_distances(mocker, freqai_conf): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) +# avg_mean_dist = freqai.dk.compute_distances() +# assert round(avg_mean_dist, 2) == 1.98 -def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) - freqai.dk.use_SVM_to_remove_outliers(predict=False) - assert log_has_re( - "SVM detected 7.83%", - caplog, - ) +# def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) +# freqai.dk.use_SVM_to_remove_outliers(predict=False) +# assert log_has_re( +# "SVM detected 7.83%", +# caplog, +# ) -def test_compute_inlier_metric(mocker, freqai_conf, caplog): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) - freqai.dk.compute_inlier_metric(set_='train') - assert log_has_re( - "Inlier metric computed and added to features.", - caplog, - ) +# def test_compute_inlier_metric(mocker, freqai_conf, caplog): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) +# freqai.dk.compute_inlier_metric(set_='train') +# assert log_has_re( +# "Inlier metric computed and added to features.", +# caplog, +# ) -def test_add_noise_to_training_features(mocker, freqai_conf): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) - freqai.dk.add_noise_to_training_features() +# def test_add_noise_to_training_features(mocker, freqai_conf): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) +# freqai.dk.add_noise_to_training_features() -def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai.dk.remove_beginning_points_from_data_dict(set_='train') +# def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai.dk.remove_beginning_points_from_data_dict(set_='train') -def test_principal_component_analysis(mocker, freqai_conf, caplog): - freqai = make_data_dictionary(mocker, freqai_conf) - freqai.dk.principal_component_analysis() - assert log_has_re( - "reduced feature dimension by", - caplog, - ) +# def test_principal_component_analysis(mocker, freqai_conf, caplog): +# freqai = make_data_dictionary(mocker, freqai_conf) +# freqai.dk.principal_component_analysis() +# assert log_has_re( +# "reduced feature dimension by", +# caplog, +# ) -def 
test_normalize_data(mocker, freqai_conf): - freqai = make_data_dictionary(mocker, freqai_conf) - data_dict = freqai.dk.data_dictionary - freqai.dk.normalize_data(data_dict) - assert any('_max' in entry for entry in freqai.dk.data.keys()) - assert any('_min' in entry for entry in freqai.dk.data.keys()) +# def test_normalize_data(mocker, freqai_conf): +# freqai = make_data_dictionary(mocker, freqai_conf) +# data_dict = freqai.dk.data_dictionary +# freqai.dk.normalize_data(data_dict) +# assert any('_max' in entry for entry in freqai.dk.data.keys()) +# assert any('_min' in entry for entry in freqai.dk.data.keys()) def test_filter_features(mocker, freqai_conf): From 636298bb719e0fde0395a05ee5f8f9790c1df977 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 27 May 2023 19:15:35 +0200 Subject: [PATCH 029/130] added test_lookahead_helper_export_to_csv --- tests/optimize/test_lookahead_analysis.py | 134 ++++++++++++++++++++-- 1 file changed, 126 insertions(+), 8 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index e9c5f0f85..85cd8fd66 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -1,5 +1,4 @@ # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument - from copy import deepcopy from pathlib import Path, PurePosixPath from unittest.mock import MagicMock, PropertyMock @@ -101,8 +100,8 @@ def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf, caplog): analysis = Analysis() - analysis.total_signals = 5 analysis.has_bias = True + analysis.total_signals = 5 analysis.false_entry_signals = 4 analysis.false_exit_signals = 3 @@ -118,9 +117,6 @@ def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf table, headers, data = (LookaheadAnalysisSubFunctions. text_table_lookahead_analysis_instances([instance])) - # check amount of returning rows - assert len(data) == 1 - # check row contents for a try that errored out assert data[0][0] == 'strategy_test_v3_with_lookahead_bias.py' assert data[0][1] == 'strategy_test_v3_with_lookahead_bias' @@ -146,10 +142,132 @@ def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf assert data[0][6] == 'falseIndicator1, falseIndicator2' + # check amount of returning rows + assert len(data) == 1 -def test_lookahead_helper_export_to_csv(): - # TODO - pytest.skip("TODO") + # check amount of multiple rows + table, headers, data = (LookaheadAnalysisSubFunctions. 
+                            text_table_lookahead_analysis_instances([instance, instance, instance]))
+    assert len(data) == 3
+
+
+def test_lookahead_helper_export_to_csv(lookahead_conf):
+    import pandas as pd
+    lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv"
+
+    # just to be sure the test won't fail: remove file if exists for some reason
+    # (repeat this at the end once again to clean up)
+    if Path(lookahead_conf['lookahead_analysis_exportfilename']).exists():
+        Path(lookahead_conf['lookahead_analysis_exportfilename']).unlink()
+
+    # 1st check: create a new file and verify its contents
+    analysis1 = Analysis()
+    analysis1.has_bias = True
+    analysis1.total_signals = 5
+    analysis1.false_entry_signals = 4
+    analysis1.false_exit_signals = 3
+    analysis1.false_indicators.append('falseIndicator1')
+    analysis1.false_indicators.append('falseIndicator2')
+    lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv"
+
+    strategy_obj1 = {
+            'name': "strat1",
+            'location': PurePosixPath("file1.py"),
+        }
+
+    instance1 = LookaheadAnalysis(lookahead_conf, strategy_obj1)
+    instance1.current_analysis = analysis1
+
+    LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance1])
+    saved_data1 = pd.read_csv(lookahead_conf['lookahead_analysis_exportfilename'])
+
+    expected_values1 = [
+        [
+            'file1.py', 'strat1', True,
+            5, 4, 3,
+            "falseIndicator1,falseIndicator2"
+        ],
+    ]
+    expected_columns = ['filename', 'strategy', 'has_bias',
+                        'total_signals', 'biased_entry_signals', 'biased_exit_signals',
+                        'biased_indicators']
+    expected_data1 = pd.DataFrame(expected_values1, columns=expected_columns)
+
+    assert Path(lookahead_conf['lookahead_analysis_exportfilename']).exists()
+    assert expected_data1.equals(saved_data1)
+
+    # 2nd check: update the same strategy (which internally changed or is being retested)
+    expected_values2 = [
+        [
+            'file1.py', 'strat1', False,
+            10, 11, 12,
+            "falseIndicator3,falseIndicator4"
+        ],
+    ]
+    expected_data2 = pd.DataFrame(expected_values2, columns=expected_columns)
+
+    analysis2 = Analysis()
+    analysis2.has_bias = False
+    analysis2.total_signals = 10
+    analysis2.false_entry_signals = 11
+    analysis2.false_exit_signals = 12
+    analysis2.false_indicators.append('falseIndicator3')
+    analysis2.false_indicators.append('falseIndicator4')
+
+    strategy_obj2 = {
+        'name': "strat1",
+        'location': PurePosixPath("file1.py"),
+    }
+
+    instance2 = LookaheadAnalysis(lookahead_conf, strategy_obj2)
+    instance2.current_analysis = analysis2
+
+    LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance2])
+    saved_data2 = pd.read_csv(lookahead_conf['lookahead_analysis_exportfilename'])
+
+    assert expected_data2.equals(saved_data2)
+
+    # 3rd check: now we add a new row to an already existing file
+    expected_values3 = [
+        [
+            'file1.py', 'strat1', False,
+            10, 11, 12,
+            "falseIndicator3,falseIndicator4"
+        ],
+        [
+            'file3.py', 'strat3', True,
+            20, 21, 22, "falseIndicator5,falseIndicator6"
+        ],
+    ]
+
+    expected_data3 = pd.DataFrame(expected_values3, columns=expected_columns)
+
+    analysis3 = Analysis()
+    analysis3.has_bias = True
+    analysis3.total_signals = 20
+    analysis3.false_entry_signals = 21
+    analysis3.false_exit_signals = 22
+    analysis3.false_indicators.append('falseIndicator5')
+    analysis3.false_indicators.append('falseIndicator6')
+    lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv"
+
+    strategy_obj3 = {
+        'name': "strat3",
+        'location': PurePosixPath("file3.py"),
+    }
+
+    instance3 = LookaheadAnalysis(lookahead_conf, strategy_obj3)
+    instance3.current_analysis = analysis3
+
+    LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance3])
+    saved_data3 = pd.read_csv(lookahead_conf['lookahead_analysis_exportfilename'])
+    assert expected_data3.equals(saved_data3)
+
+    # remove csv file after the test is done
+    if Path(lookahead_conf['lookahead_analysis_exportfilename']).exists():
+        Path(lookahead_conf['lookahead_analysis_exportfilename']).unlink()
 
 
 def test_initialize_single_lookahead_analysis():

From a7426755bc5892c0d9a4dc98a7f536612a6ec296 Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Sat, 27 May 2023 20:35:45 +0200
Subject: [PATCH 030/130] added a check for bias1. Looking at has_bias should
 be enough to satisfy the test. The tests could be extended with checking
 the buy/sell signals and the dataframe itself - but this should be
 sufficient for now.

---
 tests/optimize/test_lookahead_analysis.py     | 39 +++++++++++++------
 .../strategy_test_v3_with_lookahead_bias.py   | 13 ++++---
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py
index 85cd8fd66..1bd864906 100644
--- a/tests/optimize/test_lookahead_analysis.py
+++ b/tests/optimize/test_lookahead_analysis.py
@@ -173,9 +173,9 @@ def test_lookahead_helper_export_to_csv(lookahead_conf):
     lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv"
 
     strategy_obj1 = {
-            'name': "strat1",
-            'location': PurePosixPath("file1.py"),
-        }
+        'name': "strat1",
+        'location': PurePosixPath("file1.py"),
+    }
 
     instance1 = LookaheadAnalysis(lookahead_conf, strategy_obj1)
     instance1.current_analysis = analysis1
@@ -270,9 +270,24 @@ def test_lookahead_helper_export_to_csv(lookahead_conf):
         Path(lookahead_conf['lookahead_analysis_exportfilename']).unlink()
 
 
-def test_initialize_single_lookahead_analysis():
-    # TODO
-    pytest.skip("TODO")
+def test_initialize_single_lookahead_analysis(lookahead_conf, mocker):
+    mocker.patch('freqtrade.data.history.get_timerange', get_timerange)
+    mocker.patch(f'{EXMS}.get_fee', return_value=0.0)
+    mocker.patch(f'{EXMS}.get_min_pair_stake_amount', return_value=0.00001)
+    mocker.patch(f'{EXMS}.get_max_pair_stake_amount', return_value=float('inf'))
+    patch_exchange(mocker)
+    mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist',
+                 PropertyMock(return_value=['UNITTEST/BTC']))
+    lookahead_conf['pairs'] = ['UNITTEST/USDT']
+
+    lookahead_conf['timeframe'] = '5m'
+    lookahead_conf['timerange'] = '20180119-20180122'
+    strategy_obj = {
+        'name': "strat1",
+        'location': PurePosixPath("file1.py"),
+    }
+    LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis(
+        strategy_obj, lookahead_conf)
 
 
 @pytest.mark.parametrize('scenario', [
@@ -307,10 +322,10 @@ def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None:
     instance = LookaheadAnalysis(lookahead_conf, strategy_obj)
     instance.start()
     # Assert init correct
    assert log_has_re(f"Strategy Parameter: scenario = {scenario}", caplog)
-    # Assert bias detected
-    assert log_has_re(r".*bias detected.*", caplog)
-    # TODO: assert something ... most likely output (?) or instance state?
- # Assert False to see full logs in output - # assert False - # Run with `pytest tests/optimize/test_lookahead_analysis.py -k test_biased_strategy` + # check non-biased strategy + if scenario == "no_bias": + assert not instance.current_analysis.has_bias + # check biased strategy + elif scenario == "bias1": + assert instance.current_analysis.has_bias diff --git a/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py b/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py index d35b85b2d..e50d5d17b 100644 --- a/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py +++ b/tests/strategy/strats/lookahead_bias/strategy_test_v3_with_lookahead_bias.py @@ -29,12 +29,13 @@ class strategy_test_v3_with_lookahead_bias(IStrategy): def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: # bias is introduced here - ichi = ichimoku(dataframe, - conversion_line_period=20, - base_line_periods=60, - laggin_span=120, - displacement=30) - dataframe['chikou_span'] = ichi['chikou_span'] + if self.scenario.value != 'no_bias': + ichi = ichimoku(dataframe, + conversion_line_period=20, + base_line_periods=60, + laggin_span=120, + displacement=30) + dataframe['chikou_span'] = ichi['chikou_span'] return dataframe From 0ed84fbcc18bf6e5ecbb6667831348f94c95d32d Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 27 May 2023 20:47:59 +0200 Subject: [PATCH 031/130] added test_initialize_single_lookahead_analysis A check for a random variable should be enough, right? :) --- tests/optimize/test_lookahead_analysis.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 1bd864906..814c9f3b8 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -282,12 +282,10 @@ def test_initialize_single_lookahead_analysis(lookahead_conf, mocker): lookahead_conf['timeframe'] = '5m' lookahead_conf['timerange'] = '20180119-20180122' - strategy_obj = { - 'name': "strat1", - 'location': PurePosixPath("file1.py"), - } - LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( - strategy_obj, lookahead_conf) + strategy_obj = {'name': "strategy_test_v3_with_lookahead_bias"} + + instance = LookaheadAnalysis(lookahead_conf, strategy_obj) + assert instance.strategy_obj['name'] == "strategy_test_v3_with_lookahead_bias" @pytest.mark.parametrize('scenario', [ @@ -316,8 +314,7 @@ def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: } }) - strategy_obj = {} - strategy_obj['name'] = "strategy_test_v3_with_lookahead_bias" + strategy_obj = {'name': "strategy_test_v3_with_lookahead_bias"} instance = LookaheadAnalysis(lookahead_conf, strategy_obj) instance.start() # Assert init correct From 9bb25be88091c3455c7a8e60b1edca5fe1d4eab1 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sat, 27 May 2023 22:31:47 +0200 Subject: [PATCH 032/130] modified help-string for the cli-option lookahead_analysis_exportfilename moved doc from utils.md to lookahead-analysis.md and modified it (unfinished) added a check to automatically edit the config['backtest_cache'] to be 'none' --- docs/lookahead-analysis.md | 79 +++++++++++++++++++ docs/utils.md | 33 -------- freqtrade/commands/cli_options.py | 2 +- .../optimize/lookahead_analysis_helpers.py | 11 +++ 4 files changed, 91 insertions(+), 34 deletions(-) create mode 100644 docs/lookahead-analysis.md diff --git 
a/docs/lookahead-analysis.md b/docs/lookahead-analysis.md
new file mode 100644
index 000000000..22440a6d6
--- /dev/null
+++ b/docs/lookahead-analysis.md
@@ -0,0 +1,79 @@
+# Lookahead analysis
+This page explains how to validate your strategy for look ahead bias.
+
+Look ahead bias is the bane of any strategy since it is sometimes very easy to introduce -
+but very hard to detect.
+
+Backtesting initializes all timestamps at once and calculates all indicators in the beginning.
+This means that if your indicators (or the libraries they use) are allowed to peek at future candles,
+they will look into the future and falsify your backtest.
+
+Lookahead-analysis requires historic data to be available.
+To learn how to get data for the pairs and exchange you're interested in,
+head over to the [Data Downloading](data-download.md) section of the documentation.
+
+This command is built upon backtesting
+since it internally chains backtests and pokes at the strategy to provoke it to show look ahead bias.
+This is done by looking not at the strategy itself - but at the results it returned.
+The results are things like changed indicator-values and moved entries/exits compared to the full backtest.
+
+You can use the same options as for [Backtesting](backtesting.md).
+It also supports the lookahead-analysis of freqai strategies.
+
+`--cache` is enforced to be "none".
+
+## Lookahead-analysis command reference
+
+```
+usage: freqtrade lookahead-analysis [-h] [-v] [-V]
+                                    [--minimum-trade-amount INT]
+                                    [--targeted-trade-amount INT]
+                                    [--lookahead-analysis-exportfilename PATH]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --minimum-trade-amount INT
+                        Override the value of the `minimum_trade_amount` configuration
+                        setting
+                        Requires `--targeted-trade-amount` to be larger or equal to --minimum-trade-amount.
+                        (default: 10)
+  --targeted-trade-amount INT
+                        Override the value of the `targeted_trade_amount` configuration
+                        (default: 20)
+  --lookahead-analysis-exportfilename PATH
+                        Use this filename to save your lookahead-analysis-results to a csv file
+```
+
+
+#### Summary
+Checks a given strategy for look ahead bias via backtest-analysis.
+Look ahead bias means that the backtest uses data from future candles,
+making the strategy worthless beyond backtesting and giving the trader false hope.
+
+#### Introduction:
+Many strategies - without the programmer knowing - have fallen prey to look ahead bias.
+
+Any backtest will populate the full dataframe including all time stamps at the beginning.
+If the programmer is not careful or is oblivious to how things work internally
+(which sometimes can be really hard to find out) then the strategy will just look into the future,
+making it look amazing - but not realistic.
+
+This command is made to try to verify the absence of the aforementioned look ahead bias.
+
+#### How does the command work?
+It does not inspect the strategy code or its contents; instead it runs multiple backtests
+with precisely cut timeranges and analyzes the results each time, comparing them to the full timerange.
+
+At first, it starts a backtest over the whole duration
+and then repeats backtests from the same starting point to the respective points to watch.
+In addition, it compares the dataframes from the overall backtest to those of the cut ones.
+
+At the end it will return a result-table in the terminal.
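+
+As a quick sketch of a typical run (the strategy name, timerange and export filename below are
+placeholders, not defaults of the command), a lookahead-analysis over roughly one month of
+downloaded data could look like this:
+
+```
+freqtrade lookahead-analysis --strategy MyStrategy \
+    --timerange 20220101-20220201 \
+    --lookahead-analysis-exportfilename lookahead.csv
+```
+
+The resulting table (and the optional csv file) lists, per strategy, whether bias was found
+along with the biased entry/exit signals and indicators.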
+ +Hint: +If an entry or exit condition is only triggered rarely or the timerange was chosen +so only a few entry conditions are met +then the bias checker is unable to catch the biased entry or exit condition. +In the end it only checks which entry and exit signals have been triggered. + +---Flow chart here for better understanding--- diff --git a/docs/utils.md b/docs/utils.md index 798a87fae..900856af4 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -1010,36 +1010,3 @@ Common arguments: Path to userdata directory. ``` -### Lookahead - analysis -#### Summary -Checks a given strategy for look ahead bias via backtest-analysis -Look ahead bias means that the backtest uses data from future candles thereby not making it viable beyond backtesting -and producing false hopes for the one backtesting. - -#### Introduction: -Many strategies - without the programmer knowing - have fallen prey to look ahead bias. - -Any backtest will populate the full dataframe including all time stamps at the beginning. -If the programmer is not careful or oblivious how things work internally -(which sometimes can be really hard to find out) then it will just look into the future making the strategy amazing -but not realistic. - -The tool is made to try to verify the validity in the form of the aforementioned look ahead bias. - -#### How does the command work? -It will not look at the strategy or any contents itself but instead will run multiple backtests -by using precisely cut timeranges and analyzing the results each time, comparing to the full timerange. - -At first, it starts a backtest over the whole duration -and then repeats backtests from the same starting point to the respective points to watch. -In addition, it analyzes the dataframes form the overall backtest to the cut ones. - -At the end it will return a result-table in terminal. - -Hint: -If an entry or exit condition is only triggered rarely or the timerange was chosen -so only a few entry conditions are met -then the bias checker is unable to catch the biased entry or exit condition. -In the end it only checks which entry and exit signals have been triggered. - ----Flow chart here for better understanding--- diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index e4a864ea0..08283430e 100755 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -704,7 +704,7 @@ AVAILABLE_CLI_OPTIONS = { ), "lookahead_analysis_exportfilename": Arg( '--lookahead-analysis-exportfilename', - help="Use this filename to store lookahead-analysis-results", + help="Use this csv-filename to store lookahead-analysis-results", type=str ), } diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 54c63b78c..f212d8403 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -110,6 +110,17 @@ class LookaheadAnalysisSubFunctions: "targeted trade amount can't be smaller than minimum trade amount." ) + # enforce cache to be 'none', shift it to 'none' if not already + # (since the default value is 'day') + if config.get('backtest_cache') is None: + config['backtest_cache'] = 'none' + elif config['backtest_cache'] != 'none': + logger.info(f"backtest_cache = " + f"{config['backtest_cache']} detected. " + f"Inside lookahead-analysis it is enforced to be 'none'. 
" + f"Changed it to 'none'") + config['backtest_cache'] = 'none' + strategy_objs = StrategyResolver.search_all_objects( config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) From eec78371672e11886662a10daf8cf5e5a8363abe Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sun, 28 May 2023 20:52:58 +0200 Subject: [PATCH 033/130] - modified help-string for the cli-option lookahead_analysis_exportfilename - moved doc from utils.md to lookahead-analysis.md and modified it (unfinished) - added a check to automatically edit the config['backtest_cache'] to be 'none' - adjusted test_lookahead_helper_export_to_csv to catch the new catching of errors - adjusted test_lookahead_helper_text_table_lookahead_analysis_instances to catch the new catching of errors - changed lookahead_analysis.start result-reporting to show that not enough trades were caught including x of y --- freqtrade/optimize/lookahead_analysis.py | 19 ++++-- .../optimize/lookahead_analysis_helpers.py | 43 ++++++++++---- tests/optimize/test_lookahead_analysis.py | 59 +++++++++++-------- 3 files changed, 80 insertions(+), 41 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index e40322c88..4f3d7a4d0 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -250,12 +250,19 @@ class LookaheadAnalysis: self.analyze_row(idx, result_row) # check and report signals - if (self.current_analysis.false_entry_signals > 0 or - self.current_analysis.false_exit_signals > 0 or - len(self.current_analysis.false_indicators) > 0): - logger.info(f" => {self.local_config['strategy']} + : bias detected!") + if self.current_analysis.total_signals < self.local_config['minimum_trade_amount']: + logger.info(f" -> {self.local_config['strategy']} : too few trades. " + f"We only found {self.current_analysis.total_signals} trades. 
" + f"Hint: Extend the timerange " + f"to get at least {self.local_config['minimum_trade_amount']} " + f"or lower the value of minimum_trade_amount.") + self.failed_bias_check = True + elif (self.current_analysis.false_entry_signals > 0 or + self.current_analysis.false_exit_signals > 0 or + len(self.current_analysis.false_indicators) > 0): + logger.info(f" => {self.local_config['strategy']} : bias detected!") self.current_analysis.has_bias = True + self.failed_bias_check = False else: logger.info(self.local_config['strategy'] + ": no bias detected") - - self.failed_bias_check = False + self.failed_bias_check = False diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index f212d8403..49f225943 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -16,12 +16,24 @@ logger = logging.getLogger(__name__) class LookaheadAnalysisSubFunctions: @staticmethod - def text_table_lookahead_analysis_instances(lookahead_instances: List[LookaheadAnalysis]): + def text_table_lookahead_analysis_instances( + config: Dict[str, Any], + lookahead_instances: List[LookaheadAnalysis]): headers = ['filename', 'strategy', 'has_bias', 'total_signals', 'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'] data = [] for inst in lookahead_instances: - if inst.failed_bias_check: + if config['minimum_trade_amount'] > inst.current_analysis.total_signals: + data.append( + [ + inst.strategy_obj['location'].parts[-1], + inst.strategy_obj['name'], + "too few trades caught " + f"({inst.current_analysis.total_signals}/{config['minimum_trade_amount']})." + f"Test failed." + ] + ) + elif inst.failed_bias_check: data.append( [ inst.strategy_obj['location'].parts[-1], @@ -77,14 +89,21 @@ class LookaheadAnalysisSubFunctions: index=None) for inst in lookahead_analysis: - new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], - 'strategy': inst.strategy_obj['name'], - 'has_bias': inst.current_analysis.has_bias, - 'total_signals': inst.current_analysis.total_signals, - 'biased_entry_signals': inst.current_analysis.false_entry_signals, - 'biased_exit_signals': inst.current_analysis.false_exit_signals, - 'biased_indicators': ",".join(inst.current_analysis.false_indicators)} - csv_df = add_or_update_row(csv_df, new_row_data) + # only update if + if (inst.current_analysis.total_signals > config['minimum_trade_amount'] + and inst.failed_bias_check is not True): + new_row_data = {'filename': inst.strategy_obj['location'].parts[-1], + 'strategy': inst.strategy_obj['name'], + 'has_bias': inst.current_analysis.has_bias, + 'total_signals': + int(inst.current_analysis.total_signals), + 'biased_entry_signals': + int(inst.current_analysis.false_entry_signals), + 'biased_exit_signals': + int(inst.current_analysis.false_exit_signals), + 'biased_indicators': + ",".join(inst.current_analysis.false_indicators)} + csv_df = add_or_update_row(csv_df, new_row_data) logger.info(f"saving {config['lookahead_analysis_exportfilename']}") csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) @@ -122,7 +141,7 @@ class LookaheadAnalysisSubFunctions: config['backtest_cache'] = 'none' strategy_objs = StrategyResolver.search_all_objects( - config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) + config, enum_failed=False, recursive=config.get('recursive_strategy_search', False)) lookaheadAnalysis_instances = [] @@ -147,7 +166,7 @@ class LookaheadAnalysisSubFunctions: # 
     if lookaheadAnalysis_instances:
         LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances(
-            lookaheadAnalysis_instances)
+            config, lookaheadAnalysis_instances)
         if config.get('lookahead_analysis_exportfilename') is not None:
             LookaheadAnalysisSubFunctions.export_to_csv(config, lookaheadAnalysis_instances)
     else:

diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py
index 814c9f3b8..476627c57 100644
--- a/tests/optimize/test_lookahead_analysis.py
+++ b/tests/optimize/test_lookahead_analysis.py
@@ -115,30 +115,40 @@ def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf
     instance = LookaheadAnalysis(lookahead_conf, strategy_obj)
     instance.current_analysis = analysis
     table, headers, data = (LookaheadAnalysisSubFunctions.
-                            text_table_lookahead_analysis_instances([instance]))
+                            text_table_lookahead_analysis_instances(lookahead_conf, [instance]))

-    # check row contents for a try that errored out
+    # check row contents for a try that has too few signals
     assert data[0][0] == 'strategy_test_v3_with_lookahead_bias.py'
     assert data[0][1] == 'strategy_test_v3_with_lookahead_bias'
-    assert data[0][2].__contains__('error')
+    assert data[0][2].__contains__('too few trades')
     assert len(data[0]) == 3

+    # now check for an error which occurred after enough trades
+    analysis.total_signals = 12
+    analysis.false_entry_signals = 11
+    analysis.false_exit_signals = 10
+    instance = LookaheadAnalysis(lookahead_conf, strategy_obj)
+    instance.current_analysis = analysis
+    table, headers, data = (LookaheadAnalysisSubFunctions.
+                            text_table_lookahead_analysis_instances(lookahead_conf, [instance]))
+    assert data[0][2].__contains__("error")
+
     # edit it so it no longer shows an error
     instance.failed_bias_check = False
     table, headers, data = (LookaheadAnalysisSubFunctions.
-                            text_table_lookahead_analysis_instances([instance]))
+                            text_table_lookahead_analysis_instances(lookahead_conf, [instance]))
     assert data[0][0] == 'strategy_test_v3_with_lookahead_bias.py'
     assert data[0][1] == 'strategy_test_v3_with_lookahead_bias'
     assert data[0][2]  # True
-    assert data[0][3] == 5
-    assert data[0][4] == 4
-    assert data[0][5] == 3
+    assert data[0][3] == 12
+    assert data[0][4] == 11
+    assert data[0][5] == 10
     assert data[0][6] == ''

     analysis.false_indicators.append('falseIndicator1')
     analysis.false_indicators.append('falseIndicator2')
     table, headers, data = (LookaheadAnalysisSubFunctions.
-                            text_table_lookahead_analysis_instances([instance]))
+                            text_table_lookahead_analysis_instances(lookahead_conf, [instance]))
     assert data[0][6] == 'falseIndicator1, falseIndicator2'

     assert len(data) == 1

     # check amount of multiple rows
-    table, headers, data = (LookaheadAnalysisSubFunctions.
- text_table_lookahead_analysis_instances([instance, instance, instance])) + table, headers, data = (LookaheadAnalysisSubFunctions.text_table_lookahead_analysis_instances( + lookahead_conf, [instance, instance, instance])) assert len(data) == 3 @@ -165,9 +175,9 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): # 1st check: create a new file and verify its contents analysis1 = Analysis() analysis1.has_bias = True - analysis1.total_signals = 5 - analysis1.false_entry_signals = 4 - analysis1.false_exit_signals = 3 + analysis1.total_signals = 12 + analysis1.false_entry_signals = 11 + analysis1.false_exit_signals = 10 analysis1.false_indicators.append('falseIndicator1') analysis1.false_indicators.append('falseIndicator2') lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv" @@ -178,6 +188,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): } instance1 = LookaheadAnalysis(lookahead_conf, strategy_obj1) + instance1.failed_bias_check = False instance1.current_analysis = analysis1 LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance1]) @@ -186,7 +197,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): expected_values1 = [ [ 'file1.py', 'strat1', True, - 5, 4, 3, + 12, 11, 10, "falseIndicator1,falseIndicator2" ], ] @@ -202,7 +213,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): expected_values2 = [ [ 'file1.py', 'strat1', False, - 10, 11, 12, + 22, 21, 20, "falseIndicator3,falseIndicator4" ], ] @@ -210,9 +221,9 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): analysis2 = Analysis() analysis2.has_bias = False - analysis2.total_signals = 10 - analysis2.false_entry_signals = 11 - analysis2.false_exit_signals = 12 + analysis2.total_signals = 22 + analysis2.false_entry_signals = 21 + analysis2.false_exit_signals = 20 analysis2.false_indicators.append('falseIndicator3') analysis2.false_indicators.append('falseIndicator4') @@ -222,6 +233,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): } instance2 = LookaheadAnalysis(lookahead_conf, strategy_obj2) + instance2.failed_bias_check = False instance2.current_analysis = analysis2 LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance2]) @@ -233,12 +245,12 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): expected_values3 = [ [ 'file1.py', 'strat1', False, - 10, 11, 12, + 22, 21, 20, "falseIndicator3,falseIndicator4" ], [ 'file3.py', 'strat3', True, - 20, 21, 22, "falseIndicator5,falseIndicator6" + 32, 31, 30, "falseIndicator5,falseIndicator6" ], ] @@ -246,9 +258,9 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): analysis3 = Analysis() analysis3.has_bias = True - analysis3.total_signals = 20 - analysis3.false_entry_signals = 21 - analysis3.false_exit_signals = 22 + analysis3.total_signals = 32 + analysis3.false_entry_signals = 31 + analysis3.false_exit_signals = 30 analysis3.false_indicators.append('falseIndicator5') analysis3.false_indicators.append('falseIndicator6') lookahead_conf['lookahead_analysis_exportfilename'] = "temp_csv_lookahead_analysis.csv" @@ -259,6 +271,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): } instance3 = LookaheadAnalysis(lookahead_conf, strategy_obj3) + instance3.failed_bias_check = False instance3.current_analysis = analysis3 LookaheadAnalysisSubFunctions.export_to_csv(lookahead_conf, [instance3]) From 6b3b5f201d7e3b00bfc54d885aa89dadba202103 Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sun, 28 May 2023 22:13:29 +0200 Subject: [PATCH 034/130] export_to_csv: Added 
forced conversion of float64 to int to remove the .0 values once and for all ... --- freqtrade/optimize/lookahead_analysis_helpers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 49f225943..0eccf0526 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -105,6 +105,16 @@ class LookaheadAnalysisSubFunctions: ",".join(inst.current_analysis.false_indicators)} csv_df = add_or_update_row(csv_df, new_row_data) + # Fill NaN values with a default value (e.g., 0) + csv_df['total_signals'] = csv_df['total_signals'].fillna(0) + csv_df['biased_entry_signals'] = csv_df['biased_entry_signals'].fillna(0) + csv_df['biased_exit_signals'] = csv_df['biased_exit_signals'].fillna(0) + + # Convert columns to integers + csv_df['total_signals'] = csv_df['total_signals'].astype(int) + csv_df['biased_entry_signals'] = csv_df['biased_entry_signals'].astype(int) + csv_df['biased_exit_signals'] = csv_df['biased_exit_signals'].astype(int) + logger.info(f"saving {config['lookahead_analysis_exportfilename']}") csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False) From e57265361606dae1f34e957f675b333b73587a75 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 29 May 2023 13:33:29 +0200 Subject: [PATCH 035/130] bring classifier/rl up to new paradigm. ensure tests pass. remove old code. add documentation, add new example transform --- docs/freqai-feature-engineering.md | 84 +++- .../RL/BaseReinforcementLearningModel.py | 45 +- .../freqai/base_models/BaseClassifierModel.py | 34 +- .../base_models/BasePyTorchClassifier.py | 72 ++- .../freqai/base_models/BasePyTorchModel.py | 54 +-- .../base_models/BasePyTorchRegressor.py | 74 ++- .../freqai/base_models/BaseRegressionModel.py | 29 +- freqtrade/freqai/data_drawer.py | 29 +- freqtrade/freqai/data_kitchen.py | 441 +----------------- freqtrade/freqai/freqai_interface.py | 93 +--- .../PyTorchTransformerRegressor.py | 16 +- .../prediction_models/XGBoostRFRegressor.py | 4 + .../prediction_models/XGBoostRegressor.py | 23 + freqtrade/freqai/transforms/__init__.py | 6 + .../freqai/transforms/quantile_transform.py | 28 ++ freqtrade/resolvers/freqaimodel_resolver.py | 2 +- tests/freqai/test_freqai_datakitchen.py | 66 +-- tests/freqai/test_freqai_interface.py | 44 +- 18 files changed, 390 insertions(+), 754 deletions(-) create mode 100644 freqtrade/freqai/transforms/__init__.py create mode 100644 freqtrade/freqai/transforms/quantile_transform.py diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 82b7569a5..eb4b4272e 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -209,15 +209,67 @@ Another example, where the user wants to use live metrics from the trade databas You need to set the standard dictionary in the config so that FreqAI can return proper dataframe shapes. These values will likely be overridden by the prediction model, but in the case where the model has yet to set them, or needs a default initial value, the pre-set values are what will be returned. -## Feature normalization +### Weighting features for temporal importance -FreqAI is strict when it comes to data normalization. 
The train features, $X^{train}$, are always normalized to [-1, 1] using a shifted min-max normalization:
-
-$$X^{train}_{norm} = 2 * \frac{X^{train} - X^{train}.min()}{X^{train}.max() - X^{train}.min()} - 1$$
-
-All other data (test data and unseen prediction data in dry/live/backtest) is always automatically normalized to the training feature space according to industry standards. FreqAI stores all the metadata required to ensure that test and prediction features will be properly normalized and that predictions are properly denormalized. For this reason, it is not recommended to eschew industry standards and modify FreqAI internals - however - advanced users can do so by inheriting `train()` in their custom `IFreqaiModel` and using their own normalization functions.
-
-## Data dimensionality reduction with Principal Component Analysis
+### Weighting features for temporal importance
+
+FreqAI allows you to set a `weight_factor` to weight recent data more strongly than past data via an exponential function:
+
+$$ W_i = \exp(\frac{-i}{\alpha*n}) $$
+
+where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. Below is a figure showing the effect of different weight factors on the data points in a feature set.
+
+![weight-factor](assets/freqai_weight-factor.jpg)
+
+## Building the data pipeline
+
+FreqAI uses the [`DataSieve`](https://github.com/emergentmethods/datasieve) pipeline, which follows the SKLearn pipeline API but adds, among other features, coherence between the X, y, and sample_weight vectors during point removals, as well as feature-name tracking across feature removals.
+
+This means that users can use/customize any SKLearn modules and easily add them to their FreqAI data pipeline. By default, FreqAI builds the following pipeline:
+
+```py
+dk.feature_pipeline = Pipeline([
+    ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))),
+    ('di', ds.DissimilarityIndex(di_threshold=1)),
+    ])
+```
+
+But users will find that they can add PCA and other steps just by changing their configuration settings. For example, if you add `"principal_component_analysis": true` to the `feature_parameters` dict in the `freqai` config, then FreqAI will add the PCA step for you, resulting in the following pipeline:
+
+```py
+dk.feature_pipeline = Pipeline([
+    ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))),
+    ('pca', ds.DataSievePCA()),
+    ('post-pca-scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))),
+    ('di', ds.DissimilarityIndex(di_threshold=1)),
+    ])
+```
+
+The same concept follows if users activate other config options like `"use_SVM_to_remove_outliers": true` or `"use_DBSCAN_to_remove_outliers": true`. FreqAI will add the appropriate steps to the pipeline for you.
+
+## Customizing the pipeline
+
+Users are encouraged to customize the data pipeline to their needs by building their own. This can be done by overriding `define_data_pipeline` in their `IFreqaiModel`. For example:
+
+```py
+    def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None:
+        """
+        User defines their custom feature pipeline here (if they wish)
+        """
+        from freqtrade.freqai.transforms import FreqaiQuantileTransformer
+        dk.feature_pipeline = Pipeline([
+            ('qt', FreqaiQuantileTransformer(output_distribution='normal'))
+            ])
+
+        return
+```
+
+Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. If you have a custom step that you would like to add to the pipeline, you simply create a class that follows the DataSieve/SKLearn API. That means your step must have a `fit()`, `transform()`, `fit_transform()`, and `inverse_transform()` method. You can see examples of this in the `freqtrade.freqai.transforms` module, where we wrap SKLearn's `QuantileTransformer` to create a new step for the pipeline.
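+
+For orientation, a minimal sketch of such a custom step is shown below. The class name `CenterStep` and the centering transform are purely illustrative (they are not part of freqtrade or DataSieve); the signatures simply follow the X, y, sample_weight pass-through convention described here:
+
+```py
+import numpy as np
+
+
+class CenterStep:
+    """
+    Illustrative pipeline step that centers features on the training mean.
+    Every method accepts and returns X, y and sample_weight so that point
+    removals elsewhere in the pipeline stay coherent.
+    """
+
+    def fit(self, X, y=None, sample_weight=None):
+        # learn the per-feature means from the training data only
+        self.means_ = np.nanmean(X, axis=0)
+        return self
+
+    def transform(self, X, y=None, sample_weight=None, **kwargs):
+        # shift the features; y and sample_weight pass straight through
+        return X - self.means_, y, sample_weight
+
+    def fit_transform(self, X, y=None, sample_weight=None):
+        return self.fit(X, y, sample_weight).transform(X, y, sample_weight)
+
+    def inverse_transform(self, X, y=None, sample_weight=None):
+        # undo the shift, e.g. when the step is used in a label pipeline
+        return X + self.means_, y, sample_weight
+```
+
+Such a step can then be added to `dk.feature_pipeline` (or `dk.label_pipeline`) exactly like the built-in steps above.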
+
+As there is the `feature_pipeline`, there also exists a definition for the `label_pipeline`, which can be defined the same way as the `feature_pipeline`, by overriding `define_label_pipeline`.
+
+!!! note "Inheritance required"
+    While most SKLearn methods are very easy to override, as shown in freqtrade/freqai/transforms/quantile_transform.py, they still need to pass X, y, and sample_weights through all `fit()`, `transform()`, `fit_transform()` and `inverse_transform()` functions, even if that means a direct pass-through without modifications.
+
+
 ## Outlier detection

@@ -259,7 +301,7 @@ Equity and crypto markets suffer from a high level of non-patterned noise in the

 ### Identifying outliers with the Dissimilarity Index (DI)

-  The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each prediction made by the model.
+The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each prediction made by the model.

 You can tell FreqAI to remove outlier data points from the training/test data sets using the DI by including the following statement in the config:

@@ -271,7 +313,7 @@ You can tell FreqAI to remove outlier data points from the training/test data se
     }
 ```

-  The DI allows predictions which are outliers (not existent in the model feature space) to be thrown out due to low levels of certainty. To do so, FreqAI measures the distance between each training data point (feature vector), $X_{a}$, and all other training data points:
+This will add the `DissimilarityIndex` step to your `feature_pipeline` and set the threshold to 1. The DI allows predictions which are outliers (not existent in the model feature space) to be thrown out due to low levels of certainty. To do so, FreqAI measures the distance between each training data point (feature vector), $X_{a}$, and all other training data points:

 $$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$

@@ -305,9 +347,9 @@ You can tell FreqAI to remove outlier data points from the training/test data se
     }
 ```

-The SVM will be trained on the training data and any data point that the SVM deems to be beyond the feature space will be removed.
+This will add the `SVMOutlierExtractor` step to your `feature_pipeline`. The SVM will be trained on the training data, and any data point that the SVM deems to be beyond the feature space will be removed.

-FreqAI uses `sklearn.linear_model.SGDOneClassSVM` (details are available on scikit-learn's webpage [here](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html) (external website)) and you can elect to provide additional parameters for the SVM, such as `shuffle`, and `nu`.
+You can elect to provide additional parameters for the SVM, such as `shuffle` and `nu`, via the `feature_parameters.svm_params` dictionary in the config.

 The parameter `shuffle` is by default set to `False` to ensure consistent results. If it is set to `True`, running the SVM multiple times on the same data set might result in different outcomes due to `max_iter` being too low for the algorithm to reach the demanded `tol`. Increasing `max_iter` solves this issue but causes the procedure to take a longer time.
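+
+As a rough sketch of the wiring (taken from the `define_data_pipeline` changes to `freqai_interface.py` further down in this patch), the configured `svm_params` are simply unpacked into the pipeline step:
+
+```py
+# inside IFreqaiModel.define_data_pipeline (see freqai_interface.py below):
+# the svm_params dict from the config is passed straight to the step
+svm_params = ft_params.get(
+    "svm_params", {"shuffle": False, "nu": 0.01})
+dk.feature_pipeline.steps += [('svm', ds.SVMOutlierExtractor(**svm_params))]
+```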
@@ -325,7 +367,7 @@ You can configure FreqAI to use DBSCAN to cluster and remove outliers from the t } ``` -DBSCAN is an unsupervised machine learning algorithm that clusters data without needing to know how many clusters there should be. +Which will add the `DataSieveDBSCAN` step to your `feature_pipeline`. This is an unsupervised machine learning algorithm that clusters data without needing to know how many clusters there should be. Given a number of data points $N$, and a distance $\varepsilon$, DBSCAN clusters the data set by setting all data points that have $N-1$ other data points within a distance of $\varepsilon$ as *core points*. A data point that is within a distance of $\varepsilon$ from a *core point* but that does not have $N-1$ other data points within a distance of $\varepsilon$ from itself is considered an *edge point*. A cluster is then the collection of *core points* and *edge points*. Data points that have no other data points at a distance $<\varepsilon$ are considered outliers. The figure below shows a cluster with $N = 3$. diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 8ee3c7c56..bd22decaa 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -82,6 +82,9 @@ class BaseReinforcementLearningModel(IFreqaiModel): if self.ft_params.get('use_DBSCAN_to_remove_outliers', False): self.ft_params.update({'use_DBSCAN_to_remove_outliers': False}) logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.') + if self.ft_params.get('DI_threshold', False): + self.ft_params.update({'DI_threshold': False}) + logger.warning('User tried to use DI_threshold with RL. Deactivating DI_threshold.') if self.freqai_info['data_split_parameters'].get('shuffle', False): self.freqai_info['data_split_parameters'].update({'shuffle': False}) logger.warning('User tried to shuffle training data. 
Setting shuffle to False') @@ -107,27 +110,40 @@ class BaseReinforcementLearningModel(IFreqaiModel): training_filter=True, ) - data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( + d: Dict[str, Any] = dk.make_train_test_datasets( features_filtered, labels_filtered) - self.df_raw = copy.deepcopy(data_dictionary["train_features"]) + self.df_raw = copy.deepcopy(d["train_features"]) dk.fit_labels() # FIXME useless for now, but just satiating append methods # normalize all data based on train_dataset only prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk) - data_dictionary = dk.normalize_data(data_dictionary) + self.define_data_pipeline(dk) + self.define_label_pipeline(dk) - # data cleaning/analysis - self.data_cleaning_train(dk) + # d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) + # d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + + (d["train_features"], + d["train_labels"], + d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) + + (d["test_features"], + d["test_labels"], + d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) logger.info( f'Training model on {len(dk.data_dictionary["train_features"].columns)}' - f' features and {len(data_dictionary["train_features"])} data points' + f' features and {len(d["train_features"])} data points' ) - self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk) + self.set_train_and_eval_environments(d, prices_train, prices_test, dk) - model = self.fit(data_dictionary, dk) + model = self.fit(d, dk) logger.info(f"--------------------done training {pair}--------------------") @@ -236,18 +252,19 @@ class BaseReinforcementLearningModel(IFreqaiModel): unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_dataframe = self.drop_ohlc_from_df(filtered_dataframe, dk) + dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe - - # optional additional data cleaning/analysis - self.data_cleaning_predict(dk) + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) pred_df = self.rl_model_predict( dk.data_dictionary["prediction_features"], dk, self.model) pred_df.fillna(0, inplace=True) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + dk.do_predict = outliers.to_numpy() + return (pred_df, dk.do_predict) def rl_model_predict(self, dataframe: DataFrame, diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py index ffd42dd1d..179e8a5af 100644 --- a/freqtrade/freqai/base_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -50,21 +50,30 @@ class BaseClassifierModel(IFreqaiModel): logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date} --------------------") # split data into train/test data. 
- data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + d = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) + self.define_data_pipeline(dk) + self.define_label_pipeline(dk) - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) + (d["train_features"], + d["train_labels"], + d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) + + (d["test_features"], + d["test_labels"], + d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") + logger.info(f"Training model on {len(d['train_features'])} data points") - model = self.fit(data_dictionary, dk) + model = self.fit(d, dk) end_time = time() @@ -89,10 +98,11 @@ class BaseClassifierModel(IFreqaiModel): filtered_df, _ = dk.filter_features( unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df - self.data_cleaning_predict(dk) + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) if self.CONV_WIDTH == 1: @@ -107,4 +117,10 @@ class BaseClassifierModel(IFreqaiModel): pred_df = pd.concat([pred_df, pred_df_prob], axis=1) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() + return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py index 436294dcc..448384852 100644 --- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py +++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py @@ -1,5 +1,6 @@ import logging -from typing import Dict, List, Tuple +from time import time +from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt @@ -68,9 +69,12 @@ class BasePyTorchClassifier(BasePyTorchModel): filtered_df, _ = dk.filter_features( unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df - self.data_cleaning_predict(dk) + + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + x = self.data_convertor.convert_x( dk.data_dictionary["prediction_features"], device=self.device @@ -85,6 +89,13 @@ class BasePyTorchClassifier(BasePyTorchModel): pred_df_prob = DataFrame(probs.detach().tolist(), columns=class_names) pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]]) pred_df = pd.concat([pred_df, pred_df_prob], axis=1) + + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + 
dk.do_predict = outliers.to_numpy() + return (pred_df, dk.do_predict) def encode_class_names( @@ -149,3 +160,58 @@ class BasePyTorchClassifier(BasePyTorchModel): ) return self.class_names + + def train( + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + ) -> Any: + """ + Filter the training data and train a model to it. Train makes heavy use of the datakitchen + for storing, saving, loading, and analyzing the data. + :param unfiltered_df: Full dataframe for the current training period + :return: + :model: Trained model which can be used to inference (self.predict) + """ + + logger.info(f"-------------------- Starting training {pair} --------------------") + + start_time = time() + + features_filtered, labels_filtered = dk.filter_features( + unfiltered_df, + dk.training_features_list, + dk.label_list, + training_filter=True, + ) + + # split data into train/test data. + d = dk.make_train_test_datasets(features_filtered, labels_filtered) + if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: + dk.fit_labels() + + d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) + d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + + (d["train_features"], + d["train_labels"], + d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) + + (d["test_features"], + d["test_labels"], + d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) + + logger.info( + f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" + ) + logger.info(f"Training model on {len(d['train_features'])} data points") + + model = self.fit(d, dk) + end_time = time() + + logger.info(f"-------------------- Done training {pair} " + f"({end_time - start_time:.2f} secs) --------------------") + + return model diff --git a/freqtrade/freqai/base_models/BasePyTorchModel.py b/freqtrade/freqai/base_models/BasePyTorchModel.py index 21dc4e894..71369a146 100644 --- a/freqtrade/freqai/base_models/BasePyTorchModel.py +++ b/freqtrade/freqai/base_models/BasePyTorchModel.py @@ -1,21 +1,16 @@ import logging from abc import ABC, abstractmethod -from time import time -from typing import Any import torch -from pandas import DataFrame -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -# from freqtrade.freqai.freqai_interface import IFreqaiModel -from freqtrade.freqai.base_models import BaseRegressionModel +from freqtrade.freqai.freqai_interface import IFreqaiModel from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor logger = logging.getLogger(__name__) -class BasePyTorchModel(BaseRegressionModel): +class BasePyTorchModel(IFreqaiModel, ABC): """ Base class for PyTorch type models. User *must* inherit from this class and set fit() and predict() and @@ -30,51 +25,6 @@ class BasePyTorchModel(BaseRegressionModel): self.splits = ["train", "test"] if test_size != 0 else ["train"] self.window_size = self.freqai_info.get("conv_width", 1) - # def train( - # self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs - # ) -> Any: - # """ - # Filter the training data and train a model to it. Train makes heavy use of the datakitchen - # for storing, saving, loading, and analyzing the data. 
- # :param unfiltered_df: Full dataframe for the current training period - # :return: - # :model: Trained model which can be used to inference (self.predict) - # """ - - # logger.info(f"-------------------- Starting training {pair} --------------------") - - # start_time = time() - - # features_filtered, labels_filtered = dk.filter_features( - # unfiltered_df, - # dk.training_features_list, - # dk.label_list, - # training_filter=True, - # ) - - # # split data into train/test data. - # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) - # if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: - # dk.fit_labels() - # # normalize all data based on train_dataset only - # data_dictionary = dk.normalize_data(data_dictionary) - - # # optional additional data cleaning/analysis - # self.data_cleaning_train(dk) - - # logger.info( - # f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" - # ) - # logger.info(f"Training model on {len(data_dictionary['train_features'])} data points") - - # model = self.fit(data_dictionary, dk) - # end_time = time() - - # logger.info(f"-------------------- Done training {pair} " - # f"({end_time - start_time:.2f} secs) --------------------") - - # return model - @property @abstractmethod def data_convertor(self) -> PyTorchDataConvertor: diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py index 6139f2e85..2f2aaef39 100644 --- a/freqtrade/freqai/base_models/BasePyTorchRegressor.py +++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py @@ -1,5 +1,6 @@ import logging -from typing import Tuple +from time import time +from typing import Any, Tuple import numpy as np import numpy.typing as npt @@ -36,10 +37,11 @@ class BasePyTorchRegressor(BasePyTorchModel): filtered_df, _ = dk.filter_features( unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_df = dk.normalize_data_from_metadata(filtered_df) dk.data_dictionary["prediction_features"] = filtered_df - self.data_cleaning_predict(dk) + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + x = self.data_convertor.convert_x( dk.data_dictionary["prediction_features"], device=self.device @@ -47,5 +49,69 @@ class BasePyTorchRegressor(BasePyTorchModel): self.model.model.eval() y = self.model.model(x) pred_df = DataFrame(y.detach().tolist(), columns=[dk.label_list[0]]) - pred_df = dk.denormalize_labels_from_metadata(pred_df) + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() return (pred_df, dk.do_predict) + + def train( + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + ) -> Any: + """ + Filter the training data and train a model to it. Train makes heavy use of the datakitchen + for storing, saving, loading, and analyzing the data. 
+ :param unfiltered_df: Full dataframe for the current training period + :return: + :model: Trained model which can be used to inference (self.predict) + """ + + logger.info(f"-------------------- Starting training {pair} --------------------") + + start_time = time() + + features_filtered, labels_filtered = dk.filter_features( + unfiltered_df, + dk.training_features_list, + dk.label_list, + training_filter=True, + ) + + # split data into train/test data. + d = dk.make_train_test_datasets(features_filtered, labels_filtered) + if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: + dk.fit_labels() + + self.define_data_pipeline(dk) + self.define_label_pipeline(dk) + + d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) + d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + + (d["train_features"], + d["train_labels"], + d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) + + (d["test_features"], + d["test_labels"], + d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) + + logger.info( + f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" + ) + logger.info(f"Training model on {len(d['train_features'])} data points") + + model = self.fit(d, dk) + end_time = time() + + logger.info(f"-------------------- Done training {pair} " + f"({end_time - start_time:.2f} secs) --------------------") + + return model diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index 45660253e..1babd5f0c 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -56,20 +56,20 @@ class BaseRegressionModel(IFreqaiModel): self.define_data_pipeline(dk) self.define_label_pipeline(dk) - d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) - d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) - (d["train_features"], d["train_labels"], - d["train_weights"]) = dk.pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) + d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], + d["train_labels"], + d["train_weights"]) (d["test_features"], d["test_labels"], - d["test_weights"]) = dk.pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) + d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], + d["test_labels"], + d["test_weights"]) + + d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) + d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" @@ -98,13 +98,11 @@ class BaseRegressionModel(IFreqaiModel): """ dk.find_features(unfiltered_df) - filtered_df, _ = dk.filter_features( + dk.data_dictionary["prediction_features"], _ = dk.filter_features( unfiltered_df, dk.training_features_list, training_filter=False ) - # filtered_df = dk.normalize_data_from_metadata(filtered_df) - dk.data_dictionary["prediction_features"] = filtered_df - dk.data_dictionary["prediction_features"], outliers, _ = dk.pipeline.transform( + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( dk.data_dictionary["prediction_features"], outlier_check=True) predictions = 
self.model.predict(dk.data_dictionary["prediction_features"]) @@ -114,7 +112,10 @@ class BaseRegressionModel(IFreqaiModel): pred_df = DataFrame(predictions, columns=dk.label_list) pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) - dk.DI_values = dk.label_pipeline.get_step("di").di_values + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) dk.do_predict = outliers.to_numpy() return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 9fdcc2d41..670dfc620 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -449,9 +449,6 @@ class FreqaiDataDrawer: elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]: model.save(save_path / f"{dk.model_filename}_model.zip") - if dk.svm_model is not None: - dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib") - dk.data["data_path"] = str(dk.data_path) dk.data["model_filename"] = str(dk.model_filename) dk.data["training_features_list"] = dk.training_features_list @@ -461,8 +458,8 @@ class FreqaiDataDrawer: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) # save the pipelines to pickle files - with (save_path / f"{dk.model_filename}_pipeline.pkl").open("wb") as fp: - cloudpickle.dump(dk.pipeline, fp) + with (save_path / f"{dk.model_filename}_feature_pipeline.pkl").open("wb") as fp: + cloudpickle.dump(dk.feature_pipeline, fp) with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp: cloudpickle.dump(dk.label_pipeline, fp) @@ -476,11 +473,6 @@ class FreqaiDataDrawer: save_path / f"{dk.model_filename}_trained_dates_df.pkl" ) - if self.freqai_info["feature_parameters"].get("principal_component_analysis"): - cloudpickle.dump( - dk.pca, (dk.data_path / f"{dk.model_filename}_pca_object.pkl").open("wb") - ) - self.model_dictionary[coin] = model self.pair_dict[coin]["model_filename"] = dk.model_filename self.pair_dict[coin]["data_path"] = str(dk.data_path) @@ -489,7 +481,7 @@ class FreqaiDataDrawer: self.meta_data_dictionary[coin] = {} self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"] self.meta_data_dictionary[coin]["meta_data"] = dk.data - self.meta_data_dictionary[coin]["pipeline"] = dk.pipeline + self.meta_data_dictionary[coin]["feature_pipeline"] = dk.feature_pipeline self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline self.save_drawer_to_disk() @@ -522,7 +514,7 @@ class FreqaiDataDrawer: if coin in self.meta_data_dictionary: dk.data = self.meta_data_dictionary[coin]["meta_data"] dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"] - dk.pipeline = self.meta_data_dictionary[coin]["pipeline"] + dk.feature_pipeline = self.meta_data_dictionary[coin]["feature_pipeline"] dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"] else: with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: @@ -532,7 +524,7 @@ class FreqaiDataDrawer: dk.data_path / f"{dk.model_filename}_trained_df.pkl" ) with (dk.data_path / f"{dk.model_filename}_pipeline.pkl").open("rb") as fp: - dk.pipeline = cloudpickle.load(fp) + dk.feature_pipeline = cloudpickle.load(fp) with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp: dk.label_pipeline = cloudpickle.load(fp) @@ -544,9 +536,6 @@ class FreqaiDataDrawer: model = self.model_dictionary[coin] elif self.model_type == 
'joblib': model = load(dk.data_path / f"{dk.model_filename}_model.joblib") - elif self.model_type == 'keras': - from tensorflow import keras - model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5") elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type: mod = importlib.import_module( self.model_type, self.freqai_info['rl_config']['model_type']) @@ -558,9 +547,6 @@ class FreqaiDataDrawer: model = zip["pytrainer"] model = model.load_from_checkpoint(zip) - if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file(): - dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib") - if not model: raise OperationalException( f"Unable to load model, ensure model exists at " f"{dk.data_path} " @@ -570,11 +556,6 @@ class FreqaiDataDrawer: if coin not in self.model_dictionary: self.model_dictionary[coin] = model - if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: - dk.pca = cloudpickle.load( - (dk.data_path / f"{dk.model_filename}_pca_object.pkl").open("rb") - ) - return model def update_historic_data(self, strategy: IStrategy, dk: FreqaiDataKitchen) -> None: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index adfeb8dd5..04182dc69 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -4,7 +4,6 @@ import logging import random import shutil from datetime import datetime, timezone -from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -12,13 +11,10 @@ import numpy as np import numpy.typing as npt import pandas as pd import psutil +from datasieve.pipeline import Pipeline from pandas import DataFrame -from scipy import stats -from sklearn import linear_model -from sklearn.cluster import DBSCAN from sklearn.metrics.pairwise import pairwise_distances from sklearn.model_selection import train_test_split -from sklearn.neighbors import NearestNeighbors from freqtrade.configuration import TimeRange from freqtrade.constants import Config @@ -27,7 +23,6 @@ from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.strategy import merge_informative_pair from freqtrade.strategy.interface import IStrategy -from datasieve.pipeline import Pipeline SECONDS_IN_DAY = 86400 @@ -83,11 +78,11 @@ class FreqaiDataKitchen: self.live = live self.pair = pair - self.svm_model: linear_model.SGDOneClassSVM = None + # self.svm_model: linear_model.SGDOneClassSVM = None self.keras: bool = self.freqai_config.get("keras", False) self.set_all_pairs() self.backtest_live_models = config.get("freqai_backtest_live_models", False) - self.pipeline = Pipeline() + self.feature_pipeline = Pipeline() self.label_pipeline = Pipeline() if not self.live: @@ -230,13 +225,14 @@ class FreqaiDataKitchen: drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs, drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement. 
if (training_filter): - const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index) - if const_cols: - filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols)) - self.data['constant_features_list'] = const_cols - logger.warning(f"Removed features {const_cols} with constant values.") - else: - self.data['constant_features_list'] = [] + # const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index) + # if const_cols: + # filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols)) + # self.data['constant_features_list'] = const_cols + # logger.warning(f"Removed features {const_cols} with constant values.") + # else: + # self.data['constant_features_list'] = [] + # we don't care about total row number (total no. datapoints) in training, we only care # about removing any row with NaNs # if labels has multiple columns (user wants to train multiple modelEs), we detect here @@ -267,8 +263,10 @@ class FreqaiDataKitchen: self.data["filter_drop_index_training"] = drop_index else: - if 'constant_features_list' in self.data and len(self.data['constant_features_list']): - filtered_df = self.check_pred_labels(filtered_df) + + # if 'constant_features_list' in self.data and len(self.data['constant_features_list']): + # filtered_df = self.check_pred_labels(filtered_df) + # we are backtesting so we need to preserve row number to send back to strategy, # so now we use do_predict to avoid any prediction based on a NaN drop_index = pd.isnull(filtered_df).any(axis=1) @@ -488,415 +486,6 @@ class FreqaiDataKitchen: return df - def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame: - """ - Check that prediction feature labels match training feature labels. - :param df_predictions: incoming predictions - """ - constant_labels = self.data['constant_features_list'] - df_predictions = df_predictions.filter( - df_predictions.columns.difference(constant_labels) - ) - logger.warning( - f"Removed {len(constant_labels)} features from prediction features, " - f"these were considered constant values during most recent training." - ) - - return df_predictions - - # def principal_component_analysis(self) -> None: - # """ - # Performs Principal Component Analysis on the data for dimensionality reduction - # and outlier detection (see self.remove_outliers()) - # No parameters or returns, it acts on the data_dictionary held by the DataHandler. 
- # """ - - # from sklearn.decomposition import PCA # avoid importing if we dont need it - - # pca = PCA(0.999) - # pca = pca.fit(self.data_dictionary["train_features"]) - # n_keep_components = pca.n_components_ - # self.data["n_kept_components"] = n_keep_components - # n_components = self.data_dictionary["train_features"].shape[1] - # logger.info("reduced feature dimension by %s", n_components - n_keep_components) - # logger.info("explained variance %f", np.sum(pca.explained_variance_ratio_)) - - # train_components = pca.transform(self.data_dictionary["train_features"]) - # self.data_dictionary["train_features"] = pd.DataFrame( - # data=train_components, - # columns=["PC" + str(i) for i in range(0, n_keep_components)], - # index=self.data_dictionary["train_features"].index, - # ) - # # normalsing transformed training features - # self.data_dictionary["train_features"] = self.normalize_single_dataframe( - # self.data_dictionary["train_features"]) - - # # keeping a copy of the non-transformed features so we can check for errors during - # # model load from disk - # self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list) - # self.training_features_list = self.data_dictionary["train_features"].columns - - # if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - # test_components = pca.transform(self.data_dictionary["test_features"]) - # self.data_dictionary["test_features"] = pd.DataFrame( - # data=test_components, - # columns=["PC" + str(i) for i in range(0, n_keep_components)], - # index=self.data_dictionary["test_features"].index, - # ) - # # normalise transformed test feature to transformed training features - # self.data_dictionary["test_features"] = self.normalize_data_from_metadata( - # self.data_dictionary["test_features"]) - - # self.data["n_kept_components"] = n_keep_components - # self.pca = pca - - # logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}") - - # if not self.data_path.is_dir(): - # self.data_path.mkdir(parents=True, exist_ok=True) - - # return None - - # def pca_transform(self, filtered_dataframe: DataFrame) -> None: - # """ - # Use an existing pca transform to transform data into components - # :param filtered_dataframe: DataFrame = the cleaned dataframe - # """ - # pca_components = self.pca.transform(filtered_dataframe) - # self.data_dictionary["prediction_features"] = pd.DataFrame( - # data=pca_components, - # columns=["PC" + str(i) for i in range(0, self.data["n_kept_components"])], - # index=filtered_dataframe.index, - # ) - # # normalise transformed predictions to transformed training features - # self.data_dictionary["prediction_features"] = self.normalize_data_from_metadata( - # self.data_dictionary["prediction_features"]) - - # def compute_distances(self) -> float: - # """ - # Compute distances between each training point and every other training - # point. 
This metric defines the neighborhood of trained data and is used - # for prediction confidence in the Dissimilarity Index - # """ - # # logger.info("computing average mean distance for all training points") - # pairwise = pairwise_distances( - # self.data_dictionary["train_features"], n_jobs=self.thread_count) - # # remove the diagonal distances which are itself distances ~0 - # np.fill_diagonal(pairwise, np.NaN) - # pairwise = pairwise.reshape(-1, 1) - # avg_mean_dist = pairwise[~np.isnan(pairwise)].mean() - - # return avg_mean_dist - - # def get_outlier_percentage(self, dropped_pts: npt.NDArray) -> float: - # """ - # Check if more than X% of points werer dropped during outlier detection. - # """ - # outlier_protection_pct = self.freqai_config["feature_parameters"].get( - # "outlier_protection_percentage", 30) - # outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 - # if outlier_pct >= outlier_protection_pct: - # return outlier_pct - # else: - # return 0.0 - - # def use_SVM_to_remove_outliers(self, predict: bool) -> None: - # """ - # Build/inference a Support Vector Machine to detect outliers - # in training data and prediction - # :param predict: bool = If true, inference an existing SVM model, else construct one - # """ - - # if self.keras: - # logger.warning( - # "SVM outlier removal not currently supported for Keras based models. " - # "Skipping user requested function." - # ) - # if predict: - # self.do_predict = np.ones(len(self.data_dictionary["prediction_features"])) - # return - - # if predict: - # if not self.svm_model: - # logger.warning("No svm model available for outlier removal") - # return - # y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) - # do_predict = np.where(y_pred == -1, 0, y_pred) - - # if (len(do_predict) - do_predict.sum()) > 0: - # logger.info(f"SVM tossed {len(do_predict) - do_predict.sum()} predictions.") - # self.do_predict += do_predict - # self.do_predict -= 1 - - # else: - # # use SGDOneClassSVM to increase speed? - # svm_params = self.freqai_config["feature_parameters"].get( - # "svm_params", {"shuffle": False, "nu": 0.1}) - # self.svm_model = linear_model.SGDOneClassSVM(**svm_params).fit( - # self.data_dictionary["train_features"] - # ) - # y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) - # kept_points = np.where(y_pred == -1, 0, y_pred) - # # keep_index = np.where(y_pred == 1) - # outlier_pct = self.get_outlier_percentage(1 - kept_points) - # if outlier_pct: - # logger.warning( - # f"SVM detected {outlier_pct:.2f}% of the points as outliers. " - # f"Keeping original dataset." - # ) - # self.svm_model = None - # return - - # self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ - # (y_pred == 1) - # ] - # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ - # (y_pred == 1) - # ] - # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ - # (y_pred == 1) - # ] - - # logger.info( - # f"SVM tossed {len(y_pred) - kept_points.sum()}" - # f" train points from {len(y_pred)} total points." 
- # ) - - # # same for test data - # # TODO: This (and the part above) could be refactored into a separate function - # # to reduce code duplication - # if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: - # y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) - # kept_points = np.where(y_pred == -1, 0, y_pred) - # self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ - # (y_pred == 1) - # ] - # self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][( - # y_pred == 1)] - # self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ - # (y_pred == 1) - # ] - - # logger.info( - # f"{self.pair}: SVM tossed {len(y_pred) - kept_points.sum()}" - # f" test points from {len(y_pred)} total points." - # ) - - # return - - # def use_DBSCAN_to_remove_outliers(self, predict: bool, eps=None) -> None: - # """ - # Use DBSCAN to cluster training data and remove "noisy" data (read outliers). - # User controls this via the config param `DBSCAN_outlier_pct` which indicates the - # pct of training data that they want to be considered outliers. - # :param predict: bool = If False (training), iterate to find the best hyper parameters - # to match user requested outlier percent target. - # If True (prediction), use the parameters determined from - # the previous training to estimate if the current prediction point - # is an outlier. - # """ - - # if predict: - # if not self.data['DBSCAN_eps']: - # return - # train_ft_df = self.data_dictionary['train_features'] - # pred_ft_df = self.data_dictionary['prediction_features'] - # num_preds = len(pred_ft_df) - # df = pd.concat([train_ft_df, pred_ft_df], axis=0, ignore_index=True) - # clustering = DBSCAN(eps=self.data['DBSCAN_eps'], - # min_samples=self.data['DBSCAN_min_samples'], - # n_jobs=self.thread_count - # ).fit(df) - # do_predict = np.where(clustering.labels_[-num_preds:] == -1, 0, 1) - - # if (len(do_predict) - do_predict.sum()) > 0: - # logger.info(f"DBSCAN tossed {len(do_predict) - do_predict.sum()} predictions") - # self.do_predict += do_predict - # self.do_predict -= 1 - - # else: - - # def normalise_distances(distances): - # normalised_distances = (distances - distances.min()) / \ - # (distances.max() - distances.min()) - # return normalised_distances - - # def rotate_point(origin, point, angle): - # # rotate a point counterclockwise by a given angle (in radians) - # # around a given origin - # x = origin[0] + cos(angle) * (point[0] - origin[0]) - \ - # sin(angle) * (point[1] - origin[1]) - # y = origin[1] + sin(angle) * (point[0] - origin[0]) + \ - # cos(angle) * (point[1] - origin[1]) - # return (x, y) - - # MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) - # # measure pairwise distances to nearest neighbours - # neighbors = NearestNeighbors( - # n_neighbors=MinPts, n_jobs=self.thread_count) - # neighbors_fit = neighbors.fit(self.data_dictionary['train_features']) - # distances, _ = neighbors_fit.kneighbors(self.data_dictionary['train_features']) - # distances = np.sort(distances, axis=0).mean(axis=1) - - # normalised_distances = normalise_distances(distances) - # x_range = np.linspace(0, 1, len(distances)) - # line = np.linspace(normalised_distances[0], - # normalised_distances[-1], len(normalised_distances)) - # deflection = np.abs(normalised_distances - line) - # max_deflection_loc = np.where(deflection == deflection.max())[0][0] - # origin = x_range[max_deflection_loc], line[max_deflection_loc] - # point = 
x_range[max_deflection_loc], normalised_distances[max_deflection_loc] - # rot_angle = np.pi / 4 - # elbow_loc = rotate_point(origin, point, rot_angle) - - # epsilon = elbow_loc[1] * (distances[-1] - distances[0]) + distances[0] - - # clustering = DBSCAN(eps=epsilon, min_samples=MinPts, - # n_jobs=int(self.thread_count)).fit( - # self.data_dictionary['train_features'] - # ) - - # logger.info(f'DBSCAN found eps of {epsilon:.2f}.') - - # self.data['DBSCAN_eps'] = epsilon - # self.data['DBSCAN_min_samples'] = MinPts - # dropped_points = np.where(clustering.labels_ == -1, 1, 0) - - # outlier_pct = self.get_outlier_percentage(dropped_points) - # if outlier_pct: - # logger.warning( - # f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " - # f"Keeping original dataset." - # ) - # self.data['DBSCAN_eps'] = 0 - # return - - # self.data_dictionary['train_features'] = self.data_dictionary['train_features'][ - # (clustering.labels_ != -1) - # ] - # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ - # (clustering.labels_ != -1) - # ] - # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ - # (clustering.labels_ != -1) - # ] - - # logger.info( - # f"DBSCAN tossed {dropped_points.sum()}" - # f" train points from {len(clustering.labels_)}" - # ) - - # return - - # def compute_inlier_metric(self, set_='train') -> None: - # """ - # Compute inlier metric from backwards distance distributions. - # This metric defines how well features from a timepoint fit - # into previous timepoints. - # """ - - # def normalise(dataframe: DataFrame, key: str) -> DataFrame: - # if set_ == 'train': - # min_value = dataframe.min() - # max_value = dataframe.max() - # self.data[f'{key}_min'] = min_value - # self.data[f'{key}_max'] = max_value - # else: - # min_value = self.data[f'{key}_min'] - # max_value = self.data[f'{key}_max'] - # return (dataframe - min_value) / (max_value - min_value) - - # no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] - - # if set_ == 'train': - # compute_df = copy.deepcopy(self.data_dictionary['train_features']) - # elif set_ == 'test': - # compute_df = copy.deepcopy(self.data_dictionary['test_features']) - # else: - # compute_df = copy.deepcopy(self.data_dictionary['prediction_features']) - - # compute_df_reindexed = compute_df.reindex( - # index=np.flip(compute_df.index) - # ) - - # pairwise = pd.DataFrame( - # np.triu( - # pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count) - # ), - # columns=compute_df_reindexed.index, - # index=compute_df_reindexed.index - # ) - # pairwise = pairwise.round(5) - - # column_labels = [ - # '{}{}'.format('d', i) for i in range(1, no_prev_pts + 1) - # ] - # distances = pd.DataFrame( - # columns=column_labels, index=compute_df.index - # ) - - # for index in compute_df.index[no_prev_pts:]: - # current_row = pairwise.loc[[index]] - # current_row_no_zeros = current_row.loc[ - # :, (current_row != 0).any(axis=0) - # ] - # distances.loc[[index]] = current_row_no_zeros.iloc[ - # :, :no_prev_pts - # ] - # distances = distances.replace([np.inf, -np.inf], np.nan) - # drop_index = pd.isnull(distances).any(axis=1) - # distances = distances[drop_index == 0] - - # inliers = pd.DataFrame(index=distances.index) - # for key in distances.keys(): - # current_distances = distances[key].dropna() - # current_distances = normalise(current_distances, key) - # if set_ == 'train': - # fit_params = stats.weibull_min.fit(current_distances) - # self.data[f'{key}_fit_params'] = 
fit_params - # else: - # fit_params = self.data[f'{key}_fit_params'] - # quantiles = stats.weibull_min.cdf(current_distances, *fit_params) - - # df_inlier = pd.DataFrame( - # {key: quantiles}, index=distances.index - # ) - # inliers = pd.concat( - # [inliers, df_inlier], axis=1 - # ) - - # inlier_metric = pd.DataFrame( - # data=inliers.sum(axis=1) / no_prev_pts, - # columns=['%-inlier_metric'], - # index=compute_df.index - # ) - - # inlier_metric = (2 * (inlier_metric - inlier_metric.min()) / - # (inlier_metric.max() - inlier_metric.min()) - 1) - - # if set_ in ('train', 'test'): - # inlier_metric = inlier_metric.iloc[no_prev_pts:] - # compute_df = compute_df.iloc[no_prev_pts:] - # self.remove_beginning_points_from_data_dict(set_, no_prev_pts) - # self.data_dictionary[f'{set_}_features'] = pd.concat( - # [compute_df, inlier_metric], axis=1) - # else: - # self.data_dictionary['prediction_features'] = pd.concat( - # [compute_df, inlier_metric], axis=1) - # self.data_dictionary['prediction_features'].fillna(0, inplace=True) - - # logger.info('Inlier metric computed and added to features.') - - # return None - - # def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): - # features = self.data_dictionary[f'{set_}_features'] - # weights = self.data_dictionary[f'{set_}_weights'] - # labels = self.data_dictionary[f'{set_}_labels'] - # self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:] - # self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:] - # self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:] - def add_noise_to_training_features(self) -> None: """ Add noise to train features to reduce the risk of overfitting. diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index cacbfea67..6dfa9855c 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -7,9 +7,11 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Tuple +import datasieve.transforms as ds import numpy as np import pandas as pd import psutil +from datasieve.pipeline import Pipeline from numpy.typing import NDArray from pandas import DataFrame @@ -23,8 +25,6 @@ from freqtrade.freqai.data_drawer import FreqaiDataDrawer from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.utils import get_tb_logger, plot_feature_importance, record_params from freqtrade.strategy.interface import IStrategy -from datasieve.pipeline import Pipeline -import datasieve.transforms as ds pd.options.mode.chained_assignment = None @@ -505,94 +505,39 @@ class IFreqaiModel(ABC): "feature_engineering_* functions" ) - def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None: - """ - Base data cleaning method for train. - Functions here improve/modify the input data by identifying outliers, - computing additional metrics, adding noise, reducing dimensionality etc. 
- """ - - ft_params = self.freqai_info["feature_parameters"] - - if ft_params.get('inlier_metric_window', 0): - dk.compute_inlier_metric(set_='train') - if self.freqai_info["data_split_parameters"]["test_size"] > 0: - dk.compute_inlier_metric(set_='test') - - if ft_params.get( - "principal_component_analysis", False - ): - dk.principal_component_analysis() - - if ft_params.get("use_SVM_to_remove_outliers", False): - dk.use_SVM_to_remove_outliers(predict=False) - - if ft_params.get("DI_threshold", 0): - dk.data["avg_mean_dist"] = dk.compute_distances() - - if ft_params.get("use_DBSCAN_to_remove_outliers", False): - if dk.pair in self.dd.old_DBSCAN_eps: - eps = self.dd.old_DBSCAN_eps[dk.pair] - else: - eps = None - dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps) - self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps'] - - if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): - dk.add_noise_to_training_features() - - def data_cleaning_predict(self, dk: FreqaiDataKitchen) -> None: - """ - Base data cleaning method for predict. - Functions here are complementary to the functions of data_cleaning_train. - """ - ft_params = self.freqai_info["feature_parameters"] - - # ensure user is feeding the correct indicators to the model - self.check_if_feature_list_matches_strategy(dk) - - if ft_params.get('inlier_metric_window', 0): - dk.compute_inlier_metric(set_='predict') - - if ft_params.get( - "principal_component_analysis", False - ): - dk.pca_transform(dk.data_dictionary['prediction_features']) - - if ft_params.get("use_SVM_to_remove_outliers", False): - dk.use_SVM_to_remove_outliers(predict=True) - - if ft_params.get("DI_threshold", 0): - dk.check_if_pred_in_training_spaces() - - if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.use_DBSCAN_to_remove_outliers(predict=True) - def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: ft_params = self.freqai_info["feature_parameters"] - dk.pipeline = Pipeline([('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) + dk.feature_pipeline = Pipeline( + [('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) if ft_params.get("principal_component_analysis", False): - dk.pipeline.steps += [('pca', ds.DataSievePCA())] - dk.pipeline.steps += [('post-pca-scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))] + dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())] + dk.feature_pipeline.steps += [('post-pca-scaler', + ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))] if ft_params.get("use_SVM_to_remove_outliers", False): - dk.pipeline.steps += [('svm', ds.SVMOutlierExtractor())] + svm_params = ft_params.get( + "svm_params", {"shuffle": False, "nu": 0.01}) + dk.feature_pipeline.steps += [('svm', ds.SVMOutlierExtractor(**svm_params))] - if ft_params.get("DI_threshold", 0): - dk.pipeline.steps += [('di', ds.DissimilarityIndex())] + di = ft_params.get("DI_threshold", 0) + if di: + dk.feature_pipeline.steps += [('di', ds.DissimilarityIndex(di_threshold=di))] if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())] + dk.feature_pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())] - dk.pipeline.fitparams = dk.pipeline._validate_fitparams({}, dk.pipeline.steps) + dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams( + {}, dk.feature_pipeline.steps) # if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): # dk.pipeline.extend(('noise', ds.Noise())) def define_label_pipeline(self, dk: 
FreqaiDataKitchen) -> None:
-        dk.label_pipeline = Pipeline([('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))])
+        dk.label_pipeline = Pipeline([
+            ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
+        ])

     def model_exists(self, dk: FreqaiDataKitchen) -> bool:
         """
diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
index b3b684c14..bf78488ff 100644
--- a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -103,13 +103,13 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
         """
         dk.find_features(unfiltered_df)
-        filtered_df, _ = dk.filter_features(
+        dk.data_dictionary["prediction_features"], _ = dk.filter_features(
             unfiltered_df, dk.training_features_list, training_filter=False
         )
-        filtered_df = dk.normalize_data_from_metadata(filtered_df)
-        dk.data_dictionary["prediction_features"] = filtered_df
-        self.data_cleaning_predict(dk)
+        dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
+            dk.data_dictionary["prediction_features"], outlier_check=True)
+
         x = self.data_convertor.convert_x(
             dk.data_dictionary["prediction_features"],
             device=self.device
@@ -131,7 +131,13 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
         yb = yb.cpu().squeeze()
         pred_df = pd.DataFrame(yb.detach().numpy(), columns=dk.label_list)
-        pred_df = dk.denormalize_labels_from_metadata(pred_df)
+        pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df)
+
+        if self.freqai_info.get("DI_threshold", 0) > 0:
+            dk.DI_values = dk.feature_pipeline["di"].di_values
+        else:
+            dk.DI_values = np.zeros(len(outliers.index))
+        dk.do_predict = outliers.to_numpy()

         if x.shape[1] > 1:
             zeros_df = pd.DataFrame(np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))),
diff --git a/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
index 1aefbf19a..f43585ab0 100644
--- a/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
@@ -5,6 +5,7 @@ from xgboost import XGBRFRegressor

 from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.tensorboard import TBCallback


 logger = logging.getLogger(__name__)
@@ -44,7 +45,10 @@ class XGBoostRFRegressor(BaseRegressionModel):

         model = XGBRFRegressor(**self.model_training_parameters)

+        model.set_params(callbacks=[TBCallback(dk.data_path, self.activate_tensorboard)])
         model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
                   sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+        # set the callbacks to empty so that we can serialize to disk later
+        model.set_params(callbacks=[])

         return model
diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
index f8b4d353d..88d348448 100644
--- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -8,6 +8,9 @@ from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.tensorboard import TBCallback


+# from datasieve.pipeline import Pipeline
+# from freqtrade.freqai.transforms import FreqaiQuantileTransformer
+
 logger = logging.getLogger(__name__)


@@ -52,3 +55,23 @@ class XGBoostRegressor(BaseRegressionModel):
model.set_params(callbacks=[]) return model + + # def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: + # """ + # User defines their custom eature pipeline here (if they wish) + # """ + # dk.feature_pipeline = Pipeline([ + # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ]) + + # return + + # def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: + # """ + # User defines their custom label pipeline here (if they wish) + # """ + # dk.label_pipeline = Pipeline([ + # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ]) + + # return diff --git a/freqtrade/freqai/transforms/__init__.py b/freqtrade/freqai/transforms/__init__.py new file mode 100644 index 000000000..9b7d8ccf5 --- /dev/null +++ b/freqtrade/freqai/transforms/__init__.py @@ -0,0 +1,6 @@ +from freqtrade.freqai.transforms.quantile_transform import FreqaiQuantileTransformer + + +__all__ = ( + "FreqaiQuantileTransformer", +) diff --git a/freqtrade/freqai/transforms/quantile_transform.py b/freqtrade/freqai/transforms/quantile_transform.py new file mode 100644 index 000000000..3d1bd2731 --- /dev/null +++ b/freqtrade/freqai/transforms/quantile_transform.py @@ -0,0 +1,28 @@ +from sklearn.preprocessing import QuantileTransformer + + +class FreqaiQuantileTransformer(QuantileTransformer): + """ + A subclass of the SKLearn Quantile that ensures fit, transform, fit_transform and + inverse_transform all take the full set of params X, y, sample_weight required to + benefit from the DataSieve features. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def fit_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + super().fit(X) + X = super().transform(X) + return X, y, sample_weight, feature_list + + def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + super().fit(X) + return X, y, sample_weight, feature_list + + def transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + X = super().transform(X) + return X, y, sample_weight, feature_list + + def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + return super().inverse_transform(X), y, sample_weight, feature_list diff --git a/freqtrade/resolvers/freqaimodel_resolver.py b/freqtrade/resolvers/freqaimodel_resolver.py index 48c3facac..3696b9e56 100644 --- a/freqtrade/resolvers/freqaimodel_resolver.py +++ b/freqtrade/resolvers/freqaimodel_resolver.py @@ -34,7 +34,7 @@ class FreqaiModelResolver(IResolver): Load the custom class from config parameter :param config: configuration dictionary """ - disallowed_models = ["BaseRegressionModel", "BaseTensorFlowModel"] + disallowed_models = ["BaseRegressionModel"] freqaimodel_name = config.get("freqaimodel") if not freqaimodel_name: diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index e3ef1612c..c067df151 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -10,8 +10,8 @@ from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from tests.conftest import get_patched_exchange # , log_has_re -from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, - make_unfiltered_dataframe) # make_data_dictionary, +from tests.freqai.conftest import make_unfiltered_dataframe # make_data_dictionary, +from tests.freqai.conftest import get_patched_data_kitchen, 
get_patched_freqai_strategy from tests.freqai.test_freqai_interface import is_mac @@ -72,68 +72,6 @@ def test_check_if_model_expired(mocker, freqai_conf): shutil.rmtree(Path(dk.full_path)) -# def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog): -# freqai = make_data_dictionary(mocker, freqai_conf) -# # freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 1}) -# freqai.dk.use_DBSCAN_to_remove_outliers(predict=False) -# assert log_has_re(r"DBSCAN found eps of 1\.7\d\.", caplog) - - -# def test_compute_distances(mocker, freqai_conf): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) -# avg_mean_dist = freqai.dk.compute_distances() -# assert round(avg_mean_dist, 2) == 1.98 - - -# def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) -# freqai.dk.use_SVM_to_remove_outliers(predict=False) -# assert log_has_re( -# "SVM detected 7.83%", -# caplog, -# ) - - -# def test_compute_inlier_metric(mocker, freqai_conf, caplog): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) -# freqai.dk.compute_inlier_metric(set_='train') -# assert log_has_re( -# "Inlier metric computed and added to features.", -# caplog, -# ) - - -# def test_add_noise_to_training_features(mocker, freqai_conf): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) -# freqai.dk.add_noise_to_training_features() - - -# def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai.dk.remove_beginning_points_from_data_dict(set_='train') - - -# def test_principal_component_analysis(mocker, freqai_conf, caplog): -# freqai = make_data_dictionary(mocker, freqai_conf) -# freqai.dk.principal_component_analysis() -# assert log_has_re( -# "reduced feature dimension by", -# caplog, -# ) - - -# def test_normalize_data(mocker, freqai_conf): -# freqai = make_data_dictionary(mocker, freqai_conf) -# data_dict = freqai.dk.data_dictionary -# freqai.dk.normalize_data(data_dict) -# assert any('_max' in entry for entry in freqai.dk.data.keys()) -# assert any('_min' in entry for entry in freqai.dk.data.keys()) - - def test_filter_features(mocker, freqai_conf): freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) freqai.dk.find_features(unfiltered_dataframe) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 61a7b7346..90959ec2c 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -74,6 +74,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf = make_rl_config(freqai_conf) # test the RL guardrails freqai_conf['freqai']['feature_parameters'].update({"use_SVM_to_remove_outliers": True}) + freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 2}) freqai_conf['freqai']['data_split_parameters'].update({'shuffle': True}) if 'test_3ac' in model or 'test_4ac' in model: @@ -162,7 +163,6 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, s assert Path(freqai.dk.data_path / 
f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file() - assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file() assert len(freqai.dk.data['training_features_list']) == 14 shutil.rmtree(Path(freqai.dk.full_path)) @@ -218,7 +218,6 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model): f"{freqai.dk.model_filename}_model{model_file_extension}").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists() - assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists() shutil.rmtree(Path(freqai.dk.full_path)) @@ -283,9 +282,6 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog) _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = base_df[freqai_conf["timeframe"]] - for i in range(5): - df[f'%-constant_{i}'] = i - metadata = {"pair": "LTC/BTC"} freqai.dk.set_paths('LTC/BTC', None) freqai.start_backtesting(df, metadata, freqai.dk, strategy) @@ -293,14 +289,6 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog) assert len(model_folders) == num_files Trade.use_db = True - assert log_has_re( - "Removed features ", - caplog, - ) - assert log_has_re( - "Removed 5 features from prediction features, ", - caplog, - ) Backtesting.cleanup() shutil.rmtree(Path(freqai.dk.full_path)) @@ -425,36 +413,6 @@ def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): shutil.rmtree(Path(freqai.dk.full_path)) -def test_principal_component_analysis(mocker, freqai_conf): - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.get("freqai", {}).get("feature_parameters", {}).update( - {"princpial_component_analysis": "true"}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - freqai.dk.live = True - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") - new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.dk.set_paths('ADA/BTC', None) - - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl") - - shutil.rmtree(Path(freqai.dk.full_path)) - - def test_plot_feature_importance(mocker, freqai_conf): from freqtrade.freqai.utils import plot_feature_importance From 62378068170dc4144e52042e0774502680c75662 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 29 May 2023 15:18:28 +0200 Subject: [PATCH 036/130] bump datasieve to 0.0.8 --- requirements-freqai.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 66da4e873..0cc5762e0 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in 
platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.0.5 +datasieve==0.0.8 From 785f0d396f954805cc73ad5f532fa174a5325adf Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 29 May 2023 16:44:53 +0200 Subject: [PATCH 037/130] bump datasieve version --- requirements-freqai.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 0cc5762e0..81d49eee4 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.0.8 +datasieve==0.0.9 From f6a32f4ffd91def67a98f77a2aafe513185805cd Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 29 May 2023 23:35:24 +0200 Subject: [PATCH 038/130] bump version --- freqtrade/freqai/data_kitchen.py | 115 --------------------------- freqtrade/freqai/freqai_interface.py | 6 +- requirements-freqai.txt | 2 +- 3 files changed, 5 insertions(+), 118 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 04182dc69..127193a35 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -77,8 +77,6 @@ class FreqaiDataKitchen: self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair - - # self.svm_model: linear_model.SGDOneClassSVM = None self.keras: bool = self.freqai_config.get("keras", False) self.set_all_pairs() self.backtest_live_models = config.get("freqai_backtest_live_models", False) @@ -225,13 +223,6 @@ class FreqaiDataKitchen: drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs, drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement. if (training_filter): - # const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index) - # if const_cols: - # filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols)) - # self.data['constant_features_list'] = const_cols - # logger.warning(f"Removed features {const_cols} with constant values.") - # else: - # self.data['constant_features_list'] = [] # we don't care about total row number (total no. datapoints) in training, we only care # about removing any row with NaNs @@ -264,9 +255,6 @@ class FreqaiDataKitchen: else: - # if 'constant_features_list' in self.data and len(self.data['constant_features_list']): - # filtered_df = self.check_pred_labels(filtered_df) - # we are backtesting so we need to preserve row number to send back to strategy, # so now we use do_predict to avoid any prediction based on a NaN drop_index = pd.isnull(filtered_df).any(axis=1) @@ -308,107 +296,6 @@ class FreqaiDataKitchen: return self.data_dictionary - # def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: - # """ - # Normalize all data in the data_dictionary according to the training dataset - # :param data_dictionary: dictionary containing the cleaned and - # split training/test data/labels - # :returns: - # :data_dictionary: updated dictionary with standardized values. 
- # """ - - # # standardize the data by training stats - # train_max = data_dictionary["train_features"].max() - # train_min = data_dictionary["train_features"].min() - # data_dictionary["train_features"] = ( - # 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1 - # ) - # data_dictionary["test_features"] = ( - # 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 - # ) - - # for item in train_max.keys(): - # self.data[item + "_max"] = train_max[item] - # self.data[item + "_min"] = train_min[item] - - # for item in data_dictionary["train_labels"].keys(): - # if data_dictionary["train_labels"][item].dtype == object: - # continue - # train_labels_max = data_dictionary["train_labels"][item].max() - # train_labels_min = data_dictionary["train_labels"][item].min() - # data_dictionary["train_labels"][item] = ( - # 2 - # * (data_dictionary["train_labels"][item] - train_labels_min) - # / (train_labels_max - train_labels_min) - # - 1 - # ) - # if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - # data_dictionary["test_labels"][item] = ( - # 2 - # * (data_dictionary["test_labels"][item] - train_labels_min) - # / (train_labels_max - train_labels_min) - # - 1 - # ) - - # self.data[f"{item}_max"] = train_labels_max - # self.data[f"{item}_min"] = train_labels_min - # return data_dictionary - - # def normalize_single_dataframe(self, df: DataFrame) -> DataFrame: - - # train_max = df.max() - # train_min = df.min() - # df = ( - # 2 * (df - train_min) / (train_max - train_min) - 1 - # ) - - # for item in train_max.keys(): - # self.data[item + "_max"] = train_max[item] - # self.data[item + "_min"] = train_min[item] - - # return df - - # def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: - # """ - # Normalize a set of data using the mean and standard deviation from - # the associated training data. - # :param df: Dataframe to be standardized - # """ - - # train_max = [None] * len(df.keys()) - # train_min = [None] * len(df.keys()) - - # for i, item in enumerate(df.keys()): - # train_max[i] = self.data[f"{item}_max"] - # train_min[i] = self.data[f"{item}_min"] - - # train_max_series = pd.Series(train_max, index=df.keys()) - # train_min_series = pd.Series(train_min, index=df.keys()) - - # df = ( - # 2 * (df - train_min_series) / (train_max_series - train_min_series) - 1 - # ) - - # return df - - # def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: - # """ - # Denormalize a set of data using the mean and standard deviation from - # the associated training data. 
- # :param df: Dataframe of predictions to be denormalized - # """ - - # for label in df.columns: - # if df[label].dtype == object or label in self.unique_class_list: - # continue - # df[label] = ( - # (df[label] + 1) - # * (self.data[f"{label}_max"] - self.data[f"{label}_min"]) - # / 2 - # ) + self.data[f"{label}_min"] - - # return df - def split_timerange( self, tr: str, train_split: int = 28, bt_split: float = 7 ) -> Tuple[list, list]: @@ -453,9 +340,7 @@ class FreqaiDataKitchen: tr_training_list_timerange.append(copy.deepcopy(timerange_train)) # associated backtest period - timerange_backtest.startts = timerange_train.stopts - timerange_backtest.stopts = timerange_backtest.startts + int(bt_period) if timerange_backtest.stopts > config_timerange.stopts: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 6dfa9855c..3f04b17fb 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -507,8 +507,10 @@ class IFreqaiModel(ABC): def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: ft_params = self.freqai_info["feature_parameters"] - dk.feature_pipeline = Pipeline( - [('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))]) + dk.feature_pipeline = Pipeline([ + ('const', ds.DataSieveVarianceThreshold(threshold=0)), + ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ]) if ft_params.get("principal_component_analysis", False): dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())] diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 81d49eee4..31c73b594 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.0.9 +datasieve==0.1.0 From 94bc91ef57d0213b973342ee26e80b8473a233c6 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Sun, 4 Jun 2023 21:50:13 +0200 Subject: [PATCH 039/130] Update tests/freqai/test_freqai_datakitchen.py Co-authored-by: Matthias --- tests/freqai/test_freqai_datakitchen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index c067df151..77816749f 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -9,8 +9,8 @@ from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from tests.conftest import get_patched_exchange # , log_has_re -from tests.freqai.conftest import make_unfiltered_dataframe # make_data_dictionary, +from tests.conftest import get_patched_exchange +from tests.freqai.conftest import make_unfiltered_dataframe from tests.freqai.conftest import get_patched_data_kitchen, get_patched_freqai_strategy from tests.freqai.test_freqai_interface import is_mac From 5ac141f72b2df55d4ef9444a746860f73a82b8e6 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 6 Jun 2023 21:05:51 +0200 Subject: [PATCH 040/130] convert to new datasieve api --- docs/freqai-feature-engineering.md | 35 ++----------------- freqtrade/freqai/freqai_interface.py | 22 ++++++------ .../prediction_models/XGBoostRegressor.py | 6 ++-- freqtrade/freqai/transforms/__init__.py | 6 ---- .../freqai/transforms/quantile_transform.py | 28 --------------- requirements-freqai.txt | 2 +- 6 files changed, 18 insertions(+), 
81 deletions(-)
 delete mode 100644 freqtrade/freqai/transforms/__init__.py
 delete mode 100644 freqtrade/freqai/transforms/quantile_transform.py

diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md
index eb4b4272e..0eee0793b 100644
--- a/docs/freqai-feature-engineering.md
+++ b/docs/freqai-feature-engineering.md
@@ -254,47 +254,18 @@ Users are encouraged to customize the data pipeline to their needs by building t
     """
     User defines their custom feature pipeline here (if they wish)
     """
-    from freqtrade.freqai.transforms import FreqaiQuantileTransformer
+    from sklearn.preprocessing import QuantileTransformer
     dk.feature_pipeline = Pipeline([
-        ('qt', FreqaiQuantileTransformer(output_distribution='normal'))
+        ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal')))
     ])

     return
 ```

-Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. If you have a custom step that you would like to add to the pipeline, you simply create a class that follows the DataSieve/SKLearn API. That means your step must have a `fit()`, `transform()`, `fit_transform()`, and `inverse_transform()` method. You can see examples of this in the `freqtrade.freqai.transforms` module where we use SKLearn `QuantileNormalization` to create a new step for the pipeline.
+Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use *most* SKLearn transformation steps here by wrapping them in the `SKLearnWrapper` class.

 As there is the `feature_pipeline`, there also exists a definition for the `label_pipeline` which can be defined the same way as the `feature_pipeline`, by overriding `define_label_pipeline`.

-!!! note "Inheritence required"
-    While most SKLearn methods are very easy to override, as shown in freqtrade/freqai/transforms/quantile_transform.py, they still need to include passing X, y, and sample_weights through all `fit()`, `transform()`, `fit_transform()` and `inverse_transform()` functions, even if that means a direct pass through without modifications.
-
-
-
 ## Outlier detection

 Equity and crypto markets suffer from a high level of non-patterned noise in the form of outlier data points. FreqAI implements a variety of methods to identify such outliers and hence mitigate risk.
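The note removed above described a pass-through convention that any custom step must follow. A minimal sketch of such a step is shown below; `ClipTransform` and its `bound` parameter are hypothetical illustrations (not part of freqtrade or datasieve), and the method signatures simply mirror those of the `FreqaiQuantileTransformer` added earlier in this series.

```py
import numpy as np


class ClipTransform:
    """
    Hypothetical custom pipeline step: clamp all features to +/- bound.
    It follows the same (X, y, sample_weight, feature_list) pass-through
    convention as FreqaiQuantileTransformer, even where a value is unused.
    """

    def __init__(self, bound: float = 3.0):
        self.bound = bound

    def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        # stateless step: nothing is learned from the training data
        return X, y, sample_weight, feature_list

    def transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        return np.clip(X, -self.bound, self.bound), y, sample_weight, feature_list

    def fit_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        self.fit(X, y, sample_weight, feature_list)
        return self.transform(X, y, sample_weight, feature_list)

    def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs):
        # clipping is not invertible, so the data passes through unchanged
        return X, y, sample_weight, feature_list
```

Under the same assumptions, such a step would be registered like any other, for example `Pipeline([('clip', ClipTransform(3.0)), ...])` inside `define_data_pipeline`.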
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 3f04b17fb..ffe0ee8c3 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -12,8 +12,10 @@ import numpy as np import pandas as pd import psutil from datasieve.pipeline import Pipeline +from datasieve.transforms import SKLearnWrapper from numpy.typing import NDArray from pandas import DataFrame +from sklearn.preprocessing import MinMaxScaler from freqtrade.configuration import TimeRange from freqtrade.constants import Config @@ -509,25 +511,25 @@ class IFreqaiModel(ABC): ft_params = self.freqai_info["feature_parameters"] dk.feature_pipeline = Pipeline([ ('const', ds.DataSieveVarianceThreshold(threshold=0)), - ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) if ft_params.get("principal_component_analysis", False): - dk.feature_pipeline.steps += [('pca', ds.DataSievePCA())] - dk.feature_pipeline.steps += [('post-pca-scaler', - ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))] + dk.feature_pipeline.append(('pca', ds.DataSievePCA())) + dk.feature_pipeline.append(('post-pca-scaler', + SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) if ft_params.get("use_SVM_to_remove_outliers", False): svm_params = ft_params.get( "svm_params", {"shuffle": False, "nu": 0.01}) - dk.feature_pipeline.steps += [('svm', ds.SVMOutlierExtractor(**svm_params))] + dk.feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params))) di = ft_params.get("DI_threshold", 0) if di: - dk.feature_pipeline.steps += [('di', ds.DissimilarityIndex(di_threshold=di))] + dk.feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.feature_pipeline.steps += [('dbscan', ds.DataSieveDBSCAN())] + dk.feature_pipeline.append(('dbscan', ds.DataSieveDBSCAN())) dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams( {}, dk.feature_pipeline.steps) @@ -538,7 +540,7 @@ class IFreqaiModel(ABC): def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: dk.label_pipeline = Pipeline([ - ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))) + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) def model_exists(self, dk: FreqaiDataKitchen) -> bool: @@ -551,8 +553,6 @@ class IFreqaiModel(ABC): """ if self.dd.model_type == 'joblib': file_type = ".joblib" - elif self.dd.model_type == 'keras': - file_type = ".h5" elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]: file_type = ".zip" @@ -676,7 +676,7 @@ class IFreqaiModel(ABC): # # for keras type models, the conv_window needs to be prepended so # # viewing is correct in frequi - if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0): + if self.ft_params.get('inlier_metric_window', 0): n_lost_points = self.freqai_info.get('conv_width', 2) zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))), columns=hist_preds_df.columns) diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index 88d348448..19c051b91 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -9,7 +9,7 @@ from freqtrade.freqai.tensorboard import TBCallback # from datasieve.pipeline import Pipeline -# from freqtrade.freqai.transforms import FreqaiQuantileTransformer +# from 
sklearn.preprocessing import QuantileTransformer logger = logging.getLogger(__name__) @@ -61,7 +61,7 @@ class XGBoostRegressor(BaseRegressionModel): # User defines their custom eature pipeline here (if they wish) # """ # dk.feature_pipeline = Pipeline([ - # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) # ]) # return @@ -71,7 +71,7 @@ class XGBoostRegressor(BaseRegressionModel): # User defines their custom label pipeline here (if they wish) # """ # dk.label_pipeline = Pipeline([ - # ('qt', FreqaiQuantileTransformer(output_distribution='normal')) + # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) # ]) # return diff --git a/freqtrade/freqai/transforms/__init__.py b/freqtrade/freqai/transforms/__init__.py deleted file mode 100644 index 9b7d8ccf5..000000000 --- a/freqtrade/freqai/transforms/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from freqtrade.freqai.transforms.quantile_transform import FreqaiQuantileTransformer - - -__all__ = ( - "FreqaiQuantileTransformer", -) diff --git a/freqtrade/freqai/transforms/quantile_transform.py b/freqtrade/freqai/transforms/quantile_transform.py deleted file mode 100644 index 3d1bd2731..000000000 --- a/freqtrade/freqai/transforms/quantile_transform.py +++ /dev/null @@ -1,28 +0,0 @@ -from sklearn.preprocessing import QuantileTransformer - - -class FreqaiQuantileTransformer(QuantileTransformer): - """ - A subclass of the SKLearn Quantile that ensures fit, transform, fit_transform and - inverse_transform all take the full set of params X, y, sample_weight required to - benefit from the DataSieve features. - """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def fit_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - super().fit(X) - X = super().transform(X) - return X, y, sample_weight, feature_list - - def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - super().fit(X) - return X, y, sample_weight, feature_list - - def transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - X = super().transform(X) - return X, y, sample_weight, feature_list - - def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): - return super().inverse_transform(X), y, sample_weight, feature_list diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 31c73b594..748950e24 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.1.0 +datasieve==0.1.1 From f7f88aa14d5547cb827516dc6538ce4a94676c70 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Jun 2023 09:28:56 +0200 Subject: [PATCH 041/130] fix pickle file name --- freqtrade/freqai/data_drawer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 670dfc620..067790b9a 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -523,7 +523,7 @@ class FreqaiDataDrawer: dk.data_dictionary["train_features"] = pd.read_pickle( dk.data_path / f"{dk.model_filename}_trained_df.pkl" ) - with (dk.data_path / f"{dk.model_filename}_pipeline.pkl").open("rb") as fp: + with (dk.data_path / f"{dk.model_filename}_feature_pipeline.pkl").open("rb") as fp: dk.feature_pipeline = cloudpickle.load(fp) with (dk.data_path / 
f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp: dk.label_pipeline = cloudpickle.load(fp) From 4d4589becded66667ae91ff2b91f8dc6712fa81a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Jun 2023 14:00:00 +0200 Subject: [PATCH 042/130] fix isort in tests --- tests/freqai/test_freqai_datakitchen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 77816749f..8d09cfc58 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -10,8 +10,8 @@ from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from tests.conftest import get_patched_exchange -from tests.freqai.conftest import make_unfiltered_dataframe -from tests.freqai.conftest import get_patched_data_kitchen, get_patched_freqai_strategy +from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, + make_unfiltered_dataframe) from tests.freqai.test_freqai_interface import is_mac From dc577d2a1a751180d0c80b4d76c6185ce2d3332d Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Jun 2023 17:58:27 +0200 Subject: [PATCH 043/130] update to new datasieve interface, add noise to pipeline --- freqtrade/freqai/data_kitchen.py | 11 ----------- freqtrade/freqai/freqai_interface.py | 13 +++++++------ requirements-freqai.txt | 2 +- tests/freqai/test_freqai_interface.py | 28 ++++++++++++++------------- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 127193a35..ecdb2e109 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -371,17 +371,6 @@ class FreqaiDataKitchen: return df - def add_noise_to_training_features(self) -> None: - """ - Add noise to train features to reduce the risk of overfitting. 
- """ - mu = 0 # no shift - sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"] - compute_df = self.data_dictionary['train_features'] - noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]]) - self.data_dictionary['train_features'] += noise - return - def find_features(self, dataframe: DataFrame) -> None: """ Find features in the strategy provided dataframe diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ffe0ee8c3..632266b00 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -510,12 +510,12 @@ class IFreqaiModel(ABC): def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: ft_params = self.freqai_info["feature_parameters"] dk.feature_pipeline = Pipeline([ - ('const', ds.DataSieveVarianceThreshold(threshold=0)), + ('const', ds.VarianceThreshold(threshold=0)), ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) if ft_params.get("principal_component_analysis", False): - dk.feature_pipeline.append(('pca', ds.DataSievePCA())) + dk.feature_pipeline.append(('pca', ds.PCA())) dk.feature_pipeline.append(('post-pca-scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) @@ -529,14 +529,15 @@ class IFreqaiModel(ABC): dk.feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.feature_pipeline.append(('dbscan', ds.DataSieveDBSCAN())) + dk.feature_pipeline.append(('dbscan', ds.DBSCAN())) + + sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0) + if sigma: + dk.feature_pipeline.append(('noise', ds.Noise(sigma=sigma))) dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams( {}, dk.feature_pipeline.steps) - # if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): - # dk.pipeline.extend(('noise', ds.Noise())) - def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: dk.label_pipeline = Pipeline([ diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 748950e24..a515ba2b5 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.1.1 +datasieve==0.1.2 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 90959ec2c..fec9de724 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -37,21 +37,22 @@ def can_run_model(model: str) -> None: pytest.skip("Reinforcement learning / PyTorch module not available on intel based Mac OS.") -@pytest.mark.parametrize('model, pca, dbscan, float32, can_short, shuffle, buffer', [ - ('LightGBMRegressor', True, False, True, True, False, 0), - ('XGBoostRegressor', False, True, False, True, False, 10), - ('XGBoostRFRegressor', False, False, False, True, False, 0), - ('CatboostRegressor', False, False, False, True, True, 0), - ('PyTorchMLPRegressor', False, False, False, False, False, 0), - ('PyTorchTransformerRegressor', False, False, False, False, False, 0), - ('ReinforcementLearner', False, True, False, True, False, 0), - ('ReinforcementLearner_multiproc', False, False, False, True, False, 0), - ('ReinforcementLearner_test_3ac', False, False, False, False, False, 0), - ('ReinforcementLearner_test_3ac', False, False, False, True, False, 0), - ('ReinforcementLearner_test_4ac', False, False, 
False, True, False, 0),
+    ('ReinforcementLearner_test_4ac', False, False, False, True, False, 0, 0),
 ])
 def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
-                                               dbscan, float32, can_short, shuffle, buffer):
+                                               dbscan, float32, can_short, shuffle,
+                                               buffer, noise):

     can_run_model(model)

@@ -68,6 +69,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
     freqai_conf.update({"reduce_df_footprint": float32})
     freqai_conf['freqai']['feature_parameters'].update({"shuffle_after_split": shuffle})
     freqai_conf['freqai']['feature_parameters'].update({"buffer_train_data_candles": buffer})
+    freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": noise})

     if 'ReinforcementLearner' in model:
         model_save_ext = 'zip'

From 135aaa2be2450da404693ae45a90e1461443d62e Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 7 Jun 2023 18:26:49 +0200
Subject: [PATCH 044/130] update docs, improve the interaction with
 `define_data_pipeline`

---
 docs/freqai-feature-engineering.md            | 21 ++++++----
 .../RL/BaseReinforcementLearningModel.py      | 37 +++++++++---------
 .../freqai/base_models/BaseClassifierModel.py | 29 +++++++-------
 .../base_models/BasePyTorchClassifier.py      | 32 ++++++++--------
 .../base_models/BasePyTorchRegressor.py       | 38 +++++++++----------
 .../freqai/base_models/BaseRegressionModel.py | 35 +++++++++--------
 freqtrade/freqai/freqai_interface.py          | 30 ++++++++-------
 7 files changed, 114 insertions(+), 108 deletions(-)

diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md
index 0eee0793b..364b920a1 100644
--- a/docs/freqai-feature-engineering.md
+++ b/docs/freqai-feature-engineering.md
@@ -226,8 +226,10 @@ FreqAI uses the [`DataSieve`](https://github.com/emergentmethods/datasieve)
 This means that users can use/customize any SKLearn modules and easily add them to their FreqAI data pipeline.
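As a sketch of that claim (assuming only the `SKLearnWrapper` and `Pipeline` APIs shown in the surrounding hunks, and picking `RobustScaler` as an arbitrary SKLearn module), wrapping a transform looks like this, following the `dk.feature_pipeline` convention of these docs:

```py
from datasieve.pipeline import Pipeline
from datasieve.transforms import SKLearnWrapper
from sklearn.preprocessing import RobustScaler

# RobustScaler is only an illustrative choice; the wrapper adapts any
# fit/transform SKLearn object to the (X, y, sample_weight) tuples that
# the datasieve Pipeline passes between steps.
dk.feature_pipeline = Pipeline([
    ('robust', SKLearnWrapper(RobustScaler())),
])
```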
By default, FreqAI builds the following pipeline:

 ```py
+from datasieve.transforms import SKLearnWrapper, DissimilarityIndex
+from datasieve.pipeline import Pipeline
+from sklearn.preprocessing import MinMaxScaler
 dk.feature_pipeline = Pipeline([
-    ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))),
+    ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
     ('di', DissimilarityIndex(di_threshold=1)),
 ])
 ```
@@ -235,10 +237,12 @@ But users will find that they can add PCA and other steps just by changing their configuration settings, for example, if you add `"principal_component_analysis": true` to the `feature_parameters` dict in the `freqai` config, then FreqAI will add the PCA step for you resulting in the following pipeline:

 ```py
+from datasieve.transforms import SKLearnWrapper, DissimilarityIndex, PCA
+from datasieve.pipeline import Pipeline
+from sklearn.preprocessing import MinMaxScaler
 dk.feature_pipeline = Pipeline([
-    ('scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1))),
-    ('pca', ds.DataSievePCA()),
-    ('post-pca-scaler', ds.DataSieveMinMaxScaler(feature_range=(-1, 1)))
+    ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
+    ('pca', PCA()),
+    ('post-pca-scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
     ('di', DissimilarityIndex(di_threshold=1)),
 ])
 ```
@@ -247,16 +251,19 @@ The same concept follows if users activate other config options like `"use_SVM_t

 ## Customizing the pipeline

-Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by overriding `define_data_pipeline` in their `IFreqaiModel`. For example:
+Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline` in their `IFreqaiModel`:

 ```py
+    from datasieve.transforms import SKLearnWrapper, DissimilarityIndex
+    from datasieve.pipeline import Pipeline
+    from sklearn.preprocessing import QuantileTransformer
     def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None:
         """
         User defines their custom feature pipeline here (if they wish)
         """
         dk.feature_pipeline = Pipeline([
-            ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal')))
+            ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))),
+            ('di', DissimilarityIndex(di_threshold=1))
         ])

         return
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index bd22decaa..90e60ec5c 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -110,40 +110,37 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             training_filter=True,
         )

-        d: Dict[str, Any] = dk.make_train_test_datasets(
+        dd: Dict[str, Any] = dk.make_train_test_datasets(
             features_filtered, labels_filtered)
-        self.df_raw = copy.deepcopy(d["train_features"])
+        self.df_raw = copy.deepcopy(dd["train_features"])
         dk.fit_labels()  # FIXME useless for now, but just satiating append methods

         # normalize all data based on train_dataset only
         prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary,
                                                                      pair, dk)

-        self.define_data_pipeline(dk)
-        self.define_label_pipeline(dk)
+        dk.feature_pipeline = self.define_data_pipeline()
+        dk.label_pipeline =
self.define_label_pipeline() - # d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) - # d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) - (d["train_features"], - d["train_labels"], - d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) - - (d["test_features"], - d["test_labels"], - d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f'Training model on {len(dk.data_dictionary["train_features"].columns)}' - f' features and {len(d["train_features"])} data points' + f' features and {len(dd["train_features"])} data points' ) - self.set_train_and_eval_environments(d, prices_train, prices_test, dk) + self.set_train_and_eval_environments(dd, prices_train, prices_test, dk) - model = self.fit(d, dk) + model = self.fit(dd, dk) logger.info(f"--------------------done training {pair}--------------------") diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py index 179e8a5af..8495cd9b9 100644 --- a/freqtrade/freqai/base_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -50,30 +50,29 @@ class BaseClassifierModel(IFreqaiModel): logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date} --------------------") # split data into train/test data. 
- d = dk.make_train_test_datasets(features_filtered, labels_filtered) + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - self.define_data_pipeline(dk) - self.define_label_pipeline(dk) + dk.feature_pipeline = self.define_data_pipeline() - (d["train_features"], - d["train_labels"], - d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) - (d["test_features"], - d["test_labels"], - d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(d['train_features'])} data points") + logger.info(f"Training model on {len(dd['train_features'])} data points") - model = self.fit(d, dk) + model = self.fit(dd, dk) end_time = time() diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py index 448384852..85328aa41 100644 --- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py +++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py @@ -36,6 +36,7 @@ class BasePyTorchClassifier(BasePyTorchModel): return dataframe """ + def __init__(self, **kwargs): super().__init__(**kwargs) self.class_name_to_index = None @@ -184,31 +185,30 @@ class BasePyTorchClassifier(BasePyTorchModel): ) # split data into train/test data. 
- d = dk.make_train_test_datasets(features_filtered, labels_filtered) - if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) - d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + dk.feature_pipeline = self.define_data_pipeline() - (d["train_features"], - d["train_labels"], - d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) - (d["test_features"], - d["test_labels"], - d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(d['train_features'])} data points") + logger.info(f"Training model on {len(dd['train_features'])} data points") - model = self.fit(d, dk) + model = self.fit(dd, dk) end_time = time() logger.info(f"-------------------- Done training {pair} " diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py index 2f2aaef39..8b304fce4 100644 --- a/freqtrade/freqai/base_models/BasePyTorchRegressor.py +++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py @@ -18,6 +18,7 @@ class BasePyTorchRegressor(BasePyTorchModel): A PyTorch implementation of a regressor. User must implement fit method """ + def __init__(self, **kwargs): super().__init__(**kwargs) @@ -81,34 +82,33 @@ class BasePyTorchRegressor(BasePyTorchModel): ) # split data into train/test data. 
- d = dk.make_train_test_datasets(features_filtered, labels_filtered) - if not self.freqai_info.get("fit_live_predictions", 0) or not self.live: + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() + dk.feature_pipeline = self.define_data_pipeline() + dk.label_pipeline = self.define_label_pipeline() - self.define_data_pipeline(dk) - self.define_label_pipeline(dk) + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) - d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) - d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) - (d["train_features"], - d["train_labels"], - d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) - - (d["test_features"], - d["test_labels"], - d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(d['train_features'])} data points") + logger.info(f"Training model on {len(dd['train_features'])} data points") - model = self.fit(d, dk) + model = self.fit(dd, dk) end_time = time() logger.info(f"-------------------- Done training {pair} " diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index 1babd5f0c..d86b21107 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -49,34 +49,33 @@ class BaseRegressionModel(IFreqaiModel): logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date} --------------------") # split data into train/test data. 
- d = dk.make_train_test_datasets(features_filtered, labels_filtered) + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() + dk.feature_pipeline = self.define_data_pipeline() + dk.label_pipeline = self.define_label_pipeline() - self.define_data_pipeline(dk) - self.define_label_pipeline(dk) + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) - (d["train_features"], - d["train_labels"], - d["train_weights"]) = dk.feature_pipeline.fit_transform(d["train_features"], - d["train_labels"], - d["train_weights"]) + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) - (d["test_features"], - d["test_labels"], - d["test_weights"]) = dk.feature_pipeline.transform(d["test_features"], - d["test_labels"], - d["test_weights"]) - - d["train_labels"], _, _ = dk.label_pipeline.fit_transform(d["train_labels"]) - d["test_labels"], _, _ = dk.label_pipeline.transform(d["test_labels"]) + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" ) - logger.info(f"Training model on {len(d['train_features'])} data points") + logger.info(f"Training model on {len(dd['train_features'])} data points") - model = self.fit(d, dk) + model = self.fit(dd, dk) end_time = time() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 632266b00..a98bd92b5 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -507,43 +507,47 @@ class IFreqaiModel(ABC): "feature_engineering_* functions" ) - def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: + def define_data_pipeline(self) -> Pipeline: ft_params = self.freqai_info["feature_parameters"] - dk.feature_pipeline = Pipeline([ + feature_pipeline = Pipeline([ ('const', ds.VarianceThreshold(threshold=0)), ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) if ft_params.get("principal_component_analysis", False): - dk.feature_pipeline.append(('pca', ds.PCA())) - dk.feature_pipeline.append(('post-pca-scaler', - SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) + feature_pipeline.append(('pca', ds.PCA())) + feature_pipeline.append(('post-pca-scaler', + SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) if ft_params.get("use_SVM_to_remove_outliers", False): svm_params = ft_params.get( "svm_params", {"shuffle": False, "nu": 0.01}) - dk.feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params))) + feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params))) di = ft_params.get("DI_threshold", 0) if di: - dk.feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) + feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.feature_pipeline.append(('dbscan', ds.DBSCAN())) + feature_pipeline.append(('dbscan', ds.DBSCAN())) sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0) if sigma: - dk.feature_pipeline.append(('noise', ds.Noise(sigma=sigma))) + feature_pipeline.append(('noise', ds.Noise(sigma=sigma))) - 
dk.feature_pipeline.fitparams = dk.feature_pipeline._validate_fitparams( - {}, dk.feature_pipeline.steps) + feature_pipeline.fitparams = feature_pipeline._validate_fitparams( + {}, feature_pipeline.steps) - def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: + return feature_pipeline - dk.label_pipeline = Pipeline([ + def define_label_pipeline(self) -> Pipeline: + + label_pipeline = Pipeline([ ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ]) + return label_pipeline + def model_exists(self, dk: FreqaiDataKitchen) -> bool: """ Given a pair and path, check if a model already exists From 6d39adc7391e345fb73bea9ce9baa8991e8f597d Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Jun 2023 18:29:49 +0200 Subject: [PATCH 045/130] bump datasieve version --- requirements-freqai.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-freqai.txt b/requirements-freqai.txt index a515ba2b5..94c6e2662 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.1.2 +datasieve==0.1.3 From c066f014e35ba89684e116e5dc4358957ca336b9 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 7 Jun 2023 18:36:07 +0200 Subject: [PATCH 046/130] fix docs --- docs/freqai-feature-engineering.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 364b920a1..c91e78afc 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -257,16 +257,16 @@ Users are encouraged to customize the data pipeline to their needs by building t from datasieve.transforms import SKLearnWrapper, DissimilarityIndex from datasieve.pipeline import Pipeline from sklearn.preprocessing import QuantileTransformer - def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: + def define_data_pipeline(self) -> Pipeline: """ User defines their custom eature pipeline here (if they wish) """ - dk.feature_pipeline = Pipeline([ + feature_pipeline = Pipeline([ ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))), ('di', ds.DissimilarityIndex(di_threshold=1) ]) - return + return feature_pipeline ``` Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. Here you can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class. 
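The refactor in the patches above changes `define_data_pipeline()`/`define_label_pipeline()` from mutating the `FreqaiDataKitchen` (`dk`) in place to returning a `Pipeline` that the caller assigns to `dk.feature_pipeline`/`dk.label_pipeline`. Below is a minimal standalone sketch of the `DataSieve` calls these diffs rely on; the toy data and column names are illustrative assumptions and not part of the patch series.

```python
# Illustrative sketch only (not from the patch series): exercising the
# datasieve Pipeline API that the refactored train() methods above call.
import numpy as np
import pandas as pd
from datasieve.pipeline import Pipeline
from datasieve.transforms import SKLearnWrapper, VarianceThreshold
from sklearn.preprocessing import MinMaxScaler

# The same default steps the diffs build inside define_data_pipeline()
feature_pipeline = Pipeline([
    ('const', VarianceThreshold(threshold=0)),
    ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
])

# Toy stand-ins for dd["train_features"] / dd["train_labels"] / dd["train_weights"]
train_features = pd.DataFrame(np.random.rand(100, 3), columns=['f1', 'f2', 'f3'])
train_labels = pd.DataFrame(np.random.rand(100, 1), columns=['&-target'])
train_weights = np.ones(100)

# fit_transform() returns the (features, labels, weights) triple so that any
# row removal performed by a step stays coherent across all three arrays.
train_features, train_labels, train_weights = feature_pipeline.fit_transform(
    train_features, train_labels, train_weights)

# transform() reuses the fitted steps on unseen data, mirroring how the
# diffs above handle the test split.
test_features = pd.DataFrame(np.random.rand(20, 3), columns=['f1', 'f2', 'f3'])
test_features, _, _ = feature_pipeline.transform(test_features)
```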
From 14557f2d326b5b452fa6182d6b3b2f5f36401135 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 7 Jun 2023 19:24:21 +0200
Subject: [PATCH 047/130] merge develop into outsource-data-pipeline

---
 requirements-freqai.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-freqai.txt b/requirements-freqai.txt
index 94c6e2662..424c74a9e 100644
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -10,4 +10,4 @@ catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or py
 lightgbm==3.3.5
 xgboost==1.7.5
 tensorboard==2.13.0
-datasieve==0.1.3
+datasieve==0.1.4

From e39e40dc60599f26cb1719837fa168e738fbc6ed Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Thu, 8 Jun 2023 11:56:31 +0200
Subject: [PATCH 048/130] improve documentation of pipeline
 building/customization

---
 docs/freqai-feature-engineering.md            | 46 ++++++++++++++-----
 .../prediction_models/XGBoostRegressor.py     |  3 --
 2 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md
index c91e78afc..1151f01a3 100644
--- a/docs/freqai-feature-engineering.md
+++ b/docs/freqai-feature-engineering.md
@@ -221,20 +221,20 @@ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. B
 
 # Building the data pipeline
 
-FreqAI uses the the [`DataSieve`](https://github.com/emergentmethods/datasieve) pipeline, which follows the SKlearn pipeline API, but adds, among other features, coherence between the X, y, and sample_weight vector point removals, and feature removal feature name following.
+FreqAI uses the [`DataSieve`](https://github.com/emergentmethods/datasieve) pipeline, which follows the SKlearn pipeline API, but adds, among other features, coherence between the X, y, and sample_weight vector point removals, feature removal, and feature name following.
 
-This means that users can use/customize any SKLearn modules and easily add them to their FreqAI data pipeline. By default, FreqAI builds the following pipeline:
+By default, FreqAI builds the following pipeline inside the `IFreqaiModel` `train()` method:
 
 ```py
 from datasieve.transforms import SKLearnWrapper, DissimilarityIndex
 from datasieve.pipeline import Pipeline
 dk.feature_pipeline = Pipeline([
-    ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))),
+    ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
     ('di', ds.DissimilarityIndex(di_threshold=1)),
     ])
 ```
 
-But users will find that they can add PCA and other steps just by changing their configuration settings, for example, if you add `"principal_component_analysis": true` to the `feature_parameters` dict in the `freqai` config, then FreqAI will add the PCA step for you resulting in the following pipeline:
+But users will find that they can add PCA and other steps just by changing their configuration settings, for example, if you add `"principal_component_analysis": true` to the `feature_parameters` dict in the `freqai` config, then FreqAI will automatically add the PCA step for you resulting in the following pipeline:
 
 ```py
 from datasieve.transforms import SKLearnWrapper, DissimilarityIndex, PCA
 from datasieve.pipeline import Pipeline
 dk.feature_pipeline = Pipeline([
     ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
     ('pca', ds.PCA()),
     ('post-pca-scaler', ds.MinMaxScaler(feature_range=(-1, 1)))
     ('di', ds.DissimilarityIndex(di_threshold=1)),
     ])
 ```
 
 The same concept follows if users activate other config options like `"use_SVM_to_remove_outliers": true` or `"use_DBSCAN_to_remove_outliers": true`. FreqAI will add the appropriate steps to the pipeline for you.
 
 ## Customizing the pipeline
 
-Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. 
This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline` in their `IFreqaiModel`: +Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline`/`define_label_pipeline` functions in their `IFreqaiModel`: ```py - from datasieve.transforms import SKLearnWrapper, DissimilarityIndex - from datasieve.pipeline import Pipeline - from sklearn.preprocessing import QuantileTransformer +from datasieve.transforms import SKLearnWrapper, DissimilarityIndex +from datasieve.pipeline import Pipeline +from sklearn.preprocessing import QuantileTransformer, StandardScaler +from freqai.base_models import BaseRegressionModel + + +class MyFreqaiModel(BaseRegressionModel): + """ + Some cool custom model + """ + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + My custom fit function + """ + model = cool_model.fit() + return model + def define_data_pipeline(self) -> Pipeline: """ - User defines their custom eature pipeline here (if they wish) + User defines their custom feature pipeline here (if they wish) """ feature_pipeline = Pipeline([ ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))), ('di', ds.DissimilarityIndex(di_threshold=1) ]) + return feature_pipeline + + def define_label_pipeline(self) -> Pipeline: + """ + User defines their custom label pipeline here (if they wish) + """ + feature_pipeline = Pipeline([ + ('qt', SKLearnWrapper(StandardScaler())), + ]) + return feature_pipeline ``` -Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. Here you can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class. - -As there is the `feature_pipeline`, there also exists a definition for the `label_pipeline` which can be defined the same way as the `feature_pipeline`, by overriding `define_label_pipeline`. +Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class as shown above. ## Outlier detection diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index 19c051b91..c1142191d 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -8,9 +8,6 @@ from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.tensorboard import TBCallback -# from datasieve.pipeline import Pipeline -# from sklearn.preprocessing import QuantileTransformer - logger = logging.getLogger(__name__) From 88337b6c5eef4c8894911a2b48a12250498d1da3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Thu, 8 Jun 2023 12:19:42 +0200 Subject: [PATCH 049/130] convert to using constants in data_drawer. 
Remove unneeded check_if_pred_in_spaces function --- freqtrade/freqai/data_drawer.py | 41 ++++++++++++++++++-------------- freqtrade/freqai/data_kitchen.py | 33 +------------------------ 2 files changed, 24 insertions(+), 50 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 067790b9a..2a3ec6dd2 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -27,6 +27,11 @@ from freqtrade.strategy.interface import IStrategy logger = logging.getLogger(__name__) +FEATURE_PIPELINE = "feature_pipeline" +LABEL_PIPELINE = "label_pipeline" +TRAINDF = "trained_df" +METADATA = "metadata" + class pair_info(TypedDict): model_filename: str @@ -424,7 +429,7 @@ class FreqaiDataDrawer: dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) dk.data["label_list"] = dk.label_list - with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp: + with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) return @@ -454,19 +459,19 @@ class FreqaiDataDrawer: dk.data["training_features_list"] = dk.training_features_list dk.data["label_list"] = dk.label_list # store the metadata - with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp: + with (save_path / f"{dk.model_filename}_{METADATA}.json").open("w") as fp: rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) # save the pipelines to pickle files - with (save_path / f"{dk.model_filename}_feature_pipeline.pkl").open("wb") as fp: + with (save_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("wb") as fp: cloudpickle.dump(dk.feature_pipeline, fp) - with (save_path / f"{dk.model_filename}_label_pipeline.pkl").open("wb") as fp: + with (save_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("wb") as fp: cloudpickle.dump(dk.label_pipeline, fp) # save the train data to file so we can check preds for area of applicability later dk.data_dictionary["train_features"].to_pickle( - save_path / f"{dk.model_filename}_trained_df.pkl" + save_path / f"{dk.model_filename}_{TRAINDF}.pkl" ) dk.data_dictionary["train_dates"].to_pickle( @@ -479,10 +484,10 @@ class FreqaiDataDrawer: if coin not in self.meta_data_dictionary: self.meta_data_dictionary[coin] = {} - self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"] - self.meta_data_dictionary[coin]["meta_data"] = dk.data - self.meta_data_dictionary[coin]["feature_pipeline"] = dk.feature_pipeline - self.meta_data_dictionary[coin]["label_pipeline"] = dk.label_pipeline + self.meta_data_dictionary[coin][TRAINDF] = dk.data_dictionary["train_features"] + self.meta_data_dictionary[coin][METADATA] = dk.data + self.meta_data_dictionary[coin][FEATURE_PIPELINE] = dk.feature_pipeline + self.meta_data_dictionary[coin][LABEL_PIPELINE] = dk.label_pipeline self.save_drawer_to_disk() return @@ -492,7 +497,7 @@ class FreqaiDataDrawer: Load only metadata into datakitchen to increase performance during presaved backtesting (prediction file loading). 
""" - with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: + with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) dk.training_features_list = dk.data["training_features_list"] dk.label_list = dk.data["label_list"] @@ -512,20 +517,20 @@ class FreqaiDataDrawer: dk.data_path = Path(self.pair_dict[coin]["data_path"]) if coin in self.meta_data_dictionary: - dk.data = self.meta_data_dictionary[coin]["meta_data"] - dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"] - dk.feature_pipeline = self.meta_data_dictionary[coin]["feature_pipeline"] - dk.label_pipeline = self.meta_data_dictionary[coin]["label_pipeline"] + dk.data = self.meta_data_dictionary[coin][METADATA] + dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin][TRAINDF] + dk.feature_pipeline = self.meta_data_dictionary[coin][FEATURE_PIPELINE] + dk.label_pipeline = self.meta_data_dictionary[coin][LABEL_PIPELINE] else: - with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp: + with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) dk.data_dictionary["train_features"] = pd.read_pickle( - dk.data_path / f"{dk.model_filename}_trained_df.pkl" + dk.data_path / f"{dk.model_filename}_{TRAINDF}.pkl" ) - with (dk.data_path / f"{dk.model_filename}_feature_pipeline.pkl").open("rb") as fp: + with (dk.data_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp: dk.feature_pipeline = cloudpickle.load(fp) - with (dk.data_path / f"{dk.model_filename}_label_pipeline.pkl").open("rb") as fp: + with (dk.data_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("rb") as fp: dk.label_pipeline = cloudpickle.load(fp) dk.training_features_list = dk.data["training_features_list"] diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ecdb2e109..de07865d3 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -13,7 +13,6 @@ import pandas as pd import psutil from datasieve.pipeline import Pipeline from pandas import DataFrame -from sklearn.metrics.pairwise import pairwise_distances from sklearn.model_selection import train_test_split from freqtrade.configuration import TimeRange @@ -82,6 +81,7 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) self.feature_pipeline = Pipeline() self.label_pipeline = Pipeline() + self.DI_values: npt.NDArray = np.array([]) if not self.live: self.full_path = self.get_full_models_path(self.config) @@ -391,37 +391,6 @@ class FreqaiDataKitchen: labels = [c for c in column_names if "&" in c] self.label_list = labels - def check_if_pred_in_training_spaces(self) -> None: - """ - Compares the distance from each prediction point to each training data - point. It uses this information to estimate a Dissimilarity Index (DI) - and avoid making predictions on any points that are too far away - from the training data set. 
- """ - - distance = pairwise_distances( - self.data_dictionary["train_features"], - self.data_dictionary["prediction_features"], - n_jobs=self.thread_count, - ) - - self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] - - do_predict = np.where( - self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"], - 1, - 0, - ) - - if (len(do_predict) - do_predict.sum()) > 0: - logger.info( - f"{self.pair}: DI tossed {len(do_predict) - do_predict.sum()} predictions for " - "being too far from training data." - ) - - self.do_predict += do_predict - self.do_predict -= 1 - def set_weights_higher_recent(self, num_weights: int) -> npt.ArrayLike: """ Set weights so that recent data is more heavily weighted during From 33b028b104ea56233d7ebbf61d3c8fa27ed6afad Mon Sep 17 00:00:00 2001 From: robcaulk Date: Thu, 8 Jun 2023 12:33:08 +0200 Subject: [PATCH 050/130] ensure data kitchen thread count is propagated to pipeline --- .../RL/BaseReinforcementLearningModel.py | 4 ++-- .../freqai/base_models/BaseClassifierModel.py | 2 +- .../base_models/BasePyTorchClassifier.py | 2 +- .../base_models/BasePyTorchRegressor.py | 4 ++-- .../freqai/base_models/BaseRegressionModel.py | 4 ++-- freqtrade/freqai/freqai_interface.py | 8 ++++---- .../prediction_models/XGBoostRegressor.py | 20 ------------------- 7 files changed, 12 insertions(+), 32 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 90e60ec5c..cffab602d 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -118,8 +118,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): # normalize all data based on train_dataset only prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk) - dk.feature_pipeline = self.define_data_pipeline() - dk.label_pipeline = self.define_label_pipeline() + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) (dd["train_features"], dd["train_labels"], diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py index 8495cd9b9..2df639b55 100644 --- a/freqtrade/freqai/base_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -53,7 +53,7 @@ class BaseClassifierModel(IFreqaiModel): dd = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - dk.feature_pipeline = self.define_data_pipeline() + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) (dd["train_features"], dd["train_labels"], diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py index 85328aa41..57f31629a 100644 --- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py +++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py @@ -189,7 +189,7 @@ class BasePyTorchClassifier(BasePyTorchModel): if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - dk.feature_pipeline = self.define_data_pipeline() + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) (dd["train_features"], dd["train_labels"], diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py 
index 8b304fce4..ec4d6b80c 100644 --- a/freqtrade/freqai/base_models/BasePyTorchRegressor.py +++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py @@ -85,8 +85,8 @@ class BasePyTorchRegressor(BasePyTorchModel): dd = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - dk.feature_pipeline = self.define_data_pipeline() - dk.label_pipeline = self.define_label_pipeline() + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index d86b21107..d7e7d9916 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -52,8 +52,8 @@ class BaseRegressionModel(IFreqaiModel): dd = dk.make_train_test_datasets(features_filtered, labels_filtered) if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live: dk.fit_labels() - dk.feature_pipeline = self.define_data_pipeline() - dk.label_pipeline = self.define_label_pipeline() + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) (dd["train_features"], dd["train_labels"], diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index a98bd92b5..87f682ad3 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -507,7 +507,7 @@ class IFreqaiModel(ABC): "feature_engineering_* functions" ) - def define_data_pipeline(self) -> Pipeline: + def define_data_pipeline(self, threads=-1) -> Pipeline: ft_params = self.freqai_info["feature_parameters"] feature_pipeline = Pipeline([ ('const', ds.VarianceThreshold(threshold=0)), @@ -526,10 +526,10 @@ class IFreqaiModel(ABC): di = ft_params.get("DI_threshold", 0) if di: - feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di))) + feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di, n_jobs=threads))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - feature_pipeline.append(('dbscan', ds.DBSCAN())) + feature_pipeline.append(('dbscan', ds.DBSCAN(n_jobs=threads))) sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0) if sigma: @@ -540,7 +540,7 @@ class IFreqaiModel(ABC): return feature_pipeline - def define_label_pipeline(self) -> Pipeline: + def define_label_pipeline(self, threads=-1) -> Pipeline: label_pipeline = Pipeline([ ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index c1142191d..f8b4d353d 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -52,23 +52,3 @@ class XGBoostRegressor(BaseRegressionModel): model.set_params(callbacks=[]) return model - - # def define_data_pipeline(self, dk: FreqaiDataKitchen) -> None: - # """ - # User defines their custom eature pipeline here (if they wish) - # """ - # dk.feature_pipeline = Pipeline([ - # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) - # ]) - - # 
return - - # def define_label_pipeline(self, dk: FreqaiDataKitchen) -> None: - # """ - # User defines their custom label pipeline here (if they wish) - # """ - # dk.label_pipeline = Pipeline([ - # ('qt', SKLearnWrapper(QuantileTransformer(output_distribution='normal'))) - # ]) - - # return From 6b736c49d4366618ff5d2cc4accc983c44ef4c66 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 8 Jun 2023 20:13:28 +0200 Subject: [PATCH 051/130] Dont persist Backtesting to avoid memory leak --- freqtrade/optimize/lookahead_analysis.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 4f3d7a4d0..a567b3b83 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -61,7 +61,7 @@ class LookaheadAnalysis: return timestamp @staticmethod - def get_result(backtesting, processed: pd.DataFrame): + def get_result(backtesting: Backtesting, processed: pd.DataFrame): min_date, max_date = get_timerange(processed) result = backtesting.backtest( @@ -143,15 +143,15 @@ class LookaheadAnalysis: str(self.dt_to_timestamp(varholder.to_dt))) prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load - self.backtesting = Backtesting(prepare_data_config) - self.backtesting._set_strategy(self.backtesting.strategylist[0]) + backtesting = Backtesting(prepare_data_config) + backtesting._set_strategy(backtesting.strategylist[0]) - varholder.data, varholder.timerange = self.backtesting.load_bt_data() - self.backtesting.load_bt_data_detail() - varholder.timeframe = self.backtesting.timeframe + varholder.data, varholder.timerange = backtesting.load_bt_data() + backtesting.load_bt_data_detail() + varholder.timeframe = backtesting.timeframe - varholder.indicators = self.backtesting.strategy.advise_all_indicators(varholder.data) - varholder.result = self.get_result(self.backtesting, varholder.indicators) + varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data) + varholder.result = self.get_result(backtesting, varholder.indicators) def fill_full_varholder(self): self.full_varHolder = VarHolder() From 05ea36f03b02ff896cd84c7e876579710082c710 Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 9 Jun 2023 06:45:34 +0200 Subject: [PATCH 052/130] Fix performance when running tons of backtests --- freqtrade/optimize/backtesting.py | 31 +++++++++++++++--------- freqtrade/optimize/lookahead_analysis.py | 4 ++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index d77fc469b..4a5536e84 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -24,6 +24,7 @@ from freqtrade.enums import (BacktestState, CandleType, ExitCheckTuple, ExitType from freqtrade.exceptions import DependencyException, OperationalException from freqtrade.exchange import (amount_to_contract_precision, price_to_precision, timeframe_to_minutes, timeframe_to_seconds) +from freqtrade.exchange.exchange import Exchange from freqtrade.mixins import LoggingMixin from freqtrade.optimize.backtest_caching import get_strategy_run_id from freqtrade.optimize.bt_progress import BTProgress @@ -72,7 +73,7 @@ class Backtesting: backtesting.start() """ - def __init__(self, config: Config) -> None: + def __init__(self, config: Config, exchange: Optional[Exchange] = None) -> None: LoggingMixin.show_output = False self.config = config @@ -89,7 +90,10 @@ class Backtesting: 
self.rejected_df: Dict[str, Dict] = {} self._exchange_name = self.config['exchange']['name'] - self.exchange = ExchangeResolver.load_exchange(self.config, load_leverage_tiers=True) + if not exchange: + exchange = ExchangeResolver.load_exchange(self.config, load_leverage_tiers=True) + self.exchange = exchange + self.dataprovider = DataProvider(self.config, self.exchange) if self.config.get('strategy_list'): @@ -114,16 +118,7 @@ class Backtesting: self.timeframe_min = timeframe_to_minutes(self.timeframe) self.init_backtest_detail() self.pairlists = PairListManager(self.exchange, self.config, self.dataprovider) - if 'VolumePairList' in self.pairlists.name_list: - raise OperationalException("VolumePairList not allowed for backtesting. " - "Please use StaticPairList instead.") - if 'PerformanceFilter' in self.pairlists.name_list: - raise OperationalException("PerformanceFilter not allowed for backtesting.") - - if len(self.strategylist) > 1 and 'PrecisionFilter' in self.pairlists.name_list: - raise OperationalException( - "PrecisionFilter not allowed for backtesting multiple strategies." - ) + self._validate_pairlists_for_backtesting() self.dataprovider.add_pairlisthandler(self.pairlists) self.pairlists.refresh_pairlist() @@ -164,6 +159,18 @@ class Backtesting: self.init_backtest() + def _validate_pairlists_for_backtesting(self): + if 'VolumePairList' in self.pairlists.name_list: + raise OperationalException("VolumePairList not allowed for backtesting. " + "Please use StaticPairList instead.") + if 'PerformanceFilter' in self.pairlists.name_list: + raise OperationalException("PerformanceFilter not allowed for backtesting.") + + if len(self.strategylist) > 1 and 'PrecisionFilter' in self.pairlists.name_list: + raise OperationalException( + "PrecisionFilter not allowed for backtesting multiple strategies." 
+ ) + @staticmethod def cleanup(): LoggingMixin.show_output = True diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index a567b3b83..ca419f7e6 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -46,6 +46,7 @@ class LookaheadAnalysis: self.entry_varHolders: List[VarHolder] = [] self.exit_varHolders: List[VarHolder] = [] + self.exchange = None # pull variables the scope of the lookahead_analysis-instance self.local_config = deepcopy(config) @@ -143,7 +144,8 @@ class LookaheadAnalysis: str(self.dt_to_timestamp(varholder.to_dt))) prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load - backtesting = Backtesting(prepare_data_config) + backtesting = Backtesting(prepare_data_config, self.exchange) + self.exchange = backtesting.exchange backtesting._set_strategy(backtesting.strategylist[0]) varholder.data, varholder.timerange = backtesting.load_bt_data() From b89390c06bc42b210c63fbfb5662539ecbae8a4e Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 9 Jun 2023 07:13:45 +0200 Subject: [PATCH 053/130] Reduce log verbosity during bias tester runs --- freqtrade/commands/optimize_commands.py | 2 -- freqtrade/loggers/set_log_levels.py | 29 ++++++++++++++++++++++++ freqtrade/optimize/lookahead_analysis.py | 6 +++++ tests/test_log_setup.py | 18 +++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/freqtrade/commands/optimize_commands.py b/freqtrade/commands/optimize_commands.py index 4b8763737..cdddf0fe5 100644 --- a/freqtrade/commands/optimize_commands.py +++ b/freqtrade/commands/optimize_commands.py @@ -144,5 +144,3 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None: config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE) LookaheadAnalysisSubFunctions.start(config) - - diff --git a/freqtrade/loggers/set_log_levels.py b/freqtrade/loggers/set_log_levels.py index acd8df379..da046f439 100644 --- a/freqtrade/loggers/set_log_levels.py +++ b/freqtrade/loggers/set_log_levels.py @@ -2,6 +2,9 @@ import logging +logger = logging.getLogger(__name__) + + def set_loggers(verbosity: int = 0, api_verbosity: str = 'info') -> None: """ Set the logging level for third party libraries @@ -23,3 +26,29 @@ def set_loggers(verbosity: int = 0, api_verbosity: str = 'info') -> None: logging.getLogger('werkzeug').setLevel( logging.ERROR if api_verbosity == 'error' else logging.INFO ) + + +__BIAS_TESTER_LOGGERS = [ + 'freqtrade.resolvers', + 'freqtrade.strategy.hyper', +] + + +def reduce_verbosity_for_bias_tester() -> None: + """ + Reduce verbosity for bias tester. + It loads the same strategy several times, which would spam the log. + """ + logger.info("Reducing verbosity for bias tester.") + for logger_name in __BIAS_TESTER_LOGGERS: + logging.getLogger(logger_name).setLevel(logging.WARNING) + + +def restore_verbosity_for_bias_tester() -> None: + """ + Restore verbosity after bias tester. 
+ """ + logger.info("Restoring log verbosity.") + log_level = logging.getLogger('freqtrade').getEffectiveLevel() + for logger_name in __BIAS_TESTER_LOGGERS: + logging.getLogger(logger_name).setLevel(log_level) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index ca419f7e6..e98eebeef 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -11,6 +11,8 @@ import pandas as pd from freqtrade.configuration import TimeRange from freqtrade.data.history import get_timerange from freqtrade.exchange import timeframe_to_minutes +from freqtrade.loggers.set_log_levels import (reduce_verbosity_for_bias_tester, + restore_verbosity_for_bias_tester) from freqtrade.optimize.backtesting import Backtesting @@ -231,6 +233,8 @@ class LookaheadAnalysis: # first make a single backtest self.fill_full_varholder() + reduce_verbosity_for_bias_tester() + # check if requirements have been met of full_varholder found_signals: int = self.full_varHolder.result['results'].shape[0] + 1 if found_signals >= self.targeted_trade_amount: @@ -251,6 +255,8 @@ class LookaheadAnalysis: break self.analyze_row(idx, result_row) + # Restore verbosity, so it's not too quiet for the next strategy + restore_verbosity_for_bias_tester() # check and report signals if self.current_analysis.total_signals < self.local_config['minimum_trade_amount']: logger.info(f" -> {self.local_config['strategy']} : too few trades. " diff --git a/tests/test_log_setup.py b/tests/test_log_setup.py index a9be24723..2ce06b6b0 100644 --- a/tests/test_log_setup.py +++ b/tests/test_log_setup.py @@ -7,6 +7,8 @@ import pytest from freqtrade.exceptions import OperationalException from freqtrade.loggers import (FTBufferingHandler, FTStdErrStreamHandler, set_loggers, setup_logging, setup_logging_pre) +from freqtrade.loggers.set_log_levels import (reduce_verbosity_for_bias_tester, + restore_verbosity_for_bias_tester) def test_set_loggers() -> None: @@ -128,3 +130,19 @@ def test_set_loggers_journald_importerror(import_fails): match=r'You need the cysystemd python package.*'): setup_logging(config) logger.handlers = orig_handlers + + +def test_reduce_verbosity(): + reduce_verbosity_for_bias_tester() + + assert logging.getLogger('freqtrade.resolvers').level is logging.WARNING + assert logging.getLogger('freqtrade.strategy.hyper').level is logging.WARNING + # base level wasn't changed + assert logging.getLogger('freqtrade').level is logging.INFO + + restore_verbosity_for_bias_tester() + + assert logging.getLogger('freqtrade.resolvers').level is logging.INFO + assert logging.getLogger('freqtrade.strategy.hyper').level is logging.INFO + assert logging.getLogger('freqtrade').level is logging.INFO + # base level wasn't changed From 16b3363970733ffb7ca6503402f20cc16b05a827 Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 9 Jun 2023 07:16:06 +0200 Subject: [PATCH 054/130] Fix type problem --- freqtrade/optimize/lookahead_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index e98eebeef..65e9cad3f 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -4,7 +4,7 @@ import pathlib import shutil from copy import deepcopy from datetime import datetime, timedelta, timezone -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import pandas as pd @@ -48,7 +48,7 @@ class LookaheadAnalysis: 
         self.entry_varHolders: List[VarHolder] = []
         self.exit_varHolders: List[VarHolder] = []
-        self.exchange = None
+        self.exchange: Optional[Any] = None
 
         # pull variables the scope of the lookahead_analysis-instance
         self.local_config = deepcopy(config)

From 99842402f7d12e2581b68cd18e46b14f3fbf2f55 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Fri, 9 Jun 2023 07:18:35 +0200
Subject: [PATCH 055/130] Further reduce unnecessary output

---
 freqtrade/loggers/set_log_levels.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/freqtrade/loggers/set_log_levels.py b/freqtrade/loggers/set_log_levels.py
index da046f439..d666361b6 100644
--- a/freqtrade/loggers/set_log_levels.py
+++ b/freqtrade/loggers/set_log_levels.py
@@ -31,6 +31,7 @@ def set_loggers(verbosity: int = 0, api_verbosity: str = 'info') -> None:
 __BIAS_TESTER_LOGGERS = [
     'freqtrade.resolvers',
     'freqtrade.strategy.hyper',
+    'freqtrade.configuration.config_validation',
 ]

From 6656740f2120b0c0d07cd4cfa4c175e1506d1446 Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Fri, 9 Jun 2023 22:11:30 +0200
Subject: [PATCH 056/130] Moved config overrides to its own function Added
 config overrides to dry_run_wallet and max_open_trades to avoid false
 positives.

---
 .../optimize/lookahead_analysis_helpers.py | 45 ++++++++++++------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py
index 0eccf0526..0f2b78e24 100644
--- a/freqtrade/optimize/lookahead_analysis_helpers.py
+++ b/freqtrade/optimize/lookahead_analysis_helpers.py
@@ -119,25 +119,22 @@ class LookaheadAnalysisSubFunctions:
         csv_df.to_csv(config['lookahead_analysis_exportfilename'], index=False)
 
     @staticmethod
-    def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]):
-
-        logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.")
-        start = time.perf_counter()
-        current_instance = LookaheadAnalysis(config, strategy_obj)
-        current_instance.start()
-        elapsed = time.perf_counter() - start
-        logger.info(f"checking look ahead bias via backtests "
-                    f"of {Path(strategy_obj['location']).name} "
-                    f"took {elapsed:.0f} seconds.")
-        return current_instance
-
-    @staticmethod
-    def start(config: Config):
+    def calculate_config_overrides(config: Config):
         if config['targeted_trade_amount'] < config['minimum_trade_amount']:
             # this combo doesn't make any sense.
             raise OperationalException(
-                "targeted trade amount can't be smaller than minimum trade amount."
+                "Targeted trade amount can't be smaller than minimum trade amount."
             )
+        if len(config['pairs']) > config['max_open_trades']:
+            logger.info('Max_open_trades were less than amount of pairs. '
+                        'Set max_open_trades to amount of pairs just to avoid false positives.')
+            config['max_open_trades'] = len(config['pairs'])
+
+        min_dry_run_wallet = 1000000000
+        if config['dry_run_wallet'] < min_dry_run_wallet:
+            logger.info('Dry run wallet was not set to 1 billion, pushing it up there '
+                        'just to avoid false positives')
+            config['dry_run_wallet'] = min_dry_run_wallet
 
         # enforce cache to be 'none', shift it to 'none' if not already
         # (since the default value is 'day')
@@ -149,6 +146,24 @@ class LookaheadAnalysisSubFunctions:
                 f"Inside lookahead-analysis it is enforced to be 'none'. "
                 f"Changed it to 'none'")
             config['backtest_cache'] = 'none'
+        return config
+
+    @staticmethod
+    def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]):
+
+        logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.")
+        start = time.perf_counter()
+        current_instance = LookaheadAnalysis(config, strategy_obj)
+        current_instance.start()
+        elapsed = time.perf_counter() - start
+        logger.info(f"Checking look ahead bias via backtests "
+                    f"of {Path(strategy_obj['location']).name} "
+                    f"took {elapsed:.0f} seconds.")
+        return current_instance
+
+    @staticmethod
+    def start(config: Config):
+        config = LookaheadAnalysisSubFunctions.calculate_config_overrides(config)
 
         strategy_objs = StrategyResolver.search_all_objects(
             config, enum_failed=False, recursive=config.get('recursive_strategy_search', False))

From 94ca2988a0b58dec3ab69f9c9bd1770bc6227518 Mon Sep 17 00:00:00 2001
From: hippocritical
Date: Fri, 9 Jun 2023 23:32:58 +0200
Subject: [PATCH 057/130] updated docs

---
 docs/lookahead-analysis.md | 42 +++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/docs/lookahead-analysis.md b/docs/lookahead-analysis.md
index 22440a6d6..d61acf370 100644
--- a/docs/lookahead-analysis.md
+++ b/docs/lookahead-analysis.md
@@ -5,8 +5,7 @@ Checking look ahead bias is the bane of any strategy since it is sometimes very
 but very hard to detect.
 
 Backtesting initializes all timestamps at once and calculates all indicators in the beginning.
-This means that if you are allowing your indicators (or the libraries that get used) then you would
-look into the future and falsify your backtest.
+This means that if your indicators or entry/exit signals can look into future candles, the backtest will be falsified.
 
 Lookahead-analysis requires historic data to be available.
 To learn how to get data for the pairs and exchange you're interested in,
@@ -14,13 +13,15 @@ head over to the [Data Downloading](data-download.md) section of the documentati
 
 This command is built upon backtesting since it internally chains backtests and pokes at the
 strategy to provoke it to show look ahead bias.
 This is done by not looking at the strategy itself - but at the results it returned.
 The results are things like changed indicator-values and moved entries/exits compared to the full backtest.
 
 You can use commands of [Backtesting](backtesting.md).
 It also supports the lookahead-analysis of freqai strategies.
 
---cache is enforced to be "none"
+- --cache is forced to "none"
+- --max_open_trades is forced to be at least equal to the number of pairs
+- --dry_run_wallet is forced to be basically infinite
 
 ## Backtesting command reference
 
@@ -46,7 +47,7 @@ optional arguments:
 
 #### Summary
 
-Checks a given strategy for look ahead bias via backtest-analysis
+Checks a given strategy for look ahead bias via lookahead-analysis
 
 Look ahead bias means that the backtest uses data from future candles thereby not making it viable
 beyond backtesting and producing false hopes for the one backtesting.
 
@@ -61,19 +62,22 @@ but not realistic.
 This command is made to try to verify the validity in the form of the aforementioned look ahead bias.
 
 #### How does the command work?
-It will not look at the strategy or any contents itself but instead will run multiple backtests
-by using precisely cut timeranges and analyzing the results each time, comparing to the full timerange.
+It will start with a backtest of all pairs to generate a baseline for indicators and entries/exits.
+After the backtest has run, it will check whether the minimum trade amount is met
+and, if not, cancel the lookahead-analysis for this strategy.
 
-At first, it starts a backtest over the whole duration
-and then repeats backtests from the same starting point to the respective points to watch.
-In addition, it analyzes the dataframes form the overall backtest to the cut ones.
+After setting the baseline, it will then do additional runs for every entry and exit separately.
+When a verification-backtest is done, it will compare the indicators of that signal (either entry or exit)
+and report any bias.
+After all signals have been verified or falsified, a result-table will be generated for the user to see.
 
-At the end it will return a result-table in terminal.
 
-Hint:
-If an entry or exit condition is only triggered rarely or the timerange was chosen
-so only a few entry conditions are met
-then the bias checker is unable to catch the biased entry or exit condition.
-In the end it only checks which entry and exit signals have been triggered.
-
----Flow chart here for better understanding---
+#### Caveats:
+- The lookahead-analysis can only verify or falsify the trades it calculated.
+If a strategy has signals that were not triggered during the lookahead-analysis,
+then those entry/exit signals cannot be verified either.
+This could then lead to a false negative (the strategy would then be reported as non-biased).
+- lookahead-analysis has access to everything that backtesting has.
+Please avoid provoking configurations that distort the results, such as enabling position stacking.
+If you decide to do so,
+then make doubly sure that you never run out of max_open_trades slots,
+nor of money left in your wallet.

From 3523f564bd546298e9d3d4d134ac118df13a5dea Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 10 Jun 2023 09:44:08 +0200
Subject: [PATCH 058/130] Improve Log reduction and corresponding test

---
 freqtrade/loggers/set_log_levels.py |  2 +-
 tests/test_log_setup.py             | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/freqtrade/loggers/set_log_levels.py b/freqtrade/loggers/set_log_levels.py
index d666361b6..7311fa0a0 100644
--- a/freqtrade/loggers/set_log_levels.py
+++ b/freqtrade/loggers/set_log_levels.py
@@ -50,6 +50,6 @@ def restore_verbosity_for_bias_tester() -> None:
     Restore verbosity after bias tester.
""" logger.info("Restoring log verbosity.") - log_level = logging.getLogger('freqtrade').getEffectiveLevel() + log_level = logging.NOTSET for logger_name in __BIAS_TESTER_LOGGERS: logging.getLogger(logger_name).setLevel(log_level) diff --git a/tests/test_log_setup.py b/tests/test_log_setup.py index 2ce06b6b0..af9c43fbd 100644 --- a/tests/test_log_setup.py +++ b/tests/test_log_setup.py @@ -133,16 +133,17 @@ def test_set_loggers_journald_importerror(import_fails): def test_reduce_verbosity(): + setup_logging_pre() reduce_verbosity_for_bias_tester() - assert logging.getLogger('freqtrade.resolvers').level is logging.WARNING - assert logging.getLogger('freqtrade.strategy.hyper').level is logging.WARNING + assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() is logging.WARNING + assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() is logging.WARNING # base level wasn't changed - assert logging.getLogger('freqtrade').level is logging.INFO + assert logging.getLogger('freqtrade').getEffectiveLevel() is logging.INFO restore_verbosity_for_bias_tester() - assert logging.getLogger('freqtrade.resolvers').level is logging.INFO - assert logging.getLogger('freqtrade.strategy.hyper').level is logging.INFO - assert logging.getLogger('freqtrade').level is logging.INFO + assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() is logging.INFO + assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() is logging.INFO + assert logging.getLogger('freqtrade').getEffectiveLevel() is logging.INFO # base level wasn't changed From e246259792dbdd309d3fdeb1f0fa8ae01ed682f3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 11:40:57 +0200 Subject: [PATCH 059/130] avoid manual pipeline validation --- docs/freqai-feature-engineering.md | 67 ++++++++++++++++------------ freqtrade/freqai/freqai_interface.py | 23 +++++----- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 1151f01a3..6e3e7fda6 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -221,39 +221,20 @@ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. B # Building the data pipeline -FreqAI uses the the [`DataSieve`](https://github.com/emergentmethods/datasieve) pipeline, which follows the SKlearn pipeline API, but adds, among other features, coherence between the X, y, and sample_weight vector point removals, feature removal, feature name following. +By default, FreqAI builds a dynamic pipeline based on user congfiguration settings. The default settings are robust and designed to work with a variety of methods. These two steps are a `MinMaxScaler(-1,1)` and a `VarianceThreshold` which removes any column that has 0 variance. Users can activate other steps with more configuration parameters. For example if users add `use_SVM_to_remove_outliers: true` to the `freqai` config, then FreqAI will automatically add the [`SVMOutlierExtractor`](#identifying-outliers-using-a-support-vector-machine-svm) to the pipeline. Likewise, users can add `principal_component_analysis: true` to the `freqai` config to activate PCA. The [DissimilarityIndex](#identifying-outliers-with-the-dissimilarity-index-di) is activated with `DI_threshold: 1`. Finally, noise can also be added to the data with `noise_standard_deviation: 0.1`. Finally, users can add [DBSCAN](#identifying-outliers-with-dbscan) outlier removal with `use_DBSCAN_to_remove_outliers: true`. 
-By default, FreqAI builds the following pipeline inside the `IFreqaiModel` `train()` method: +!!! note "More information available" + Please review the [parameter table](freqai-parameter-table.md) for more information on these parameters. -```py -from datasieve.transforms import SKLearnWrapper, DissimilarityIndex -from datasieve.pipeline import Pipeline -dk.feature_pipeline = Pipeline([ - ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))), - ('di', ds.DissimilarityIndex(di_threshold=1)), - ]) -``` - -But users will find that they can add PCA and other steps just by changing their configuration settings, for example, if you add `"principal_component_analysis": true` to the `feature_parameters` dict in the `freqai` config, then FreqAI will automatically add the PCA step for you resulting in the following pipeline: - -```py -from datasieve.transforms import SKLearnWrapper, DissimilarityIndex, PCA -from datasieve.pipeline import Pipeline -dk.feature_pipeline = Pipeline([ - ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))), - ('pca', ds.PCA()), - ('post-pca-scaler', ds.MinMaxScaler(feature_range=(-1, 1))) - ('di', ds.DissimilarityIndex(di_threshold=1)), - ]) -``` - -The same concept follows if users activate other config options like `"use_SVM_to_remove_outliers": true` or `"use_DBSCAN_to_remove_outliers": true`. FreqAI will add the appropriate steps to the pipeline for you. ## Customizing the pipeline Users are encouraged to customize the data pipeline to their needs by building their own data pipeline. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or if they prefer not to touch the `train()` function, they can override `define_data_pipeline`/`define_label_pipeline` functions in their `IFreqaiModel`: -```py +!!! note "More information available" + FreqAI uses the the [`DataSieve`](https://github.com/emergentmethods/datasieve) pipeline, which follows the SKlearn pipeline API, but adds, among other features, coherence between the X, y, and sample_weight vector point removals, feature removal, feature name following. + +```python from datasieve.transforms import SKLearnWrapper, DissimilarityIndex from datasieve.pipeline import Pipeline from sklearn.preprocessing import QuantileTransformer, StandardScaler @@ -286,14 +267,42 @@ class MyFreqaiModel(BaseRegressionModel): """ User defines their custom label pipeline here (if they wish) """ - feature_pipeline = Pipeline([ + label_pipeline = Pipeline([ ('qt', SKLearnWrapper(StandardScaler())), ]) - return feature_pipeline + return label_pipeline ``` -Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class as shown above. +Here, you are defining the exact pipeline that will be used for your feature set during training and prediction. You can use *most* SKLearn transformation steps by wrapping them in the `SKLearnWrapper` class as shown above. In addition, you can use any of the transformations available in the [`DataSieve` library](https://github.com/emergentmethods/datasieve). 
+ +You can easily add your own transformation by creating a class that inherits from the datasieve `BaseTransform` and implementing your `fit()`, `transform()` and `inverse_transform()` methods: + +```python +from datasieve.transforms.base_transform import BaseTransform +# import whatever else you need + +class MyCoolTransform(BaseTransform): + def __init__(self, **kwargs): + self.param1 = kwargs.get('param1', 1) + + def fit(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + # do something with X, y, sample_weight, or/and feature_list + return X, y, sample_weight, feature_list + + def transform(self, X, y=None, sample_weight=None, + feature_list=None, outlier_check=False, **kwargs): + # do something with X, y, sample_weight, or/and feature_list + return X, y, sample_weight, feature_list + + def inverse_transform(self, X, y=None, sample_weight=None, feature_list=None, **kwargs): + # do/dont do something with X, y, sample_weight, or/and feature_list + return X, y, sample_weight, feature_list +``` + +!!! note "Hint" + You can define this custom class in the same file as your `IFreqaiModel`. + ## Outlier detection diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 87f682ad3..104fcb24d 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -509,36 +509,33 @@ class IFreqaiModel(ABC): def define_data_pipeline(self, threads=-1) -> Pipeline: ft_params = self.freqai_info["feature_parameters"] - feature_pipeline = Pipeline([ + pipe_steps = [ ('const', ds.VarianceThreshold(threshold=0)), ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) - ]) + ] if ft_params.get("principal_component_analysis", False): - feature_pipeline.append(('pca', ds.PCA())) - feature_pipeline.append(('post-pca-scaler', - SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) + pipe_steps.append(('pca', ds.PCA())) + pipe_steps.append(('post-pca-scaler', + SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) if ft_params.get("use_SVM_to_remove_outliers", False): svm_params = ft_params.get( "svm_params", {"shuffle": False, "nu": 0.01}) - feature_pipeline.append(('svm', ds.SVMOutlierExtractor(**svm_params))) + pipe_steps.append(('svm', ds.SVMOutlierExtractor(**svm_params))) di = ft_params.get("DI_threshold", 0) if di: - feature_pipeline.append(('di', ds.DissimilarityIndex(di_threshold=di, n_jobs=threads))) + pipe_steps.append(('di', ds.DissimilarityIndex(di_threshold=di, n_jobs=threads))) if ft_params.get("use_DBSCAN_to_remove_outliers", False): - feature_pipeline.append(('dbscan', ds.DBSCAN(n_jobs=threads))) + pipe_steps.append(('dbscan', ds.DBSCAN(n_jobs=threads))) sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0) if sigma: - feature_pipeline.append(('noise', ds.Noise(sigma=sigma))) + pipe_steps.append(('noise', ds.Noise(sigma=sigma))) - feature_pipeline.fitparams = feature_pipeline._validate_fitparams( - {}, feature_pipeline.steps) - - return feature_pipeline + return Pipeline(pipe_steps) def define_label_pipeline(self, threads=-1) -> Pipeline: From 4cdd6bc6c34c24c00aff2850f9ef5883495d3527 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 12:07:03 +0200 Subject: [PATCH 060/130] avoid using ram for unnecessary train_df, fix some deprecation warnings --- .../freqai/base_models/FreqaiMultiOutputClassifier.py | 3 +-- freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py | 3 +-- freqtrade/freqai/data_drawer.py | 7 +------ 3 files changed, 3 insertions(+), 10 deletions(-) 
diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py index 435c0e646..4646bb9a8 100644 --- a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py @@ -1,9 +1,8 @@ import numpy as np -from joblib import Parallel from sklearn.base import is_classifier from sklearn.multioutput import MultiOutputClassifier, _fit_estimator -from sklearn.utils.fixes import delayed from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.parallel import Parallel, delayed from sklearn.utils.validation import has_fit_parameter from freqtrade.exceptions import OperationalException diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py index 54136d5e0..a6cc4f39b 100644 --- a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py @@ -1,6 +1,5 @@ -from joblib import Parallel from sklearn.multioutput import MultiOutputRegressor, _fit_estimator -from sklearn.utils.fixes import delayed +from sklearn.utils.parallel import Parallel, delayed from sklearn.utils.validation import has_fit_parameter diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 2a3ec6dd2..edd9640c9 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -469,7 +469,7 @@ class FreqaiDataDrawer: with (save_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("wb") as fp: cloudpickle.dump(dk.label_pipeline, fp) - # save the train data to file so we can check preds for area of applicability later + # save the train data to file for post processing if desired dk.data_dictionary["train_features"].to_pickle( save_path / f"{dk.model_filename}_{TRAINDF}.pkl" ) @@ -484,7 +484,6 @@ class FreqaiDataDrawer: if coin not in self.meta_data_dictionary: self.meta_data_dictionary[coin] = {} - self.meta_data_dictionary[coin][TRAINDF] = dk.data_dictionary["train_features"] self.meta_data_dictionary[coin][METADATA] = dk.data self.meta_data_dictionary[coin][FEATURE_PIPELINE] = dk.feature_pipeline self.meta_data_dictionary[coin][LABEL_PIPELINE] = dk.label_pipeline @@ -518,16 +517,12 @@ class FreqaiDataDrawer: if coin in self.meta_data_dictionary: dk.data = self.meta_data_dictionary[coin][METADATA] - dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin][TRAINDF] dk.feature_pipeline = self.meta_data_dictionary[coin][FEATURE_PIPELINE] dk.label_pipeline = self.meta_data_dictionary[coin][LABEL_PIPELINE] else: with (dk.data_path / f"{dk.model_filename}_{METADATA}.json").open("r") as fp: dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) - dk.data_dictionary["train_features"] = pd.read_pickle( - dk.data_path / f"{dk.model_filename}_{TRAINDF}.pkl" - ) with (dk.data_path / f"{dk.model_filename}_{FEATURE_PIPELINE}.pkl").open("rb") as fp: dk.feature_pipeline = cloudpickle.load(fp) with (dk.data_path / f"{dk.model_filename}_{LABEL_PIPELINE}.pkl").open("rb") as fp: From f8d7c2e21dc11c5e716a431b51fcf4094213d365 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 12:48:27 +0200 Subject: [PATCH 061/130] add migration guide, add protections and migration assistance --- docs/freqai-feature-engineering.md | 66 ++++++++++++++++++++++++- docs/strategy_migration.md | 74 ++++++++++++++++++++++++++++ freqtrade/freqai/data_kitchen.py | 64 ++++++++++++++++++++++++ 
 freqtrade/freqai/freqai_interface.py | 23 +++++++++
 4 files changed, 225 insertions(+), 2 deletions(-)

diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md
index 6e3e7fda6..12e01e30d 100644
--- a/docs/freqai-feature-engineering.md
+++ b/docs/freqai-feature-engineering.md
@@ -219,7 +219,7 @@ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. B

 ![weight-factor](assets/freqai_weight-factor.jpg)

-# Building the data pipeline
+## Building the data pipeline

 By default, FreqAI builds a dynamic pipeline based on user configuration settings. The default settings are robust and designed to work with a variety of methods. The two default steps are a `MinMaxScaler(-1,1)` and a `VarianceThreshold`, which removes any column that has 0 variance. Users can activate other steps with additional configuration parameters. For example, if users add `use_SVM_to_remove_outliers: true` to the `freqai` config, then FreqAI will automatically add the [`SVMOutlierExtractor`](#identifying-outliers-using-a-support-vector-machine-svm) to the pipeline. Likewise, users can add `principal_component_analysis: true` to the `freqai` config to activate PCA. The [DissimilarityIndex](#identifying-outliers-with-the-dissimilarity-index-di) is activated with `DI_threshold: 1`. Noise can be added to the data with `noise_standard_deviation: 0.1`. Finally, users can add [DBSCAN](#identifying-outliers-with-dbscan) outlier removal with `use_DBSCAN_to_remove_outliers: true`.

@@ -227,7 +227,7 @@ By default, FreqAI builds a dynamic pipeline based on user configuration settin

 Please review the [parameter table](freqai-parameter-table.md) for more information on these parameters.

-## Customizing the pipeline
+### Customizing the pipeline

 Users are encouraged to customize the data pipeline to their needs by building their own. This can be done by simply setting `dk.feature_pipeline` to their desired `Pipeline` object inside their `IFreqaiModel` `train()` function, or, if they prefer not to touch the `train()` function, they can override the `define_data_pipeline`/`define_label_pipeline` functions in their `IFreqaiModel`:

@@ -303,6 +303,68 @@ class MyCoolTransform(BaseTransform):
 !!! note "Hint"
     You can define this custom class in the same file as your `IFreqaiModel`.

+### Migrating a custom `IFreqaiModel` to the new Pipeline
+
+If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration.
+
+The conversion involves first removing `data_cleaning_train/predict()` and replacing them with a `define_data_pipeline()` and `define_label_pipeline()` function to your `IFreqaiModel` class:
+
+```python
+class MyCoolFreqaiModel(BaseRegressionModel):
+    def train(
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
+    ) -> Any:
+
+        # ... your custom stuff
+
+        # Remove these lines
+        # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        # self.data_cleaning_train(dk)
+        # data_dictionary = dk.normalize_data(data_dictionary)
+
+        # Add these lines. 
Now we control the pipeline fit/transform ourselves + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: + + # ... your custom stuff + + # Remove these lines: + # self.data_cleaning_predict(dk) + + # Add these lines: + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + + # Remove this line + # pred_df = dk.denormalize_labels_from_metadata(pred_df) + + # Replace with these lines + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() ## Outlier detection diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md index 5ef7a5a4c..4c10fb126 100644 --- a/docs/strategy_migration.md +++ b/docs/strategy_migration.md @@ -728,3 +728,77 @@ Targets now get their own, dedicated method. return dataframe ``` + + +### FreqAI - New data Pipeline + +If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration. That means that this migration guide is relevant for a very small percentage of power-users. If you stumbled upon this guide by mistake, feel free to inquire in depth about your problem in the Freqtrade discord server. + +The conversion involves first removing `data_cleaning_train/predict()` and replacing them with a `define_data_pipeline()` and `define_label_pipeline()` function to your `IFreqaiModel` class: + +```python linenums="1" hl_lines="10-13 41-42 48-49" +class MyCoolFreqaiModel(BaseRegressionModel): + """ + Some cool custom IFreqaiModel you made before Freqtrade version 2023.6 + """ + def train( + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs + ) -> Any: + + # ... your custom stuff + + # Remove these lines + # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + # self.data_cleaning_train(dk) + # data_dictionary = dk.normalize_data(data_dictionary) + + # Add these lines. 
Now we control the pipeline fit/transform ourselves + dd = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: # 37 + + # ... your custom stuff + + # Remove these lines: + # self.data_cleaning_predict(dk) + + # Add these lines: + dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], outlier_check=True) + + # Remove this line + # pred_df = dk.denormalize_labels_from_metadata(pred_df) + + # Replace with these lines + pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() +``` + + +1. Features - Move to `feature_engineering_expand_all` +2. Basic features, not expanded across `include_periods_candles` - move to`feature_engineering_expand_basic()`. +3. Standard features which should not be expanded - move to `feature_engineering_standard()`. +4. Targets - Move this part to `set_freqai_targets()`. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index de07865d3..215457992 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -12,6 +12,7 @@ import numpy.typing as npt import pandas as pd import psutil from datasieve.pipeline import Pipeline +from datasieve.transforms import SKLearnWrapper from pandas import DataFrame from sklearn.model_selection import train_test_split @@ -950,3 +951,66 @@ class FreqaiDataKitchen: timerange.startts += buffer * timeframe_to_seconds(self.config["timeframe"]) return timerange + + # deprecated functions + def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + """ + Deprecation warning, migration assistance + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your custom IFreqaiModel relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." 
+ " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline " + "We added a basic pipeline for you, but this will be removed " + "in a future version.\n" + "This version does not include any outlier configurations") + + import datasieve.transforms as ds + from sklearn.preprocessing import MinMaxScaler + dd = data_dictionary + + self.feature_pipeline = Pipeline([ + ('variance_threshold', ds.VarianceThreshold()), + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) + ]) + + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = self.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = self.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + self.label_pipeline = Pipeline([ + ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) + ]) + + dd["train_labels"], _, _ = self.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = self.label_pipeline.transform(dd["test_labels"]) + + return dd + + def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: + """ + Deprecation warning, migration assistance + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your custom IFreqaiModel relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." + " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline " + "We added a basic pipeline for you, but this will be removed " + "in a future version.\n" + "This version does not include any outlier configurations") + + pred_df, _, _ = self.label_pipeline.inverse_transform(df) + self.DI_values = np.zeros(len(pred_df.index)) + self.do_predict = np.ones(len(pred_df.index)) + + return pred_df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 104fcb24d..eff8d4bd5 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -968,3 +968,26 @@ class IFreqaiModel(ABC): :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index) """ + + # deprecated functions + def data_cleaning_train(self, dk: FreqaiDataKitchen, pair: str): + """ + throw deprecation warning if this function is called + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." + " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline") + return + + def data_cleaning_predict(self, dk: FreqaiDataKitchen, pair: str): + """ + throw deprecation warning if this function is called + """ + ft = "https://www.freqtrade.io/en/latest" + logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" + " data pipeline. Please update your model to use the new data pipeline." 
+ " This can be achieved by following the migration guide at " + f"{ft}/strategy_migration/#freqai-new-data-pipeline") + return From d9bdd879ab35131aa708a2dce27a447b43451886 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 13:00:59 +0200 Subject: [PATCH 062/130] improve migration doc --- docs/strategy_migration.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md index 4c10fb126..2fef5e516 100644 --- a/docs/strategy_migration.md +++ b/docs/strategy_migration.md @@ -736,7 +736,7 @@ If you have created your own custom `IFreqaiModel` with a custom `train()`/`pred The conversion involves first removing `data_cleaning_train/predict()` and replacing them with a `define_data_pipeline()` and `define_label_pipeline()` function to your `IFreqaiModel` class: -```python linenums="1" hl_lines="10-13 41-42 48-49" +```python linenums="1" hl_lines="11-14 43-44 51-52" class MyCoolFreqaiModel(BaseRegressionModel): """ Some cool custom IFreqaiModel you made before Freqtrade version 2023.6 @@ -751,6 +751,7 @@ class MyCoolFreqaiModel(BaseRegressionModel): # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) # self.data_cleaning_train(dk) # data_dictionary = dk.normalize_data(data_dictionary) + # (1) # Add these lines. Now we control the pipeline fit/transform ourselves dd = dk.make_train_test_datasets(features_filtered, labels_filtered) @@ -780,6 +781,7 @@ class MyCoolFreqaiModel(BaseRegressionModel): # Remove these lines: # self.data_cleaning_predict(dk) + # (2) # Add these lines: dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( @@ -787,6 +789,7 @@ class MyCoolFreqaiModel(BaseRegressionModel): # Remove this line # pred_df = dk.denormalize_labels_from_metadata(pred_df) + # (3) # Replace with these lines pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) @@ -798,7 +801,6 @@ class MyCoolFreqaiModel(BaseRegressionModel): ``` -1. Features - Move to `feature_engineering_expand_all` -2. Basic features, not expanded across `include_periods_candles` - move to`feature_engineering_expand_basic()`. -3. Standard features which should not be expanded - move to `feature_engineering_standard()`. -4. Targets - Move this part to `set_freqai_targets()`. +1. Data normalization and cleaning is now homogenized with the new pipeline definition. This is created in the new `define_data_pipeline()` and `define_label_pipeline()` functions. The `data_cleaning_train()` and `data_cleaning_predict()` functions are no longer used. You can override `define_data_pipeline()` to create your own custom pipeline if you wish. +2. Data normalization and cleaning is now homogenized with the new pipeline definition. This is created in the new `define_data_pipeline()` and `define_label_pipeline()` functions. The `data_cleaning_train()` and `data_cleaning_predict()` functions are no longer used. You can override `define_data_pipeline()` to create your own custom pipeline if you wish. +3. Data denormalization is done with the new pipeline. Replace this with the lines below. 
From 41e37f9d322d95706c9c11afe7792a544c56d778 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 10 Jun 2023 13:11:47 +0200
Subject: [PATCH 063/130] improve docs, update doc strings

---
 docs/strategy_migration.md                          | 11 +++++++++--
 freqtrade/freqai/base_models/BaseClassifierModel.py |  4 ++--
 freqtrade/freqai/base_models/BaseRegressionModel.py |  4 ++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md
index 2fef5e516..353da0ccb 100644
--- a/docs/strategy_migration.md
+++ b/docs/strategy_migration.md
@@ -736,7 +736,7 @@ If you have created your own custom `IFreqaiModel` with a custom `train()`/`pred

 The conversion involves first removing `data_cleaning_train/predict()` and replacing them with a `define_data_pipeline()` and `define_label_pipeline()` function to your `IFreqaiModel` class:

-```python linenums="1" hl_lines="11-14 43-44 51-52"
+```python linenums="1" hl_lines="11-14 47-49 55-57"
 class MyCoolFreqaiModel(BaseRegressionModel):
     """
     Some cool custom IFreqaiModel you made before Freqtrade version 2023.6
@@ -773,9 +773,13 @@ class MyCoolFreqaiModel(BaseRegressionModel):
         dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
         dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])

+        # ... your custom code
+
+        return model
+
     def predict(
         self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
-    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:  # 37
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:

         # ... your custom stuff

@@ -798,6 +802,9 @@ class MyCoolFreqaiModel(BaseRegressionModel):
         else:
             dk.DI_values = np.zeros(len(outliers.index))
         dk.do_predict = outliers.to_numpy()
+
+        # ... your custom code
+        return (pred_df, dk.do_predict)
 ```

diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py
index 2df639b55..e536efea3 100644
--- a/freqtrade/freqai/base_models/BaseClassifierModel.py
+++ b/freqtrade/freqai/base_models/BaseClassifierModel.py
@@ -17,8 +17,8 @@ logger = logging.getLogger(__name__)
 class BaseClassifierModel(IFreqaiModel):
     """
     Base class for classification type models (e.g. Catboost, LightGBM, XGboost etc.).
-    User *must* inherit from this class and set fit() and predict(). See example scripts
-    such as prediction_models/CatboostPredictionModel.py for guidance.
+    User *must* inherit from this class and set fit(). See example scripts
+    such as prediction_models/CatboostClassifier.py for guidance.
     """

     def train(
diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py
index d7e7d9916..f1e33bff8 100644
--- a/freqtrade/freqai/base_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/base_models/BaseRegressionModel.py
@@ -16,8 +16,8 @@ logger = logging.getLogger(__name__)
 class BaseRegressionModel(IFreqaiModel):
     """
     Base class for regression type models (e.g. Catboost, LightGBM, XGboost etc.).
-    User *must* inherit from this class and set fit() and predict(). See example scripts
-    such as prediction_models/CatboostPredictionModel.py for guidance.
+    User *must* inherit from this class and set fit(). See example scripts
+    such as prediction_models/CatboostRegressor.py for guidance.
""" def train( From 229ee643cdf50c34ff19eea48ce99432f7241c58 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 13:24:09 +0200 Subject: [PATCH 064/130] revert change to deal with FT pinning old scikit-learn version --- freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py | 3 ++- freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py index 4646bb9a8..435c0e646 100644 --- a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py @@ -1,8 +1,9 @@ import numpy as np +from joblib import Parallel from sklearn.base import is_classifier from sklearn.multioutput import MultiOutputClassifier, _fit_estimator +from sklearn.utils.fixes import delayed from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.parallel import Parallel, delayed from sklearn.utils.validation import has_fit_parameter from freqtrade.exceptions import OperationalException diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py index a6cc4f39b..54136d5e0 100644 --- a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py @@ -1,5 +1,6 @@ +from joblib import Parallel from sklearn.multioutput import MultiOutputRegressor, _fit_estimator -from sklearn.utils.parallel import Parallel, delayed +from sklearn.utils.fixes import delayed from sklearn.utils.validation import has_fit_parameter From ad8a4897cee98d8b3963662aa002164ec493ab8e Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Jun 2023 16:13:28 +0200 Subject: [PATCH 065/130] remove unnecessary example in feature_engineering.md --- docs/freqai-feature-engineering.md | 59 +----------------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 12e01e30d..415ac2c63 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -307,64 +307,7 @@ class MyCoolTransform(BaseTransform): If you have created your own custom `IFreqaiModel` with a custom `train()`/`predict()` function, *and* you still rely on `data_cleaning_train/predict()`, then you will need to migrate to the new pipeline. If your model does *not* rely on `data_cleaning_train/predict()`, then you do not need to worry about this migration. -The conversion involves first removing `data_cleaning_train/predict()` and replacing them with a `define_data_pipeline()` and `define_label_pipeline()` function to your `IFreqaiModel` class: - -```python -class MyCoolFreqaiModel(BaseRegressionModel): - def train( - self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs - ) -> Any: - - # ... your custom stuff - - # Remove these lines - # data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) - # self.data_cleaning_train(dk) - # data_dictionary = dk.normalize_data(data_dictionary) - - # Add these lines. 
Now we control the pipeline fit/transform ourselves - dd = dk.make_train_test_datasets(features_filtered, labels_filtered) - dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) - dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) - - (dd["train_features"], - dd["train_labels"], - dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], - dd["train_labels"], - dd["train_weights"]) - - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) - - dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) - dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) - - def predict( - self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs - ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: - - # ... your custom stuff - - # Remove these lines: - # self.data_cleaning_predict(dk) - - # Add these lines: - dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( - dk.data_dictionary["prediction_features"], outlier_check=True) - - # Remove this line - # pred_df = dk.denormalize_labels_from_metadata(pred_df) - - # Replace with these lines - pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) - if self.freqai_info.get("DI_threshold", 0) > 0: - dk.DI_values = dk.feature_pipeline["di"].di_values - else: - dk.DI_values = np.zeros(len(outliers.index)) - dk.do_predict = outliers.to_numpy() +More details about the migration can be found [here](strategy_migration.md#freqai---new-data-pipeline). ## Outlier detection From 1da1972c18ebd1b5e624987fba22bf74ac48985c Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sun, 11 Jun 2023 00:18:34 +0200 Subject: [PATCH 066/130] added test for config overrides --- tests/optimize/test_lookahead_analysis.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 476627c57..5d054cfa1 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -20,6 +20,8 @@ def lookahead_conf(default_conf_usdt): default_conf_usdt['strategy_path'] = str( Path(__file__).parent.parent / "strategy/strats/lookahead_bias") default_conf_usdt['strategy'] = 'strategy_test_v3_with_lookahead_bias' + default_conf_usdt['max_open_trades'] = 1 + default_conf_usdt['dry_run_wallet'] = 1000000000 return default_conf_usdt @@ -339,3 +341,13 @@ def test_biased_strategy(lookahead_conf, mocker, caplog, scenario) -> None: # check biased strategy elif scenario == "bias1": assert instance.current_analysis.has_bias + + +def test_config_overrides(lookahead_conf): + lookahead_conf['max_open_trades'] = 0 + lookahead_conf['dry_run_wallet'] = 1 + lookahead_conf['pairs'] = ['BTC/USDT', 'ETH/USDT', 'SOL/USDT'] + lookahead_conf = LookaheadAnalysisSubFunctions.calculate_config_overrides(lookahead_conf) + + assert lookahead_conf['dry_run_wallet'] == 1000000000 + assert lookahead_conf['max_open_trades'] == 3 From afd54d39a5b675e0ce19afcb2bfbc9c2465a9d16 Mon Sep 17 00:00:00 2001 From: steam Date: Sun, 11 Jun 2023 20:00:12 +0300 Subject: [PATCH 067/130] add action_masks --- freqtrade/freqai/RL/BaseEnvironment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index 42e644f0a..d1a399c48 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ 
b/freqtrade/freqai/RL/BaseEnvironment.py @@ -141,6 +141,9 @@ class BaseEnvironment(gym.Env): Unique to the environment action count. Must be inherited. """ + def action_masks(self) -> list[bool]: + return [self._is_valid(action.value) for action in self.actions] + def seed(self, seed: int = 1): self.np_random, seed = seeding.np_random(seed) return [seed] From c36547a5632c35bc76a7a977b51c3682c87531cf Mon Sep 17 00:00:00 2001 From: steam Date: Sun, 11 Jun 2023 20:05:53 +0300 Subject: [PATCH 068/130] add maskable eval callback --- .../freqai/RL/BaseReinforcementLearningModel.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 8ee3c7c56..642a9edf2 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -13,7 +13,8 @@ import pandas as pd import torch as th import torch.multiprocessing from pandas import DataFrame -from stable_baselines3.common.callbacks import EvalCallback +from sb3_contrib.common.maskable.callbacks import MaskableEvalCallback +from sb3_contrib.common.maskable.utils import is_masking_supported from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.utils import set_random_seed from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor @@ -48,7 +49,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env() self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env() - self.eval_callback: Optional[EvalCallback] = None + self.eval_callback: Optional[MaskableEvalCallback] = None self.model_type = self.freqai_info['rl_config']['model_type'] self.rl_config = self.freqai_info['rl_config'] self.df_raw: DataFrame = DataFrame() @@ -151,9 +152,11 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info) self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info)) - self.eval_callback = EvalCallback(self.eval_env, deterministic=True, - render=False, eval_freq=len(train_df), - best_model_save_path=str(dk.data_path)) + self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True, + render=False, eval_freq=len(train_df), + best_model_save_path=str(dk.data_path), + use_masking=(self.model_type == 'MaskablePPO' and + is_masking_supported(self.eval_env))) actions = self.train_env.get_actions() self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions) From 5dee86eda7760d646927e9f064c094dc0a143fa3 Mon Sep 17 00:00:00 2001 From: steam Date: Sun, 11 Jun 2023 21:44:57 +0300 Subject: [PATCH 069/130] fix action_masks typing list --- freqtrade/freqai/RL/BaseEnvironment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index d1a399c48..91c7501c6 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ b/freqtrade/freqai/RL/BaseEnvironment.py @@ -2,7 +2,7 @@ import logging import random from abc import abstractmethod from enum import Enum -from typing import Optional, Type, Union +from typing import List, Optional, Type, Union import gymnasium as gym import numpy as np @@ -141,7 +141,7 @@ class BaseEnvironment(gym.Env): Unique to the environment action count. 
Must be inherited.
         """

-    def action_masks(self) -> list[bool]:
+    def action_masks(self) -> List[bool]:
         return [self._is_valid(action.value) for action in self.actions]

     def seed(self, seed: int = 1):

From bdb535d0e689ca0d6bf2c83fb2f13153d329976a Mon Sep 17 00:00:00 2001
From: steam
Date: Sun, 11 Jun 2023 22:20:15 +0300
Subject: [PATCH 070/130] add maskable eval callback multiproc

---
 .../ReinforcementLearner_multiproc.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
index 9f0b2d436..f014da602 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
@@ -2,7 +2,8 @@ import logging
 from typing import Any, Dict

 from pandas import DataFrame
-from stable_baselines3.common.callbacks import EvalCallback
+from sb3_contrib.common.maskable.callbacks import MaskableEvalCallback
+from sb3_contrib.common.maskable.utils import is_masking_supported
 from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
@@ -55,9 +56,11 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
                                              env_info=env_info) for i in range(self.max_threads)]))

-        self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
-                                          render=False, eval_freq=eval_freq,
-                                          best_model_save_path=str(dk.data_path))
+        self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
+                                                  render=False, eval_freq=eval_freq,
+                                                  best_model_save_path=str(dk.data_path),
+                                                  use_masking=(self.model_type == 'MaskablePPO' and
+                                                               is_masking_supported(self.eval_env)))

         # THE TENSORBOARD CALLBACK IS NOT RECOMMENDED FOR USE WITH MULTIPLE ENVS;
         # IT WILL RETURN FALSE INFORMATION, AND IT IS NOT THREAD SAFE WITH SB3!!!
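The last few patches all hinge on the action-masking convention from sb3-contrib: the environment exposes an `action_masks()` method, MaskablePPO queries it to restrict the policy to valid actions, and `is_masking_supported()` detects whether an environment provides it. The following self-contained toy sketch - not freqtrade code, with invented hold/enter/exit semantics - illustrates that convention:

```python
# Toy sketch of the sb3-contrib action-masking convention - not freqtrade code.
from typing import List

import gymnasium as gym
import numpy as np
from sb3_contrib.common.maskable.utils import is_masking_supported


class ToyMaskedEnv(gym.Env):
    """0 = hold, 1 = enter, 2 = exit; entering is only valid while flat."""

    def __init__(self):
        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(-1, 1, shape=(1,), dtype=np.float32)
        self._in_position = False

    def _is_valid(self, action: int) -> bool:
        if action == 1:      # enter: only allowed when flat
            return not self._in_position
        if action == 2:      # exit: only allowed when in a position
            return self._in_position
        return True          # holding is always allowed

    def action_masks(self) -> List[bool]:
        # Same shape of mask that BaseEnvironment.action_masks() builds above.
        return [self._is_valid(action) for action in range(self.action_space.n)]

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self._in_position = False
        return np.zeros(1, dtype=np.float32), {}

    def step(self, action):
        if action == 1:
            self._in_position = True
        elif action == 2:
            self._in_position = False
        # obs, reward, terminated, truncated, info
        return np.zeros(1, dtype=np.float32), 0.0, False, False, {}


env = ToyMaskedEnv()
assert is_masking_supported(env)   # True: the action_masks() method is detected
print(env.action_masks())          # [True, True, False] while flat
```

With `use_masking=True`, `MaskableEvalCallback` evaluates the model under the same masks, which is why both patches gate it on the model type being `MaskablePPO` and on `is_masking_supported(self.eval_env)`.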
From 663cfc62111f7572645eede86ea398071ee6e26a Mon Sep 17 00:00:00 2001 From: hippocritical Date: Sun, 11 Jun 2023 22:53:21 +0200 Subject: [PATCH 071/130] fixing tests --- tests/optimize/test_lookahead_analysis.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 5d054cfa1..7678726ae 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -22,7 +22,7 @@ def lookahead_conf(default_conf_usdt): default_conf_usdt['strategy'] = 'strategy_test_v3_with_lookahead_bias' default_conf_usdt['max_open_trades'] = 1 default_conf_usdt['dry_run_wallet'] = 1000000000 - + default_conf_usdt['pairs'] = ['UNITTEST/USDT'] return default_conf_usdt @@ -40,6 +40,10 @@ def test_start_lookahead_analysis(mocker): "strategy_test_v3_with_lookahead_bias", "--strategy-path", str(Path(__file__).parent.parent / "strategy/strats/lookahead_bias"), + "--pairs", + "UNITTEST/BTC", + "--max-open-trades", + "1" ] pargs = get_args(args) pargs['config'] = None @@ -65,19 +69,22 @@ def test_start_lookahead_analysis(mocker): pargs = get_args(args) pargs['config'] = None with pytest.raises(OperationalException, - match=r"targeted trade amount can't be smaller than .*"): + match=r"Targeted trade amount can't be smaller than minimum trade amount.*"): start_lookahead_analysis(pargs) -def test_lookahead_helper_invalid_config(lookahead_conf, mocker, caplog) -> None: +def test_lookahead_helper_invalid_config(lookahead_conf, caplog) -> None: conf = deepcopy(lookahead_conf) conf['targeted_trade_amount'] = 10 conf['minimum_trade_amount'] = 40 with pytest.raises(OperationalException, - match=r"targeted trade amount can't be smaller than .*"): + match=r"Targeted trade amount can't be smaller than minimum trade amount.*"): LookaheadAnalysisSubFunctions.start(conf) + +def test_lookahead_helper_no_strategy_defined(lookahead_conf, caplog): conf = deepcopy(lookahead_conf) + conf['pairs'] = ['UNITTEST/USDT'] del conf['strategy'] with pytest.raises(OperationalException, match=r"No Strategy specified"): From 7542909e18ca9118fa2824108598d7d19d8d4818 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 03:56:48 +0000 Subject: [PATCH 072/130] Bump stable-baselines3 from 2.0.0a10 to 2.0.0a13 Bumps [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) from 2.0.0a10 to 2.0.0a13. - [Release notes](https://github.com/DLR-RM/stable-baselines3/releases) - [Commits](https://github.com/DLR-RM/stable-baselines3/commits) --- updated-dependencies: - dependency-name: stable-baselines3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-freqai-rl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-freqai-rl.txt b/requirements-freqai-rl.txt index de48a1da4..2672f9c38 100644 --- a/requirements-freqai-rl.txt +++ b/requirements-freqai-rl.txt @@ -5,7 +5,7 @@ torch==2.0.1 #until these branches will be released we can use this gymnasium==0.28.1 -stable_baselines3==2.0.0a10 +stable_baselines3==2.0.0a13 sb3_contrib>=2.0.0a9 # Progress bar for stable-baselines3 and sb3-contrib tqdm==4.65.0 From 2e087750e055ca15541ff24723099e9ae625b650 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 05:12:52 +0000 Subject: [PATCH 073/130] Bump fastapi from 0.96.0 to 0.97.0 Bumps [fastapi](https://github.com/tiangolo/fastapi) from 0.96.0 to 0.97.0. - [Release notes](https://github.com/tiangolo/fastapi/releases) - [Commits](https://github.com/tiangolo/fastapi/compare/0.96.0...0.97.0) --- updated-dependencies: - dependency-name: fastapi dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 203a555bf..3f7ec1546 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,7 +38,7 @@ orjson==3.9.0 sdnotify==0.3.2 # API Server -fastapi==0.96.0 +fastapi==0.97.0 pydantic==1.10.9 uvicorn==0.22.0 pyjwt==2.7.0 From 1e44cfe2fcc7e0e4b3815eb1ba28a3e6db8ee2ad Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 12 Jun 2023 07:10:52 +0200 Subject: [PATCH 074/130] Improve stoploss test --- tests/test_freqtradebot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_freqtradebot.py b/tests/test_freqtradebot.py index 71f494372..945a81aa6 100644 --- a/tests/test_freqtradebot.py +++ b/tests/test_freqtradebot.py @@ -1241,6 +1241,8 @@ def test_handle_stoploss_on_exchange(mocker, default_conf_usdt, fee, caplog, is_ 'type': 'stop_loss_limit', 'price': 3, 'average': 2, + 'filled': enter_order['amount'], + 'remaining': 0, 'amount': enter_order['amount'], }) mocker.patch(f'{EXMS}.fetch_stoploss_order', stoploss_order_hit) From 1a4d94a6f3cd0454fae6b7c4259ae7ee6c9c3ebd Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 12 Jun 2023 19:59:18 +0200 Subject: [PATCH 075/130] OKX stop should convert contracts to amount --- freqtrade/exchange/okx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/freqtrade/exchange/okx.py b/freqtrade/exchange/okx.py index 8ad3c2cdb..c703e3a78 100644 --- a/freqtrade/exchange/okx.py +++ b/freqtrade/exchange/okx.py @@ -199,6 +199,7 @@ class Okx(Exchange): order_reg['type'] = 'stoploss' order_reg['status_stop'] = 'triggered' return order_reg + order = self._order_contracts_to_amount(order) order['type'] = 'stoploss' return order From 9a7794c520163f7681058ce164a7d258c03f3695 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 12 Jun 2023 20:29:23 +0200 Subject: [PATCH 076/130] Improve behavior for when stoploss cancels without content closes #8761 --- freqtrade/persistence/trade_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/persistence/trade_model.py b/freqtrade/persistence/trade_model.py index 5d8aada6b..5dee2a53c 100644 --- a/freqtrade/persistence/trade_model.py +++ b/freqtrade/persistence/trade_model.py @@ -97,7 +97,7 @@ class Order(ModelBase): @property def safe_filled(self) -> float: - return self.filled if self.filled is not None else self.amount or 0.0 + return 
self.filled if self.filled is not None else 0.0 @property def safe_cost(self) -> float: @@ -703,7 +703,7 @@ class LocalTrade(): self.stoploss_order_id = None self.close_rate_requested = self.stop_loss self.exit_reason = ExitType.STOPLOSS_ON_EXCHANGE.value - if self.is_open: + if self.is_open and order.safe_filled > 0: logger.info(f'{order.order_type.upper()} is hit for {self}.') else: raise ValueError(f'Unknown order type: {order.order_type}') From 11d7e7925eca75d89c2b4d3d061d42e5539a7f60 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 14 Jun 2023 20:34:18 +0200 Subject: [PATCH 077/130] Fix random test failures --- tests/test_log_setup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_log_setup.py b/tests/test_log_setup.py index af9c43fbd..bd3399615 100644 --- a/tests/test_log_setup.py +++ b/tests/test_log_setup.py @@ -135,15 +135,16 @@ def test_set_loggers_journald_importerror(import_fails): def test_reduce_verbosity(): setup_logging_pre() reduce_verbosity_for_bias_tester() + prior_level = logging.getLogger('freqtrade').getEffectiveLevel() - assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() is logging.WARNING - assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() is logging.WARNING + assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() == logging.WARNING + assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() == logging.WARNING # base level wasn't changed - assert logging.getLogger('freqtrade').getEffectiveLevel() is logging.INFO + assert logging.getLogger('freqtrade').getEffectiveLevel() == prior_level restore_verbosity_for_bias_tester() - assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() is logging.INFO - assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() is logging.INFO - assert logging.getLogger('freqtrade').getEffectiveLevel() is logging.INFO + assert logging.getLogger('freqtrade.resolvers').getEffectiveLevel() == prior_level + assert logging.getLogger('freqtrade.strategy.hyper').getEffectiveLevel() == prior_level + assert logging.getLogger('freqtrade').getEffectiveLevel() == prior_level # base level wasn't changed From ca88cac08bec8baaac08582a1f4b6adfa85dc3b6 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 15 Jun 2023 06:39:00 +0200 Subject: [PATCH 078/130] Remove unused code file --- .../backtest_lookahead_bias_checker.py | 252 ------------------ 1 file changed, 252 deletions(-) delete mode 100755 freqtrade/strategy/backtest_lookahead_bias_checker.py diff --git a/freqtrade/strategy/backtest_lookahead_bias_checker.py b/freqtrade/strategy/backtest_lookahead_bias_checker.py deleted file mode 100755 index 2e5ef4165..000000000 --- a/freqtrade/strategy/backtest_lookahead_bias_checker.py +++ /dev/null @@ -1,252 +0,0 @@ -import copy -import pathlib -import shutil -from copy import deepcopy -from datetime import datetime, timedelta, timezone - -from pandas import DataFrame - -from freqtrade.configuration import TimeRange -from freqtrade.data.history import get_timerange -from freqtrade.exchange import timeframe_to_minutes -from freqtrade.optimize.backtesting import Backtesting - - -class VarHolder: - timerange: TimeRange - data: DataFrame - indicators: DataFrame - result: DataFrame - compared: DataFrame - from_dt: datetime - to_dt: datetime - compared_dt: datetime - - -class Analysis: - def __init__(self): - self.total_signals = 0 - self.false_entry_signals = 0 - self.false_exit_signals = 0 - self.false_indicators = [] - 
self.has_bias = False - - total_signals: int - false_entry_signals: int - false_exit_signals: int - - false_indicators: list - has_bias: bool - - -class BacktestLookaheadBiasChecker: - - def __init__(self): - self.exportfilename = None - self.strategy_obj = None - self.current_analysis = None - self.local_config = None - self.full_varHolder = None - - self.entry_varHolder = None - self.exit_varHolder = None - self.entry_varHolders = [] - self.exit_varHolders = [] - self.backtesting = None - self.minimum_trade_amount = None - self.targeted_trade_amount = None - self.failed_bias_check = True - - @staticmethod - def dt_to_timestamp(dt): - timestamp = int(dt.replace(tzinfo=timezone.utc).timestamp()) - return timestamp - - @staticmethod - def get_result(backtesting, processed): - min_date, max_date = get_timerange(processed) - - result = backtesting.backtest( - processed=deepcopy(processed), - start_date=min_date, - end_date=max_date - ) - return result - - @staticmethod - def report_signal(result, column_name, checked_timestamp): - df = result['results'] - row_count = df[column_name].shape[0] - - if row_count == 0: - return False - else: - - df_cut = df[(df[column_name] == checked_timestamp)] - if df_cut[column_name].shape[0] == 0: - # print("did NOT find the same signal in column " + column_name + - # " at timestamp " + str(checked_timestamp)) - return False - else: - return True - return False - - # analyzes two data frames with processed indicators and shows differences between them. - def analyze_indicators(self, full_vars, cut_vars, current_pair): - # extract dataframes - cut_df = cut_vars.indicators[current_pair] - full_df = full_vars.indicators[current_pair] - - # cut longer dataframe to length of the shorter - full_df_cut = full_df[ - (full_df.date == cut_vars.compared_dt) - ].reset_index(drop=True) - cut_df_cut = cut_df[ - (cut_df.date == cut_vars.compared_dt) - ].reset_index(drop=True) - - # compare dataframes - if full_df_cut.shape[0] != 0: - if cut_df_cut.shape[0] != 0: - compare_df = full_df_cut.compare(cut_df_cut) - - if compare_df.shape[0] > 0: - for col_name, values in compare_df.items(): - col_idx = compare_df.columns.get_loc(col_name) - compare_df_row = compare_df.iloc[0] - # compare_df now comprises tuples with [1] having either 'self' or 'other' - if 'other' in col_name[1]: - continue - self_value = compare_df_row[col_idx] - other_value = compare_df_row[col_idx + 1] - - # output differences - if self_value != other_value: - - if not self.current_analysis.false_indicators.__contains__(col_name[0]): - self.current_analysis.false_indicators.append(col_name[0]) - print(f"=> found look ahead bias in indicator {col_name[0]}. 
" + - f"{str(self_value)} != {str(other_value)}") - - def prepare_data(self, varHolder, pairs_to_load): - - # purge previous data - abs_folder_path = pathlib.Path("user_data/models/uniqe-id").resolve() - # remove folder and its contents - if pathlib.Path.exists(abs_folder_path): - shutil.rmtree(abs_folder_path) - - prepare_data_config = copy.deepcopy(self.local_config) - prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varHolder.from_dt)) + "-" + - str(self.dt_to_timestamp(varHolder.to_dt))) - prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load - - self.backtesting = Backtesting(prepare_data_config) - self.backtesting._set_strategy(self.backtesting.strategylist[0]) - varHolder.data, varHolder.timerange = self.backtesting.load_bt_data() - self.backtesting.load_bt_data_detail() - - varHolder.indicators = self.backtesting.strategy.advise_all_indicators(varHolder.data) - varHolder.result = self.get_result(self.backtesting, varHolder.indicators) - - def start(self, config, strategy_obj: dict, args) -> None: - - # deepcopy so we can change the pairs for the 2ndary runs - # and not worry about another strategy to check after. - self.local_config = deepcopy(config) - self.local_config['strategy_list'] = [strategy_obj['name']] - self.current_analysis = Analysis() - self.minimum_trade_amount = args['minimum_trade_amount'] - self.targeted_trade_amount = args['targeted_trade_amount'] - self.exportfilename = args['exportfilename'] - self.strategy_obj = strategy_obj - - # first make a single backtest - self.full_varHolder = VarHolder() - - # define datetime in human-readable format - parsed_timerange = TimeRange.parse_timerange(config['timerange']) - - if parsed_timerange.startdt is None: - self.full_varHolder.from_dt = datetime.utcfromtimestamp(0) - else: - self.full_varHolder.from_dt = parsed_timerange.startdt - - if parsed_timerange.stopdt is None: - self.full_varHolder.to_dt = datetime.now() - else: - self.full_varHolder.to_dt = parsed_timerange.stopdt - - self.prepare_data(self.full_varHolder, self.local_config['pairs']) - - found_signals: int = self.full_varHolder.result['results'].shape[0] + 1 - if found_signals >= self.targeted_trade_amount: - print(f"Found {found_signals} trades, calculating {self.targeted_trade_amount} trades.") - elif self.targeted_trade_amount >= found_signals >= self.minimum_trade_amount: - print(f"Only found {found_signals} trades. Calculating all available trades.") - else: - print(f"found {found_signals} trades " - f"which is less than minimum_trade_amount {self.minimum_trade_amount}. " - f"Cancelling this backtest lookahead bias test.") - return - - # now we loop through all entry signals - # starting from the same datetime to avoid miss-reports of bias - for idx, result_row in self.full_varHolder.result['results'].iterrows(): - if self.current_analysis.total_signals == self.targeted_trade_amount: - break - - # if force-sold, ignore this signal since here it will unconditionally exit. 
- if result_row.close_date == self.dt_to_timestamp(self.full_varHolder.to_dt): - continue - - self.current_analysis.total_signals += 1 - - self.entry_varHolder = VarHolder() - self.exit_varHolder = VarHolder() - self.entry_varHolders.append(self.entry_varHolder) - self.exit_varHolders.append(self.exit_varHolder) - - self.entry_varHolder.from_dt = self.full_varHolder.from_dt - self.entry_varHolder.compared_dt = result_row['open_date'] - # to_dt needs +1 candle since it won't buy on the last candle - self.entry_varHolder.to_dt = (result_row['open_date'] + - timedelta(minutes=timeframe_to_minutes( - self.local_config['timeframe']))) - - self.prepare_data(self.entry_varHolder, [result_row['pair']]) - - # to_dt needs +1 candle since it will always exit/force-exit trades on the last candle - self.exit_varHolder.from_dt = self.full_varHolder.from_dt - self.exit_varHolder.to_dt = (result_row['close_date'] + - timedelta(minutes=timeframe_to_minutes( - self.local_config['timeframe']))) - self.exit_varHolder.compared_dt = result_row['close_date'] - - self.prepare_data(self.exit_varHolder, [result_row['pair']]) - - # register if buy signal is broken - if not self.report_signal( - self.entry_varHolder.result, "open_date", self.entry_varHolder.compared_dt): - self.current_analysis.false_entry_signals += 1 - - # register if buy or sell signal is broken - if not self.report_signal( - self.exit_varHolder.result, "close_date", self.exit_varHolder.compared_dt): - self.current_analysis.false_exit_signals += 1 - - if len(self.entry_varHolders) >= 10: - pass - # check if the indicators themselves contain biased data - self.analyze_indicators(self.full_varHolder, self.entry_varHolder, result_row['pair']) - self.analyze_indicators(self.full_varHolder, self.exit_varHolder, result_row['pair']) - - if (self.current_analysis.false_entry_signals > 0 or - self.current_analysis.false_exit_signals > 0 or - len(self.current_analysis.false_indicators) > 0): - print(" => " + self.local_config['strategy_list'][0] + ": bias detected!") - self.current_analysis.has_bias = True - else: - print(self.local_config['strategy_list'][0] + ": no bias detected") - - self.failed_bias_check = False From ac36ba65926f6ee25c7960113ed1fbd9bd7dcd58 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 15 Jun 2023 20:12:11 +0200 Subject: [PATCH 079/130] Improve arguments file formatting --- freqtrade/commands/arguments.py | 16 ++++++---------- freqtrade/commands/strategy_utils_commands.py | 0 2 files changed, 6 insertions(+), 10 deletions(-) mode change 100755 => 100644 freqtrade/commands/strategy_utils_commands.py diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index af5f0a470..a2d2f8c5c 100755 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -124,10 +124,6 @@ ARGS_LOOKAHEAD_ANALYSIS = ARGS_BACKTEST + ["minimum_trade_amount", "lookahead_analysis_exportfilename"] -# + ["target_trades", "minimum_trades", -# "target_trades", "exportfilename"] -# will be added when the base version works. - class Arguments: """ Arguments Class. 
Manage the arguments received by the cli @@ -460,14 +456,14 @@ class Arguments: 'files to the current version', parents=[_common_parser]) strategy_updater_cmd.set_defaults(func=start_strategy_update) - self._build_args(optionlist=ARGS_STRATEGY_UPDATER, - parser=strategy_updater_cmd) + self._build_args(optionlist=ARGS_STRATEGY_UPDATER, parser=strategy_updater_cmd) # Add lookahead_analysis subcommand - lookahead_analayis_cmd = \ - subparsers.add_parser('lookahead-analysis', - help="checks for potential look ahead bias", - parents=[_common_parser, _strategy_parser]) + lookahead_analayis_cmd = subparsers.add_parser( + 'lookahead-analysis', + help="Check for potential look ahead bias.", + parents=[_common_parser, _strategy_parser]) + lookahead_analayis_cmd.set_defaults(func=start_lookahead_analysis) self._build_args(optionlist=ARGS_LOOKAHEAD_ANALYSIS, diff --git a/freqtrade/commands/strategy_utils_commands.py b/freqtrade/commands/strategy_utils_commands.py old mode 100755 new mode 100644 From ad74e65673d7d528ef07f49d56350077dd00bfa3 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 15 Jun 2023 20:23:47 +0200 Subject: [PATCH 080/130] Simplify configuration setup --- freqtrade/configuration/configuration.py | 6 ++---- freqtrade/optimize/lookahead_analysis_helpers.py | 1 + tests/optimize/test_lookahead_analysis.py | 11 +++++------ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index 5bbbf301d..a64eaa0ca 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -300,10 +300,8 @@ class Configuration: self._args_to_config(config, argname='hyperoptexportfilename', logstring='Using hyperopt file: {}') - if self.args.get('lookahead_analysis_exportfilename'): - if self.args["lookahead_analysis_exportfilename"] is not None: - self._args_to_config(config, argname='lookahead_analysis_exportfilename', - logstring='saving lookahead analysis results into {} ...') + self._args_to_config(config, argname='lookahead_analysis_exportfilename', + logstring='Saving lookahead analysis results into {} ...') self._args_to_config(config, argname='epochs', logstring='Parameter --epochs detected ... 
' diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 0f2b78e24..22cbbfa6b 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -15,6 +15,7 @@ logger = logging.getLogger(__name__) class LookaheadAnalysisSubFunctions: + @staticmethod def text_table_lookahead_analysis_instances( config: Dict[str, Any], diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 7678726ae..d2e6fbbe1 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -114,12 +114,11 @@ def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf analysis.false_entry_signals = 4 analysis.false_exit_signals = 3 - strategy_obj = \ - { - 'name': "strategy_test_v3_with_lookahead_bias", - 'location': PurePosixPath(lookahead_conf['strategy_path'], - f"{lookahead_conf['strategy']}.py") - } + strategy_obj = { + 'name': "strategy_test_v3_with_lookahead_bias", + 'location': PurePosixPath(lookahead_conf['strategy_path'], + f"{lookahead_conf['strategy']}.py") + } instance = LookaheadAnalysis(lookahead_conf, strategy_obj) instance.current_analysis = analysis From 964bf76469414137a27300112423b3787d651d40 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 15 Jun 2023 20:42:26 +0200 Subject: [PATCH 081/130] Invert parameters for initialize_single_lookahead_analysis otherwise their order is reversed before calling LookaheadAnalysis for no good reason --- freqtrade/optimize/lookahead_analysis_helpers.py | 4 ++-- tests/optimize/test_lookahead_analysis.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis_helpers.py b/freqtrade/optimize/lookahead_analysis_helpers.py index 22cbbfa6b..702eee774 100644 --- a/freqtrade/optimize/lookahead_analysis_helpers.py +++ b/freqtrade/optimize/lookahead_analysis_helpers.py @@ -150,7 +150,7 @@ class LookaheadAnalysisSubFunctions: return config @staticmethod - def initialize_single_lookahead_analysis(strategy_obj: Dict[str, Any], config: Dict[str, Any]): + def initialize_single_lookahead_analysis(config: Config, strategy_obj: Dict[str, Any]): logger.info(f"Bias test of {Path(strategy_obj['location']).name} started.") start = time.perf_counter() @@ -186,7 +186,7 @@ class LookaheadAnalysisSubFunctions: if strategy_obj['name'] == strat and strategy_obj not in strategy_list: lookaheadAnalysis_instances.append( LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( - strategy_obj, config)) + config, strategy_obj)) break # report the results diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index d2e6fbbe1..8539db7f3 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -291,7 +291,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): Path(lookahead_conf['lookahead_analysis_exportfilename']).unlink() -def test_initialize_single_lookahead_analysis(lookahead_conf, mocker): +def test_initialize_single_lookahead_analysis(lookahead_conf, mocker, caplog): mocker.patch('freqtrade.data.history.get_timerange', get_timerange) mocker.patch(f'{EXMS}.get_fee', return_value=0.0) mocker.patch(f'{EXMS}.get_min_pair_stake_amount', return_value=0.00001) @@ -303,9 +303,17 @@ def test_initialize_single_lookahead_analysis(lookahead_conf, mocker): lookahead_conf['timeframe'] = '5m' 
lookahead_conf['timerange'] = '20180119-20180122' - strategy_obj = {'name': "strategy_test_v3_with_lookahead_bias"} + start_mock = mocker.patch('freqtrade.optimize.lookahead_analysis.LookaheadAnalysis.start') + strategy_obj = { + 'name': "strategy_test_v3_with_lookahead_bias", + 'location': Path(lookahead_conf['strategy_path'], f"{lookahead_conf['strategy']}.py") + } + + instance = LookaheadAnalysisSubFunctions.initialize_single_lookahead_analysis( + lookahead_conf, strategy_obj) + assert log_has_re(r"Bias test of .* started\.", caplog) + assert start_mock.call_count == 1 - instance = LookaheadAnalysis(lookahead_conf, strategy_obj) assert instance.strategy_obj['name'] == "strategy_test_v3_with_lookahead_bias" From b3ef024e9e6c402edf279254b52e14ccf18e28a2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 15 Jun 2023 20:43:05 +0200 Subject: [PATCH 082/130] Don't use PurePosixPath --- tests/optimize/test_lookahead_analysis.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/optimize/test_lookahead_analysis.py b/tests/optimize/test_lookahead_analysis.py index 8539db7f3..3c6a5ad6d 100644 --- a/tests/optimize/test_lookahead_analysis.py +++ b/tests/optimize/test_lookahead_analysis.py @@ -1,6 +1,6 @@ # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument from copy import deepcopy -from pathlib import Path, PurePosixPath +from pathlib import Path from unittest.mock import MagicMock, PropertyMock import pytest @@ -73,7 +73,7 @@ def test_start_lookahead_analysis(mocker): start_lookahead_analysis(pargs) -def test_lookahead_helper_invalid_config(lookahead_conf, caplog) -> None: +def test_lookahead_helper_invalid_config(lookahead_conf) -> None: conf = deepcopy(lookahead_conf) conf['targeted_trade_amount'] = 10 conf['minimum_trade_amount'] = 40 @@ -82,7 +82,7 @@ def test_lookahead_helper_invalid_config(lookahead_conf, caplog) -> None: LookaheadAnalysisSubFunctions.start(conf) -def test_lookahead_helper_no_strategy_defined(lookahead_conf, caplog): +def test_lookahead_helper_no_strategy_defined(lookahead_conf): conf = deepcopy(lookahead_conf) conf['pairs'] = ['UNITTEST/USDT'] del conf['strategy'] @@ -91,7 +91,7 @@ def test_lookahead_helper_no_strategy_defined(lookahead_conf, caplog): LookaheadAnalysisSubFunctions.start(conf) -def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: +def test_lookahead_helper_start(lookahead_conf, mocker) -> None: single_mock = MagicMock() text_table_mock = MagicMock() mocker.patch.multiple( @@ -107,7 +107,7 @@ def test_lookahead_helper_start(lookahead_conf, mocker, caplog) -> None: text_table_mock.reset_mock() -def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf, caplog): +def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf): analysis = Analysis() analysis.has_bias = True analysis.total_signals = 5 @@ -116,8 +116,7 @@ def test_lookahead_helper_text_table_lookahead_analysis_instances(lookahead_conf strategy_obj = { 'name': "strategy_test_v3_with_lookahead_bias", - 'location': PurePosixPath(lookahead_conf['strategy_path'], - f"{lookahead_conf['strategy']}.py") + 'location': Path(lookahead_conf['strategy_path'], f"{lookahead_conf['strategy']}.py") } instance = LookaheadAnalysis(lookahead_conf, strategy_obj) @@ -192,7 +191,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): strategy_obj1 = { 'name': "strat1", - 'location': PurePosixPath("file1.py"), + 'location': Path("file1.py"), } instance1 = 
LookaheadAnalysis(lookahead_conf, strategy_obj1) @@ -237,7 +236,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): strategy_obj2 = { 'name': "strat1", - 'location': PurePosixPath("file1.py"), + 'location': Path("file1.py"), } instance2 = LookaheadAnalysis(lookahead_conf, strategy_obj2) @@ -275,7 +274,7 @@ def test_lookahead_helper_export_to_csv(lookahead_conf): strategy_obj3 = { 'name': "strat3", - 'location': PurePosixPath("file3.py"), + 'location': Path("file3.py"), } instance3 = LookaheadAnalysis(lookahead_conf, strategy_obj3) From 2cd9043c51e88b9ccdb6c15235eb0bb5657626a5 Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 06:44:55 +0200 Subject: [PATCH 083/130] Make documentation discoverable / linked --- docs/lookahead-analysis.md | 38 +++++++++++++++++++------------------- mkdocs.yml | 1 + 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/lookahead-analysis.md b/docs/lookahead-analysis.md index d61acf370..cd1c74d13 100644 --- a/docs/lookahead-analysis.md +++ b/docs/lookahead-analysis.md @@ -1,4 +1,5 @@ # Lookahead analysis + This page explains how to validate your strategy in terms of look ahead bias. Checking look ahead bias is the bane of any strategy since it is sometimes very easy to introduce backtest bias - @@ -11,19 +12,18 @@ Lookahead-analysis requires historic data to be available. To learn how to get data for the pairs and exchange you're interested in, head over to the [Data Downloading](data-download.md) section of the documentation. -This command is built upon backtesting -since it internally chains backtests and pokes at the strategy to provoke it to show look ahead bias. +This command is built upon backtesting since it internally chains backtests and pokes at the strategy to provoke it to show look ahead bias. This is done by not looking at the strategy itself - but at the results it returned. -The results are things like changed indicator-values and moved entries/exits compared to the full backtest. +The results are things like changed indicator-values and moved entries/exits compared to the full backtest. You can use commands of [Backtesting](backtesting.md). It also supports the lookahead-analysis of freqai strategies. -- --cache is forced to "none" -- --max_open_trades is forced to be at least equal to the number of pairs -- --dry_run_wallet is forced to be basically infinite +- `--cache` is forced to "none". +- `--max-open-trades` is forced to be at least equal to the number of pairs. +- `--dry-run-wallet` is forced to be basically infinite. -## Backtesting command reference +## Lookahead-analysis command reference ``` usage: freqtrade lookahead-analysis [-h] [-v] [-V] @@ -45,13 +45,14 @@ optional arguments: Use this filename to save your lookahead-analysis-results to a csv file ``` +### Summary -#### Summary Checks a given strategy for look ahead bias via lookahead-analysis Look ahead bias means that the backtest uses data from future candles thereby not making it viable beyond backtesting and producing false hopes for the one backtesting. -#### Introduction: +### Introduction + Many strategies - without the programmer knowing - have fallen prey to look ahead bias. Any backtest will populate the full dataframe including all time stamps at the beginning. @@ -61,9 +62,10 @@ but not realistic. This command is made to try to verify the validity in the form of the aforementioned look ahead bias. -#### How does the command work? +### How does the command work? 
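On the `PurePosixPath` to `Path` switch in patch 082 nearby: pure paths are computation-only objects and cannot touch the filesystem, which is why `Path` is the appropriate type for test files. A quick standard-library check:

```python
from pathlib import Path, PurePosixPath

pure = PurePosixPath("file1.py")
concrete = Path("file1.py")

# Pure paths are OS-agnostic string manipulators without I/O methods:
print(hasattr(pure, "exists"))   # False
# Concrete paths match the host OS and can query the filesystem:
print(concrete.exists())         # True/False depending on the working directory
```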
+ It will start with a backtest of all pairs to generate a baseline for indicators and entries/exits. -After the backtest ran, it will look if the minimum-trade-amount is met +After the backtest ran, it will look if the `minimum-trade-amount` is met and if not cancel the lookahead-analysis for this strategy. After setting the baseline it will then do additional runs for every entry and exit separately. @@ -71,13 +73,11 @@ When a verification-backtest is done, it will compare the indicators as the sign and report the bias. After all signals have been verified or falsified a result-table will be generated for the user to see. -#### Caveats: -- The lookahead-analysis can only verify / falsify the trades it calculated through. -If there was a strategy with signals that were not triggered in the lookahead-analysis -then it will not have it verified that entry/exit signal either. +### Caveats + +- `lookahead-analysis` can only verify / falsify the trades it calculated through. +If there was a strategy with signals that were not triggered during the lookahead-analysis, then it will not have it verified that entry/exit signal either. This could then lead to a false-negative (the strategy will then be reported as non-biased). -- lookahead-analysis has access to everything that backtesting has too. +- `lookahead-analysis` has access to everything that backtesting has too. Please don't provoke any configs like enabling position stacking. -If you decide to do so, -then make doubly sure that you won't ever run out of max_open_trades -amount and neither leftover money in your wallet. +If you decide to do so, then make doubly sure that you won't ever run out of `max_open_trades` amount and neither leftover money in your wallet. diff --git a/mkdocs.yml b/mkdocs.yml index 3f9e8a880..815a10419 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -22,6 +22,7 @@ nav: - Web Hook: webhook-config.md - Data Downloading: data-download.md - Backtesting: backtesting.md + - Lookahead analysis: lookahead-analysis.md - Hyperopt: hyperopt.md - FreqAI: - Introduction: freqai.md From 1b86bf8a1db0d304417dc8e110d9205c37a6fd2a Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 06:58:34 +0200 Subject: [PATCH 084/130] Don't include non-used parameters in command structure --- freqtrade/commands/arguments.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index a2d2f8c5c..b0da8fa9d 100755 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -119,9 +119,9 @@ NO_CONF_ALLOWED = ["create-userdir", "list-exchanges", "new-strategy"] ARGS_STRATEGY_UPDATER = ["strategy_list", "strategy_path", "recursive_strategy_search"] -ARGS_LOOKAHEAD_ANALYSIS = ARGS_BACKTEST + ["minimum_trade_amount", - "targeted_trade_amount", - "lookahead_analysis_exportfilename"] +ARGS_LOOKAHEAD_ANALYSIS = [ + a for a in ARGS_BACKTEST if a not in ("position_stacking", "use_max_market_positions", 'cache') + ] + ["minimum_trade_amount", "targeted_trade_amount", "lookahead_analysis_exportfilename"] class Arguments: From dec3c0f37490d25db691c92b04cb589e7a683910 Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 07:02:40 +0200 Subject: [PATCH 085/130] Remove environment.yml completely --- environment.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 environment.yml diff --git a/environment.yml b/environment.yml deleted file mode 100644 index e69de29bb..000000000 From 64fcb1ed11292d419b0ded645339a180c27f66d1 Mon Sep 
17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 10:15:45 +0200 Subject: [PATCH 086/130] Better pin scikit-learn caused by #7896 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 106b5b6d3..b394a6877 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup plot = ['plotly>=4.0'] hyperopt = [ 'scipy', - 'scikit-learn', + 'scikit-learn<=1.1.3', 'scikit-optimize>=0.7.0', 'filelock', ] From 75ec19062c883da435d07f1f7d879e84f7075d08 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 16 Jun 2023 13:06:21 +0200 Subject: [PATCH 087/130] chore: make DOCS_LINK in constants.py, ensure datasieve is added to setup.py --- freqtrade/constants.py | 1 + .../RL/BaseReinforcementLearningModel.py | 1 - freqtrade/freqai/data_kitchen.py | 31 +------------------ freqtrade/freqai/freqai_interface.py | 10 +++--- setup.py | 3 +- 5 files changed, 9 insertions(+), 37 deletions(-) diff --git a/freqtrade/constants.py b/freqtrade/constants.py index 7012acb7c..acfca6fa5 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -8,6 +8,7 @@ from typing import Any, Dict, List, Literal, Tuple from freqtrade.enums import CandleType, PriceType, RPCMessageType +DOCS_LINK = "https://www.freqtrade.io/en/stable" DEFAULT_CONFIG = 'config.json' DEFAULT_EXCHANGE = 'bittrex' PROCESS_THROTTLE_SECS = 5 # sec diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index cffab602d..b59c47ad2 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -119,7 +119,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk) dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) - dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) (dd["train_features"], dd["train_labels"], diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 215457992..3f8d0fb4b 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -12,7 +12,6 @@ import numpy.typing as npt import pandas as pd import psutil from datasieve.pipeline import Pipeline -from datasieve.transforms import SKLearnWrapper from pandas import DataFrame from sklearn.model_selection import train_test_split @@ -966,35 +965,7 @@ class FreqaiDataKitchen: "in a future version.\n" "This version does not include any outlier configurations") - import datasieve.transforms as ds - from sklearn.preprocessing import MinMaxScaler - dd = data_dictionary - - self.feature_pipeline = Pipeline([ - ('variance_threshold', ds.VarianceThreshold()), - ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) - ]) - - (dd["train_features"], - dd["train_labels"], - dd["train_weights"]) = self.feature_pipeline.fit_transform(dd["train_features"], - dd["train_labels"], - dd["train_weights"]) - - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = self.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) - - self.label_pipeline = Pipeline([ - ('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) - ]) - - dd["train_labels"], _, _ = self.label_pipeline.fit_transform(dd["train_labels"]) - dd["test_labels"], _, _ = self.label_pipeline.transform(dd["test_labels"]) - - return dd + return data_dictionary def denormalize_labels_from_metadata(self, df: 
DataFrame) -> DataFrame: """ diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index eff8d4bd5..a6e5d40ed 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -18,7 +18,7 @@ from pandas import DataFrame from sklearn.preprocessing import MinMaxScaler from freqtrade.configuration import TimeRange -from freqtrade.constants import Config +from freqtrade.constants import DOCS_LINK, Config from freqtrade.data.dataprovider import DataProvider from freqtrade.enums import RunMode from freqtrade.exceptions import OperationalException @@ -974,20 +974,20 @@ class IFreqaiModel(ABC): """ throw deprecation warning if this function is called """ - ft = "https://www.freqtrade.io/en/latest" logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" " data pipeline. Please update your model to use the new data pipeline." " This can be achieved by following the migration guide at " - f"{ft}/strategy_migration/#freqai-new-data-pipeline") + f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") + dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) return def data_cleaning_predict(self, dk: FreqaiDataKitchen, pair: str): """ throw deprecation warning if this function is called """ - ft = "https://www.freqtrade.io/en/latest" logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" " data pipeline. Please update your model to use the new data pipeline." " This can be achieved by following the migration guide at " - f"{ft}/strategy_migration/#freqai-new-data-pipeline") + f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") + dk.label_pipeline = self.define_data_pipeline(threads=dk.thread_count) return diff --git a/setup.py b/setup.py index 106b5b6d3..9a04e07d0 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,8 @@ freqai = [ 'catboost; platform_machine != "aarch64"', 'lightgbm', 'xgboost', - 'tensorboard' + 'tensorboard', + 'datasieve>=0.1.4' ] freqai_rl = [ From 72101f059dad268c7977d5eb2227766d5df86da6 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 16 Jun 2023 13:20:35 +0200 Subject: [PATCH 088/130] feat: ensure full backwards compatibility --- freqtrade/freqai/data_kitchen.py | 8 ++------ freqtrade/freqai/freqai_interface.py | 26 +++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3f8d0fb4b..3a91c8551 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -962,8 +962,7 @@ class FreqaiDataKitchen: " This can be achieved by following the migration guide at " f"{ft}/strategy_migration/#freqai-new-data-pipeline " "We added a basic pipeline for you, but this will be removed " - "in a future version.\n" - "This version does not include any outlier configurations") + "in a future version.") return data_dictionary @@ -977,11 +976,8 @@ class FreqaiDataKitchen: " This can be achieved by following the migration guide at " f"{ft}/strategy_migration/#freqai-new-data-pipeline " "We added a basic pipeline for you, but this will be removed " - "in a future version.\n" - "This version does not include any outlier configurations") + "in a future version.") pred_df, _, _ = self.label_pipeline.inverse_transform(df) - self.DI_values = np.zeros(len(pred_df.index)) - self.do_predict = np.ones(len(pred_df.index)) return pred_df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 
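Patch 088, which begins above and continues below, restores outlier bookkeeping for models still on the deprecated pipeline: the pipeline's per-row outlier flags become `do_predict`, and `DI_values` is zero-filled when no dissimilarity index is configured. A self-contained sketch of that bookkeeping with stand-in data (the datasieve transform signature is inferred from the diff, not verified here):

```python
import numpy as np
import pandas as pd

# Pretend the feature pipeline flagged row 2 as an outlier (0 = outlier):
outliers = pd.Series([1, 1, 0, 1])

do_predict = outliers.to_numpy()            # downstream mask: 0-rows are skipped
DI_values = np.zeros(len(outliers.index))   # no DI_threshold set -> all zeros

print(do_predict)   # [1 1 0 1]
print(DI_values)    # [0. 0. 0. 0.]
```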
a6e5d40ed..4ca5467b6 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -979,6 +979,23 @@ class IFreqaiModel(ABC): " This can be achieved by following the migration guide at " f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dd = dk.data_dictionary + (dd["train_features"], + dd["train_labels"], + dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], + dd["train_labels"], + dd["train_weights"]) + + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + + dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) + + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) return def data_cleaning_predict(self, dk: FreqaiDataKitchen, pair: str): @@ -989,5 +1006,12 @@ class IFreqaiModel(ABC): " data pipeline. Please update your model to use the new data pipeline." " This can be achieved by following the migration guide at " f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") - dk.label_pipeline = self.define_data_pipeline(threads=dk.thread_count) + dd = dk.data_dictionary + dd["predict_features"], outliers, _ = dk.feature_pipeline.transform( + dd["predict_features"], outlier_check=True) + if self.freqai_info.get("DI_threshold", 0) > 0: + dk.DI_values = dk.feature_pipeline["di"].di_values + else: + dk.DI_values = np.zeros(len(outliers.index)) + dk.do_predict = outliers.to_numpy() return From 2107dce2cd13331953089403cde2ed491e659888 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Fri, 16 Jun 2023 15:03:49 +0200 Subject: [PATCH 089/130] Update freqai-feature-engineering.md --- docs/freqai-feature-engineering.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 82b7569a5..d106b1dd8 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -180,6 +180,9 @@ You can ask for each of the defined features to be included also for informative In total, the number of features the user of the presented example strat has created is: length of `include_timeframes` * no. features in `feature_engineering_expand_*()` * length of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles` $= 3 * 3 * 3 * 2 * 2 = 108$. + + !!! note "Learn more about creative feature engineering" + Check out our [medium article](https://emergentmethods.medium.com/freqai-from-price-to-prediction-6fadac18b665) geared toward helping users learn how to creatively engineer features. ### Gain finer control over `feature_engineering_*` functions with `metadata` From ffd7394adb5429791188fa1a2e2775771e800b05 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Fri, 16 Jun 2023 15:10:11 +0200 Subject: [PATCH 090/130] Update freqai.md --- docs/freqai.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/freqai.md b/docs/freqai.md index a1b20ae1e..820fb81f6 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -107,6 +107,13 @@ This is for performance reasons - FreqAI relies on making quick predictions/retr it needs to download all the training data at the beginning of a dry/live instance. FreqAI stores and appends new candles automatically for future retrains. 
This means that if new pairs arrive later in the dry run due to a volume pairlist, it will not have the data ready. However, FreqAI does work with the `ShufflePairlist` or a `VolumePairlist` which keeps the total pairlist constant (but reorders the pairs according to volume). +## Additional learning materials + +Here we compile some external materials that provide deeper looks into various components of FreqAI: + +- [Real-time head-to-head: Adaptive modeling of financial market data using XGBoost and CatBoost](https://emergentmethods.medium.com/real-time-head-to-head-adaptive-modeling-of-financial-market-data-using-xgboost-and-catboost-995a115a7495) +- [FreqAI - from price to prediction](https://emergentmethods.medium.com/freqai-from-price-to-prediction-6fadac18b665) + ## Credits FreqAI is developed by a group of individuals who all contribute specific skillsets to the project. From 4f834c89642a4e77db1abffab93c0f76f802ea4b Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 15:15:40 +0200 Subject: [PATCH 091/130] Remove old version pin for catboost --- .github/workflows/ci.yml | 1 + requirements-freqai.txt | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ecd27cc3..0393b5cb9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,6 +136,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + check-latest: true - name: Cache_dependencies uses: actions/cache@v3 diff --git a/requirements-freqai.txt b/requirements-freqai.txt index ad069ade2..9f6390e56 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -5,8 +5,7 @@ # Required for freqai scikit-learn==1.1.3 joblib==1.2.0 -catboost==1.1.1; sys_platform == 'darwin' and python_version < '3.9' -catboost==1.2; 'arm' not in platform_machine and (sys_platform != 'darwin' or python_version >= '3.9') +catboost==1.2; 'arm' not in platform_machine lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 From 7939716a5e2f5475ca18e5c8919a0bf3d3565bde Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 18:00:18 +0200 Subject: [PATCH 092/130] Improve formatting of telegram /status messages --- freqtrade/rpc/telegram.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/freqtrade/rpc/telegram.py b/freqtrade/rpc/telegram.py index f12610d3d..aad7fd8c4 100644 --- a/freqtrade/rpc/telegram.py +++ b/freqtrade/rpc/telegram.py @@ -534,10 +534,10 @@ class Telegram(RPCHandler): if order_nr == 1: lines.append(f"*{wording} #{order_nr}:*") lines.append( - f"*Amount:* {cur_entry_amount} " + f"*Amount:* {cur_entry_amount:.8g} " f"({round_coin_value(order['cost'], quote_currency)})" ) - lines.append(f"*Average Price:* {cur_entry_average}") + lines.append(f"*Average Price:* {cur_entry_average:.8g}") else: sum_stake = 0 sum_amount = 0 @@ -560,9 +560,9 @@ class Telegram(RPCHandler): if is_open: lines.append("({})".format(dt_humanize(order["order_filled_date"], granularity=["day", "hour", "minute"]))) - lines.append(f"*Amount:* {cur_entry_amount} " + lines.append(f"*Amount:* {cur_entry_amount:.8g} " f"({round_coin_value(order['cost'], quote_currency)})") - lines.append(f"*Average {wording} Price:* {cur_entry_average} " + lines.append(f"*Average {wording} Price:* {cur_entry_average:.8g} " f"({price_to_1st_entry:.2%} from 1st entry Rate)") lines.append(f"*Order filled:* {order['order_filled_date']}") @@ -633,11 +633,11 @@ class Telegram(RPCHandler): ]) lines.extend([ - "*Open 
Rate:* `{open_rate:.8f}`", - "*Close Rate:* `{close_rate:.8f}`" if r['close_rate'] else "", + "*Open Rate:* `{open_rate:.8g}`", + "*Close Rate:* `{close_rate:.8g}`" if r['close_rate'] else "", "*Open Date:* `{open_date}`", "*Close Date:* `{close_date}`" if r['close_date'] else "", - " \n*Current Rate:* `{current_rate:.8f}`" if r['is_open'] else "", + " \n*Current Rate:* `{current_rate:.8g}`" if r['is_open'] else "", ("*Unrealized Profit:* " if r['is_open'] else "*Close Profit: *") + "`{profit_ratio:.2%}` `({profit_abs_r})`", ]) @@ -658,9 +658,9 @@ class Telegram(RPCHandler): "`({initial_stop_loss_ratio:.2%})`") # Adding stoploss and stoploss percentage only if it is not None - lines.append("*Stoploss:* `{stop_loss_abs:.8f}` " + + lines.append("*Stoploss:* `{stop_loss_abs:.8g}` " + ("`({stop_loss_ratio:.2%})`" if r['stop_loss_ratio'] else "")) - lines.append("*Stoploss distance:* `{stoploss_current_dist:.8f}` " + lines.append("*Stoploss distance:* `{stoploss_current_dist:.8g}` " "`({stoploss_current_dist_ratio:.2%})`") if r['open_order']: lines.append( From 24e806f081ab6cb2b12779551534c67a630aa6f0 Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 16 Jun 2023 19:58:35 +0200 Subject: [PATCH 093/130] Improve resiliance by using non-exchange controlled order attributes. --- freqtrade/strategy/interface.py | 2 +- tests/test_freqtradebot.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index dfdfd31d8..d0655b504 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -1300,7 +1300,7 @@ class IStrategy(ABC, HyperStrategyMixin): timedout = (order.status == 'open' and order.order_date_utc < timeout_threshold) if timedout: return True - time_method = (self.check_exit_timeout if order.side == trade.exit_side + time_method = (self.check_exit_timeout if order.ft_order_side == trade.exit_side else self.check_entry_timeout) return strategy_safe_wrapper(time_method, diff --git a/tests/test_freqtradebot.py b/tests/test_freqtradebot.py index 945a81aa6..24e726403 100644 --- a/tests/test_freqtradebot.py +++ b/tests/test_freqtradebot.py @@ -3011,8 +3011,8 @@ def test_manage_open_orders_exit_usercustom( freqtrade.manage_open_orders() assert cancel_order_mock.call_count == 0 assert rpc_mock.call_count == 1 - assert freqtrade.strategy.check_exit_timeout.call_count == 1 - assert freqtrade.strategy.check_entry_timeout.call_count == 0 + assert freqtrade.strategy.check_exit_timeout.call_count == (0 if is_short else 1) + assert freqtrade.strategy.check_entry_timeout.call_count == (1 if is_short else 0) freqtrade.strategy.check_exit_timeout = MagicMock(side_effect=KeyError) freqtrade.strategy.check_entry_timeout = MagicMock(side_effect=KeyError) @@ -3020,8 +3020,8 @@ def test_manage_open_orders_exit_usercustom( freqtrade.manage_open_orders() assert cancel_order_mock.call_count == 0 assert rpc_mock.call_count == 1 - assert freqtrade.strategy.check_exit_timeout.call_count == 1 - assert freqtrade.strategy.check_entry_timeout.call_count == 0 + assert freqtrade.strategy.check_exit_timeout.call_count == (0 if is_short else 1) + assert freqtrade.strategy.check_entry_timeout.call_count == (1 if is_short else 0) # Return True - sells! 
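For intuition on the `:.8f` to `:.8g` switch in the Telegram `/status` formatting above, a standalone comparison using only standard Python string formatting:

```python
# ':.8f' always prints 8 decimal places; ':.8g' keeps 8 significant digits,
# trimming trailing zeros and falling back to scientific notation for very
# small prices instead of rendering them as 0.00000000.
for price in (20000.0, 0.00012345, 1.23456789e-09):
    print(f"{price:.8f}  vs  {price:.8g}")
# 20000.00000000  vs  20000
# 0.00012345  vs  0.00012345
# 0.00000000  vs  1.2345679e-09
```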
freqtrade.strategy.check_exit_timeout = MagicMock(return_value=True) @@ -3029,8 +3029,8 @@ def test_manage_open_orders_exit_usercustom( freqtrade.manage_open_orders() assert cancel_order_mock.call_count == 1 assert rpc_mock.call_count == 2 - assert freqtrade.strategy.check_exit_timeout.call_count == 1 - assert freqtrade.strategy.check_entry_timeout.call_count == 0 + assert freqtrade.strategy.check_exit_timeout.call_count == (0 if is_short else 1) + assert freqtrade.strategy.check_entry_timeout.call_count == (1 if is_short else 0) trade = Trade.session.scalars(select(Trade)).first() # cancelling didn't succeed - order-id remains open. assert trade.open_order_id is not None From 2c7aa9f721e181b3461cf95cd8eca1f85aeca299 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 08:37:38 +0200 Subject: [PATCH 094/130] Update doc wording --- docs/lookahead-analysis.md | 55 +++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/docs/lookahead-analysis.md b/docs/lookahead-analysis.md index cd1c74d13..9d57de779 100644 --- a/docs/lookahead-analysis.md +++ b/docs/lookahead-analysis.md @@ -26,25 +26,43 @@ It also supports the lookahead-analysis of freqai strategies. ## Lookahead-analysis command reference ``` -usage: freqtrade lookahead-analysis [-h] [-v] [-V] - [--minimum-trade-amount INT] - [--targeted-trade-amount INT] - [--lookahead-analysis-exportfilename PATH] +usage: freqtrade lookahead-analysis [-h] [-v] [--logfile FILE] [-V] [-c PATH] + [-d PATH] [--userdir PATH] [-s NAME] + [--strategy-path PATH] + [--recursive-strategy-search] + [--freqaimodel NAME] + [--freqaimodel-path PATH] [-i TIMEFRAME] + [--timerange TIMERANGE] + [--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}] + [--max-open-trades INT] + [--stake-amount STAKE_AMOUNT] + [--fee FLOAT] [-p PAIRS [PAIRS ...]] + [--enable-protections] + [--dry-run-wallet DRY_RUN_WALLET] + [--timeframe-detail TIMEFRAME_DETAIL] + [--strategy-list STRATEGY_LIST [STRATEGY_LIST ...]] + [--export {none,trades,signals}] + [--export-filename PATH] + [--breakdown {day,week,month} [{day,week,month} ...]] + [--cache {none,day,week,month}] + [--freqai-backtest-live-models] + [--minimum-trade-amount INT] + [--targeted-trade-amount INT] + [--lookahead-analysis-exportfilename LOOKAHEAD_ANALYSIS_EXPORTFILENAME] -optional arguments: - -h, --help show this help message and exit +options: --minimum-trade-amount INT - Override the value of the `minimum_trade_amount` configuration - setting - Requires `--targeted-trade-amount` to be larger or equal to --minimum-trade-amount. - (default: 10) + Minimum trade amount for lookahead-analysis --targeted-trade-amount INT - Override the value of the `minimum_trade_amount` configuration - (default: 20) - --lookahead-analysis-exportfilename PATH - Use this filename to save your lookahead-analysis-results to a csv file + Targeted trade amount for lookahead analysis + --lookahead-analysis-exportfilename LOOKAHEAD_ANALYSIS_EXPORTFILENAME + Use this csv-filename to store lookahead-analysis- + results ``` +!!! Note "" + The above Output was reduced to options `lookahead-analysis` adds on top of regular backtesting commands. + ### Summary Checks a given strategy for look ahead bias via lookahead-analysis @@ -69,15 +87,14 @@ After the backtest ran, it will look if the `minimum-trade-amount` is met and if not cancel the lookahead-analysis for this strategy. After setting the baseline it will then do additional runs for every entry and exit separately. 
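To complement the command reference in the documentation patch above, an invocation might look like the following (flags are taken from the synopsis shown; the strategy name, timerange and filename are placeholders):

```
freqtrade lookahead-analysis --strategy MyStrategy \
    --timerange 20220101-20220701 \
    --minimum-trade-amount 10 --targeted-trade-amount 20 \
    --lookahead-analysis-exportfilename lookahead.csv
```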
-When a verification-backtest is done, it will compare the indicators as the signal (either entry or exit) -and report the bias. +When a verification-backtest is done, it will compare the indicators as the signal (either entry or exit) and report the bias. After all signals have been verified or falsified a result-table will be generated for the user to see. ### Caveats -- `lookahead-analysis` can only verify / falsify the trades it calculated through. -If there was a strategy with signals that were not triggered during the lookahead-analysis, then it will not have it verified that entry/exit signal either. -This could then lead to a false-negative (the strategy will then be reported as non-biased). +- `lookahead-analysis` can only verify / falsify the trades it calculated and verified. +If the strategy has many different signals / signal types, it's up to you to select appropriate parameters to ensure that all signals have triggered at least once. Not triggered signals will not have been verified. +This could lead to a false-negative (the strategy will then be reported as non-biased). - `lookahead-analysis` has access to everything that backtesting has too. Please don't provoke any configs like enabling position stacking. If you decide to do so, then make doubly sure that you won't ever run out of `max_open_trades` amount and neither leftover money in your wallet. From 34e7e3efea14e707b89bc95d02f7b725dceea9eb Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 08:40:09 +0200 Subject: [PATCH 095/130] Simplify imports --- freqtrade/optimize/lookahead_analysis.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 65e9cad3f..889e43375 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -1,9 +1,8 @@ -import copy import logging -import pathlib import shutil from copy import deepcopy from datetime import datetime, timedelta, timezone +from pathlib import Path from typing import Any, Dict, List, Optional import pandas as pd @@ -134,14 +133,13 @@ class LookaheadAnalysis: # purge previous data if the freqai model is defined # (to be sure nothing is carried over from older backtests) path_to_current_identifier = ( - pathlib.Path(f"{self.local_config['user_data_dir']}" - "/models/" - f"{self.local_config['freqai']['identifier']}").resolve()) + Path(f"{self.local_config['user_data_dir']}/models/" + f"{self.local_config['freqai']['identifier']}").resolve()) # remove folder and its contents - if pathlib.Path.exists(path_to_current_identifier): + if Path.exists(path_to_current_identifier): shutil.rmtree(path_to_current_identifier) - prepare_data_config = copy.deepcopy(self.local_config) + prepare_data_config = deepcopy(self.local_config) prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varholder.from_dt)) + "-" + str(self.dt_to_timestamp(varholder.to_dt))) prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load From 1567cd28496e765b809568550f17793554d3e70b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 09:10:54 +0200 Subject: [PATCH 096/130] Use DOCS_LINK throughout --- freqtrade/freqai/data_kitchen.py | 12 +++++------- freqtrade/util/binance_mig.py | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3a91c8551..7d4bf39ca 100644 --- a/freqtrade/freqai/data_kitchen.py +++ 
b/freqtrade/freqai/data_kitchen.py @@ -16,7 +16,7 @@ from pandas import DataFrame from sklearn.model_selection import train_test_split from freqtrade.configuration import TimeRange -from freqtrade.constants import Config +from freqtrade.constants import DOCS_LINK, Config from freqtrade.data.converter import reduce_dataframe_footprint from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds @@ -760,9 +760,9 @@ class FreqaiDataKitchen: " which was deprecated on March 1, 2023. Please refer " "to the strategy migration guide to use the new " "feature_engineering_* methods: \n" - "https://www.freqtrade.io/en/stable/strategy_migration/#freqai-strategy \n" + f"{DOCS_LINK}/strategy_migration/#freqai-strategy \n" "And the feature_engineering_* documentation: \n" - "https://www.freqtrade.io/en/latest/freqai-feature-engineering/" + f"{DOCS_LINK}/freqai-feature-engineering/" ) tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes") @@ -956,11 +956,10 @@ class FreqaiDataKitchen: """ Deprecation warning, migration assistance """ - ft = "https://www.freqtrade.io/en/latest" logger.warning(f"Your custom IFreqaiModel relies on the deprecated" " data pipeline. Please update your model to use the new data pipeline." " This can be achieved by following the migration guide at " - f"{ft}/strategy_migration/#freqai-new-data-pipeline " + f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline " "We added a basic pipeline for you, but this will be removed " "in a future version.") @@ -970,11 +969,10 @@ class FreqaiDataKitchen: """ Deprecation warning, migration assistance """ - ft = "https://www.freqtrade.io/en/latest" logger.warning(f"Your custom IFreqaiModel relies on the deprecated" " data pipeline. Please update your model to use the new data pipeline." 
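The `DOCS_LINK` consolidation (introduced in patch 087 and applied throughout in patch 096 here) is a plain constant-extraction refactor; in miniature, it trades repeated, drift-prone URL literals for one shared constant:

```python
# Before: the base URL was repeated inline (and drifted between /latest
# and /stable). After: one constant keeps every message consistent.
DOCS_LINK = "https://www.freqtrade.io/en/stable"

warning = ("Please update your model to use the new data pipeline. "
           f"See {DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline")
print(warning)
```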
" This can be achieved by following the migration guide at " - f"{ft}/strategy_migration/#freqai-new-data-pipeline " + f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline " "We added a basic pipeline for you, but this will be removed " "in a future version.") diff --git a/freqtrade/util/binance_mig.py b/freqtrade/util/binance_mig.py index 37a2d2ef1..9b0f8521f 100644 --- a/freqtrade/util/binance_mig.py +++ b/freqtrade/util/binance_mig.py @@ -3,7 +3,7 @@ import logging from packaging import version from sqlalchemy import select -from freqtrade.constants import Config +from freqtrade.constants import DOCS_LINK, Config from freqtrade.enums.tradingmode import TradingMode from freqtrade.exceptions import OperationalException from freqtrade.persistence.pairlock import PairLock @@ -25,7 +25,7 @@ def migrate_binance_futures_names(config: Config): if version.parse("2.6.26") > version.parse(ccxt.__version__): raise OperationalException( "Please follow the update instructions in the docs " - "(https://www.freqtrade.io/en/latest/updating/) to install a compatible ccxt version.") + f"({DOCS_LINK}/updating/) to install a compatible ccxt version.") _migrate_binance_futures_db(config) migrate_binance_futures_data(config) From 6bb75f0dd498abeb32c1688ff1283b99fe2ad108 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 10:12:36 +0200 Subject: [PATCH 097/130] Simplify import if only one element is used --- freqtrade/optimize/lookahead_analysis.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 889e43375..32528909d 100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, Dict, List, Optional -import pandas as pd +from pandas import DataFrame from freqtrade.configuration import TimeRange from freqtrade.data.history import get_timerange @@ -20,10 +20,10 @@ logger = logging.getLogger(__name__) class VarHolder: timerange: TimeRange - data: pd.DataFrame - indicators: pd.DataFrame - result: pd.DataFrame - compared: pd.DataFrame + data: DataFrame + indicators: Dict[str, DataFrame] + result: DataFrame + compared: DataFrame from_dt: datetime to_dt: datetime compared_dt: datetime @@ -63,7 +63,7 @@ class LookaheadAnalysis: return timestamp @staticmethod - def get_result(backtesting: Backtesting, processed: pd.DataFrame): + def get_result(backtesting: Backtesting, processed: DataFrame): min_date, max_date = get_timerange(processed) result = backtesting.backtest( @@ -92,8 +92,8 @@ class LookaheadAnalysis: # analyzes two data frames with processed indicators and shows differences between them. def analyze_indicators(self, full_vars: VarHolder, cut_vars: VarHolder, current_pair): # extract dataframes - cut_df = cut_vars.indicators[current_pair] - full_df = full_vars.indicators[current_pair] + cut_df: DataFrame = cut_vars.indicators[current_pair] + full_df: DataFrame = full_vars.indicators[current_pair] # cut longer dataframe to length of the shorter full_df_cut = full_df[ @@ -127,7 +127,7 @@ class LookaheadAnalysis: f"{col_name[0]}. 
" f"{str(self_value)} != {str(other_value)}") - def prepare_data(self, varholder: VarHolder, pairs_to_load: List[pd.DataFrame]): + def prepare_data(self, varholder: VarHolder, pairs_to_load: List[DataFrame]): if 'freqai' in self.local_config and 'identifier' in self.local_config['freqai']: # purge previous data if the freqai model is defined From 11ff454b3bb371eeb2ea038b49bf8856e045e5ec Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 17 Jun 2023 13:21:31 +0200 Subject: [PATCH 098/130] fix: ensure that a user setting up their own pipeline wont have conflicts with DI_values --- freqtrade/freqai/RL/BaseReinforcementLearningModel.py | 6 +----- freqtrade/freqai/base_models/BasePyTorchRegressor.py | 2 +- freqtrade/freqai/base_models/BaseRegressionModel.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index b59c47ad2..81cacc055 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -250,17 +250,13 @@ class BaseReinforcementLearningModel(IFreqaiModel): dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk) - dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( + dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform( dk.data_dictionary["prediction_features"], outlier_check=True) pred_df = self.rl_model_predict( dk.data_dictionary["prediction_features"], dk, self.model) pred_df.fillna(0, inplace=True) - if self.freqai_info.get("DI_threshold", 0) > 0: - dk.DI_values = dk.feature_pipeline["di"].di_values - dk.do_predict = outliers.to_numpy() - return (pred_df, dk.do_predict) def rl_model_predict(self, dataframe: DataFrame, diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py index ec4d6b80c..b77fec31a 100644 --- a/freqtrade/freqai/base_models/BasePyTorchRegressor.py +++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py @@ -52,7 +52,7 @@ class BasePyTorchRegressor(BasePyTorchModel): pred_df = DataFrame(y.detach().tolist(), columns=[dk.label_list[0]]) pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) - if self.freqai_info.get("DI_threshold", 0) > 0: + if dk.feature_pipeline["di"]: dk.DI_values = dk.feature_pipeline["di"].di_values else: dk.DI_values = np.zeros(len(outliers.index)) diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index f1e33bff8..3cce978b5 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -111,7 +111,7 @@ class BaseRegressionModel(IFreqaiModel): pred_df = DataFrame(predictions, columns=dk.label_list) pred_df, _, _ = dk.label_pipeline.inverse_transform(pred_df) - if self.freqai_info.get("DI_threshold", 0) > 0: + if dk.feature_pipeline["di"]: dk.DI_values = dk.feature_pipeline["di"].di_values else: dk.DI_values = np.zeros(len(outliers.index)) From bf872e8ed48dda59ebe704f7a059488c0aa72eef Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 14:25:46 +0200 Subject: [PATCH 099/130] Simplify comparison depth --- freqtrade/optimize/lookahead_analysis.py | 45 ++++++++++++------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/freqtrade/optimize/lookahead_analysis.py b/freqtrade/optimize/lookahead_analysis.py index 32528909d..dcc1088b3 
100755 --- a/freqtrade/optimize/lookahead_analysis.py +++ b/freqtrade/optimize/lookahead_analysis.py @@ -43,7 +43,7 @@ class LookaheadAnalysis: def __init__(self, config: Dict[str, Any], strategy_obj: Dict): self.failed_bias_check = True - self.full_varHolder = VarHolder + self.full_varHolder = VarHolder() self.entry_varHolders: List[VarHolder] = [] self.exit_varHolders: List[VarHolder] = [] @@ -90,7 +90,7 @@ class LookaheadAnalysis: return False # analyzes two data frames with processed indicators and shows differences between them. - def analyze_indicators(self, full_vars: VarHolder, cut_vars: VarHolder, current_pair): + def analyze_indicators(self, full_vars: VarHolder, cut_vars: VarHolder, current_pair: str): # extract dataframes cut_df: DataFrame = cut_vars.indicators[current_pair] full_df: DataFrame = full_vars.indicators[current_pair] @@ -103,29 +103,30 @@ class LookaheadAnalysis: (cut_df.date == cut_vars.compared_dt) ].reset_index(drop=True) - # compare dataframes - if full_df_cut.shape[0] != 0: - if cut_df_cut.shape[0] != 0: - compare_df = full_df_cut.compare(cut_df_cut) + # check if dataframes are not empty + if full_df_cut.shape[0] != 0 and cut_df_cut.shape[0] != 0: - if compare_df.shape[0] > 0: - for col_name, values in compare_df.items(): - col_idx = compare_df.columns.get_loc(col_name) - compare_df_row = compare_df.iloc[0] - # compare_df now comprises tuples with [1] having either 'self' or 'other' - if 'other' in col_name[1]: - continue - self_value = compare_df_row[col_idx] - other_value = compare_df_row[col_idx + 1] + # compare dataframes + compare_df = full_df_cut.compare(cut_df_cut) - # output differences - if self_value != other_value: + if compare_df.shape[0] > 0: + for col_name, values in compare_df.items(): + col_idx = compare_df.columns.get_loc(col_name) + compare_df_row = compare_df.iloc[0] + # compare_df now comprises tuples with [1] having either 'self' or 'other' + if 'other' in col_name[1]: + continue + self_value = compare_df_row[col_idx] + other_value = compare_df_row[col_idx + 1] - if not self.current_analysis.false_indicators.__contains__(col_name[0]): - self.current_analysis.false_indicators.append(col_name[0]) - logger.info(f"=> found look ahead bias in indicator " - f"{col_name[0]}. " - f"{str(self_value)} != {str(other_value)}") + # output differences + if self_value != other_value: + + if not self.current_analysis.false_indicators.__contains__(col_name[0]): + self.current_analysis.false_indicators.append(col_name[0]) + logger.info(f"=> found look ahead bias in indicator " + f"{col_name[0]}. 
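The one-character fix above, `VarHolder` to `VarHolder()`, is worth dwelling on: without the parentheses the attribute binds the class object itself, so later attribute writes mutate state shared by every user of the class. A standalone illustration:

```python
class VarHolder:
    pass


holder_cls = VarHolder   # binds the class object: shared state (the bug)
holder = VarHolder()     # binds a fresh instance (what the fix creates)

holder_cls.compared_dt = "leaks onto the class"
print(VarHolder.compared_dt)     # 'leaks onto the class', visible globally
holder.compared_dt = "stays per-instance"
print(holder.compared_dt)        # isolated to this one instance
```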
" + f"{str(self_value)} != {str(other_value)}") def prepare_data(self, varholder: VarHolder, pairs_to_load: List[DataFrame]): From b0ab400ff36b5c60105540ead2c4a6d4ea177b0a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 17 Jun 2023 15:39:33 +0200 Subject: [PATCH 100/130] fix: ensure test_size=0 is still accommodated --- .../freqai/RL/BaseReinforcementLearningModel.py | 11 ++++++----- .../freqai/base_models/BaseClassifierModel.py | 11 ++++++----- .../freqai/base_models/BasePyTorchClassifier.py | 11 ++++++----- .../freqai/base_models/BasePyTorchRegressor.py | 13 ++++++++----- .../freqai/base_models/BaseRegressionModel.py | 16 ++++++++-------- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 81cacc055..4f7b55967 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -126,11 +126,12 @@ class BaseReinforcementLearningModel(IFreqaiModel): dd["train_labels"], dd["train_weights"]) - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f'Training model on {len(dk.data_dictionary["train_features"].columns)}' diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py index e536efea3..0a6100df3 100644 --- a/freqtrade/freqai/base_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -61,11 +61,12 @@ class BaseClassifierModel(IFreqaiModel): dd["train_labels"], dd["train_weights"]) - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py index 57f31629a..8a4e15308 100644 --- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py +++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py @@ -197,11 +197,12 @@ class BasePyTorchClassifier(BasePyTorchModel): dd["train_labels"], dd["train_weights"]) - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py index b77fec31a..325743134 100644 --- 
a/freqtrade/freqai/base_models/BasePyTorchRegressor.py +++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py @@ -96,12 +96,15 @@ class BasePyTorchRegressor(BasePyTorchModel): dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], dd["train_labels"], dd["train_weights"]) + dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" diff --git a/freqtrade/freqai/base_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py index 3cce978b5..2e07d3fb7 100644 --- a/freqtrade/freqai/base_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -60,15 +60,15 @@ class BaseRegressionModel(IFreqaiModel): dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], dd["train_labels"], dd["train_weights"]) - - (dd["test_features"], - dd["test_labels"], - dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], - dd["test_labels"], - dd["test_weights"]) - dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"]) - dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + (dd["test_features"], + dd["test_labels"], + dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], + dd["test_labels"], + dd["test_weights"]) + dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"]) logger.info( f"Training model on {len(dk.data_dictionary['train_features'].columns)} features" From 886b86f7c567e46e589717bb6691dbed6f22e6bd Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 17 Jun 2023 16:14:48 +0200 Subject: [PATCH 101/130] chore: bump datasieve --- requirements-freqai.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-freqai.txt b/requirements-freqai.txt index b9a50d92f..0d88976d0 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -9,4 +9,4 @@ catboost==1.2; 'arm' not in platform_machine lightgbm==3.3.5 xgboost==1.7.5 tensorboard==2.13.0 -datasieve==0.1.4 +datasieve==0.1.5 diff --git a/setup.py b/setup.py index 996a8b8f9..4b73ae653 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ freqai = [ 'lightgbm', 'xgboost', 'tensorboard', - 'datasieve>=0.1.4' + 'datasieve>=0.1.5' ] freqai_rl = [ From e0d5242a45adcdba7d44b8b629471c5740bac624 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 14:55:23 +0200 Subject: [PATCH 102/130] Reduce download-data verbosity --- freqtrade/data/history/history_utils.py | 2 +- tests/data/test_history.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py index 2833a6d50..7881130e2 100644 --- a/freqtrade/data/history/history_utils.py +++ b/freqtrade/data/history/history_utils.py @@ -294,7 +294,7 @@ def refresh_backtest_ohlcv_data(exchange: 
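The `test_size` guard repeated throughout patch 100 above reduces to a single condition. A hedged miniature (the config layout follows freqai's `data_split_parameters` convention; the pipeline call is replaced by a stand-in):

```python
from typing import Any, Dict, List

freqai_info: Dict[str, Any] = {'data_split_parameters': {'test_size': 0}}


def transform_test_split(dd: Dict[str, List[float]]) -> None:
    # Only touch the test set when a test split actually exists:
    if freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
        dd['test_features'] = [x * 2 for x in dd['test_features']]  # stand-in


dd = {'test_features': []}
transform_test_split(dd)
print(dd)   # {'test_features': []} -- untouched, no empty-split transform
```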
Exchange, pairs: List[str], timeframes continue for timeframe in timeframes: - logger.info(f'Downloading pair {pair}, interval {timeframe}.') + logger.debug(f'Downloading pair {pair}, {candle_type}, interval {timeframe}.') process = f'{idx}/{len(pairs)}' _download_pair_history(pair=pair, process=process, datadir=datadir, exchange=exchange, diff --git a/tests/data/test_history.py b/tests/data/test_history.py index e397c97c1..ab2238b08 100644 --- a/tests/data/test_history.py +++ b/tests/data/test_history.py @@ -1,6 +1,7 @@ # pragma pylint: disable=missing-docstring, protected-access, C0103 import json +import logging import uuid from pathlib import Path from shutil import copyfile @@ -503,9 +504,10 @@ def test_validate_backtest_data(default_conf, mocker, caplog, testdatadir) -> No ]) def test_refresh_backtest_ohlcv_data( mocker, default_conf, markets, caplog, testdatadir, trademode, callcount): - dl_mock = mocker.patch('freqtrade.data.history.history_utils._download_pair_history', - MagicMock()) + caplog.set_level(logging.DEBUG) + dl_mock = mocker.patch('freqtrade.data.history.history_utils._download_pair_history') mocker.patch(f'{EXMS}.markets', PropertyMock(return_value=markets)) + mocker.patch.object(Path, "exists", MagicMock(return_value=True)) mocker.patch.object(Path, "unlink", MagicMock()) @@ -520,7 +522,7 @@ def test_refresh_backtest_ohlcv_data( assert dl_mock.call_count == callcount assert dl_mock.call_args[1]['timerange'].starttype == 'date' - assert log_has("Downloading pair ETH/BTC, interval 1m.", caplog) + assert log_has_re(r"Downloading pair ETH/BTC, .* interval 1m\.", caplog) def test_download_data_no_markets(mocker, default_conf, caplog, testdatadir): From b8ab6fe42bae3bcef4014181b27aca631b734c86 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 14:59:01 +0200 Subject: [PATCH 103/130] Improve wording of check command --- freqtrade/commands/data_commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/commands/data_commands.py b/freqtrade/commands/data_commands.py index ed1571002..d4fd050eb 100644 --- a/freqtrade/commands/data_commands.py +++ b/freqtrade/commands/data_commands.py @@ -20,7 +20,7 @@ from freqtrade.util.binance_mig import migrate_binance_futures_data logger = logging.getLogger(__name__) -def _data_download_sanity(config: Config) -> None: +def _check_data_config_download_sanity(config: Config) -> None: if 'days' in config and 'timerange' in config: raise OperationalException("--days and --timerange are mutually exclusive. 
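The renamed sanity check above, in miniature (simplified stand-in using `ValueError` instead of freqtrade's `OperationalException`, so the snippet stays dependency-free):

```python
def check_data_config_download_sanity(config: dict) -> None:
    if 'days' in config and 'timerange' in config:
        raise ValueError("--days and --timerange are mutually exclusive. "
                         "You can only specify one or the other.")


check_data_config_download_sanity({'days': 30})   # passes silently
try:
    check_data_config_download_sanity({'days': 30, 'timerange': '20230101-'})
except ValueError as err:
    print(err)
```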
" "You can only specify one or the other.") @@ -37,7 +37,7 @@ def start_download_data(args: Dict[str, Any]) -> None: """ config = setup_utils_configuration(args, RunMode.UTIL_EXCHANGE) - _data_download_sanity(config) + _check_data_config_download_sanity(config) timerange = TimeRange() if 'days' in config: time_since = (datetime.now() - timedelta(days=config['days'])).strftime("%Y%m%d") From 7453ff2fb587938bd7871f2f4c89cd7edfe04ebb Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 15:13:56 +0200 Subject: [PATCH 104/130] Migrate download-data out of commands section --- freqtrade/commands/data_commands.py | 76 ++---------------------- freqtrade/data/history/__init__.py | 6 +- freqtrade/data/history/history_utils.py | 77 ++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 76 deletions(-) diff --git a/freqtrade/commands/data_commands.py b/freqtrade/commands/data_commands.py index d4fd050eb..6d26b3ba9 100644 --- a/freqtrade/commands/data_commands.py +++ b/freqtrade/commands/data_commands.py @@ -1,18 +1,16 @@ import logging import sys from collections import defaultdict -from datetime import datetime, timedelta -from typing import Any, Dict, List +from typing import Any, Dict from freqtrade.configuration import TimeRange, setup_utils_configuration from freqtrade.constants import DATETIME_PRINT_FORMAT, Config from freqtrade.data.converter import convert_ohlcv_format, convert_trades_format -from freqtrade.data.history import (convert_trades_to_ohlcv, refresh_backtest_ohlcv_data, - refresh_backtest_trades_data) +from freqtrade.data.history import convert_trades_to_ohlcv, download_data_main from freqtrade.enums import CandleType, RunMode, TradingMode from freqtrade.exceptions import OperationalException -from freqtrade.exchange import market_is_active, timeframe_to_minutes -from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist, expand_pairlist +from freqtrade.exchange import timeframe_to_minutes +from freqtrade.plugins.pairlist.pairlist_helpers import expand_pairlist from freqtrade.resolvers import ExchangeResolver from freqtrade.util.binance_mig import migrate_binance_futures_data @@ -38,77 +36,13 @@ def start_download_data(args: Dict[str, Any]) -> None: config = setup_utils_configuration(args, RunMode.UTIL_EXCHANGE) _check_data_config_download_sanity(config) - timerange = TimeRange() - if 'days' in config: - time_since = (datetime.now() - timedelta(days=config['days'])).strftime("%Y%m%d") - timerange = TimeRange.parse_timerange(f'{time_since}-') - - if 'timerange' in config: - timerange = timerange.parse_timerange(config['timerange']) - - # Remove stake-currency to skip checks which are not relevant for datadownload - config['stake_currency'] = '' - - pairs_not_available: List[str] = [] - - # Init exchange - exchange = ExchangeResolver.load_exchange(config, validate=False) - markets = [p for p, m in exchange.markets.items() if market_is_active(m) - or config.get('include_inactive')] - - expanded_pairs = dynamic_expand_pairlist(config, markets) - - # Manual validations of relevant settings - if not config['exchange'].get('skip_pair_validation', False): - exchange.validate_pairs(expanded_pairs) - logger.info(f"About to download pairs: {expanded_pairs}, " - f"intervals: {config['timeframes']} to {config['datadir']}") - - for timeframe in config['timeframes']: - exchange.validate_timeframes(timeframe) try: - - if config.get('download_trades'): - if config.get('trading_mode') == 'futures': - raise OperationalException("Trade download not supported for 
futures.") - pairs_not_available = refresh_backtest_trades_data( - exchange, pairs=expanded_pairs, datadir=config['datadir'], - timerange=timerange, new_pairs_days=config['new_pairs_days'], - erase=bool(config.get('erase')), data_format=config['dataformat_trades']) - - # Convert downloaded trade data to different timeframes - convert_trades_to_ohlcv( - pairs=expanded_pairs, timeframes=config['timeframes'], - datadir=config['datadir'], timerange=timerange, erase=bool(config.get('erase')), - data_format_ohlcv=config['dataformat_ohlcv'], - data_format_trades=config['dataformat_trades'], - ) - else: - if not exchange.get_option('ohlcv_has_history', True): - raise OperationalException( - f"Historic klines not available for {exchange.name}. " - "Please use `--dl-trades` instead for this exchange " - "(will unfortunately take a long time)." - ) - migrate_binance_futures_data(config) - pairs_not_available = refresh_backtest_ohlcv_data( - exchange, pairs=expanded_pairs, timeframes=config['timeframes'], - datadir=config['datadir'], timerange=timerange, - new_pairs_days=config['new_pairs_days'], - erase=bool(config.get('erase')), data_format=config['dataformat_ohlcv'], - trading_mode=config.get('trading_mode', 'spot'), - prepend=config.get('prepend_data', False) - ) + download_data_main(config) except KeyboardInterrupt: sys.exit("SIGINT received, aborting ...") - finally: - if pairs_not_available: - logger.info(f"Pairs [{','.join(pairs_not_available)}] not available " - f"on exchange {exchange.name}.") - def start_convert_trades(args: Dict[str, Any]) -> None: diff --git a/freqtrade/data/history/__init__.py b/freqtrade/data/history/__init__.py index 107f9c401..414848c22 100644 --- a/freqtrade/data/history/__init__.py +++ b/freqtrade/data/history/__init__.py @@ -6,7 +6,7 @@ Includes: * download data from exchange and store to disk """ # flake8: noqa: F401 -from .history_utils import (convert_trades_to_ohlcv, get_timerange, load_data, load_pair_history, - refresh_backtest_ohlcv_data, refresh_backtest_trades_data, refresh_data, - validate_backtest_data) +from .history_utils import (convert_trades_to_ohlcv, download_data_main, get_timerange, load_data, + load_pair_history, refresh_backtest_ohlcv_data, + refresh_backtest_trades_data, refresh_data, validate_backtest_data) from .idatahandler import get_datahandler diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py index 7881130e2..b2b59337d 100644 --- a/freqtrade/data/history/history_utils.py +++ b/freqtrade/data/history/history_utils.py @@ -7,14 +7,16 @@ from typing import Dict, List, Optional, Tuple from pandas import DataFrame, concat from freqtrade.configuration import TimeRange -from freqtrade.constants import DATETIME_PRINT_FORMAT, DEFAULT_DATAFRAME_COLUMNS +from freqtrade.constants import DATETIME_PRINT_FORMAT, DEFAULT_DATAFRAME_COLUMNS, Config from freqtrade.data.converter import (clean_ohlcv_dataframe, ohlcv_to_dataframe, trades_remove_duplicates, trades_to_ohlcv) from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler from freqtrade.enums import CandleType from freqtrade.exceptions import OperationalException -from freqtrade.exchange import Exchange +from freqtrade.exchange import Exchange, market_is_active from freqtrade.misc import format_ms_time +from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist +from freqtrade.util.binance_mig import migrate_binance_futures_data logger = logging.getLogger(__name__) @@ -483,3 +485,74 @@ def 
validate_backtest_data(data: DataFrame, pair: str, min_date: datetime, logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values", pair, expected_frames, dflen, expected_frames - dflen) return found_missing + + +def download_data_main(config: Config) -> None: + + timerange = TimeRange() + if 'days' in config: + time_since = (datetime.now() - timedelta(days=config['days'])).strftime("%Y%m%d") + timerange = TimeRange.parse_timerange(f'{time_since}-') + + if 'timerange' in config: + timerange = timerange.parse_timerange(config['timerange']) + + # Remove stake-currency to skip checks which are not relevant for datadownload + config['stake_currency'] = '' + + pairs_not_available: List[str] = [] + + # Init exchange + from freqtrade.resolvers.exchange_resolver import ExchangeResolver + exchange = ExchangeResolver.load_exchange(config, validate=False) + markets = [p for p, m in exchange.markets.items() if market_is_active(m) + or config.get('include_inactive')] + + expanded_pairs = dynamic_expand_pairlist(config, markets) + + # Manual validations of relevant settings + if not config['exchange'].get('skip_pair_validation', False): + exchange.validate_pairs(expanded_pairs) + logger.info(f"About to download pairs: {expanded_pairs}, " + f"intervals: {config['timeframes']} to {config['datadir']}") + + for timeframe in config['timeframes']: + exchange.validate_timeframes(timeframe) + + # Start downloading + try: + if config.get('download_trades'): + if config.get('trading_mode') == 'futures': + raise OperationalException("Trade download not supported for futures.") + pairs_not_available = refresh_backtest_trades_data( + exchange, pairs=expanded_pairs, datadir=config['datadir'], + timerange=timerange, new_pairs_days=config['new_pairs_days'], + erase=bool(config.get('erase')), data_format=config['dataformat_trades']) + + # Convert downloaded trade data to different timeframes + convert_trades_to_ohlcv( + pairs=expanded_pairs, timeframes=config['timeframes'], + datadir=config['datadir'], timerange=timerange, erase=bool(config.get('erase')), + data_format_ohlcv=config['dataformat_ohlcv'], + data_format_trades=config['dataformat_trades'], + ) + else: + if not exchange.get_option('ohlcv_has_history', True): + raise OperationalException( + f"Historic klines not available for {exchange.name}. " + "Please use `--dl-trades` instead for this exchange " + "(will unfortunately take a long time)." 
+ ) + migrate_binance_futures_data(config) + pairs_not_available = refresh_backtest_ohlcv_data( + exchange, pairs=expanded_pairs, timeframes=config['timeframes'], + datadir=config['datadir'], timerange=timerange, + new_pairs_days=config['new_pairs_days'], + erase=bool(config.get('erase')), data_format=config['dataformat_ohlcv'], + trading_mode=config.get('trading_mode', 'spot'), + prepend=config.get('prepend_data', False) + ) + finally: + if pairs_not_available: + logger.info(f"Pairs [{','.join(pairs_not_available)}] not available " + f"on exchange {exchange.name}.") From 6f0f9546868e1bbe9f42c2f59fb59c13904aa269 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 15:18:31 +0200 Subject: [PATCH 105/130] Adjust mocks for new import location --- tests/commands/test_commands.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index fe847e94b..5a27b60c8 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -641,7 +641,7 @@ def test_get_ui_download_url_direct(mocker): def test_download_data_keyboardInterrupt(mocker, markets): - dl_mock = mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + dl_mock = mocker.patch('freqtrade.commands.data_commands.download_data_main', MagicMock(side_effect=KeyboardInterrupt)) patch_exchange(mocker) mocker.patch(f'{EXMS}.markets', PropertyMock(return_value=markets)) @@ -660,7 +660,7 @@ def test_download_data_keyboardInterrupt(mocker, markets): def test_download_data_timerange(mocker, markets): - dl_mock = mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker) mocker.patch(f'{EXMS}.markets', PropertyMock(return_value=markets)) @@ -708,7 +708,7 @@ def test_download_data_timerange(mocker, markets): def test_download_data_no_markets(mocker, caplog): - dl_mock = mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker, id='binance') mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) @@ -724,7 +724,7 @@ def test_download_data_no_markets(mocker, caplog): def test_download_data_no_exchange(mocker, caplog): - mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker) mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) @@ -740,7 +740,7 @@ def test_download_data_no_exchange(mocker, caplog): def test_download_data_no_pairs(mocker): - mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker) mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) @@ -758,7 +758,7 @@ def test_download_data_no_pairs(mocker): def test_download_data_all_pairs(mocker, markets): - dl_mock = mocker.patch('freqtrade.commands.data_commands.refresh_backtest_ohlcv_data', + dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) 
patch_exchange(mocker)
     mocker.patch(f'{EXMS}.markets', PropertyMock(return_value=markets))
@@ -793,9 +793,9 @@ def test_download_data_all_pairs(mocker, markets):
 def test_download_data_trades(mocker, caplog):
-    dl_mock = mocker.patch('freqtrade.commands.data_commands.refresh_backtest_trades_data',
+    dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_trades_data',
                            MagicMock(return_value=[]))
-    convert_mock = mocker.patch('freqtrade.commands.data_commands.convert_trades_to_ohlcv',
+    convert_mock = mocker.patch('freqtrade.data.history.history_utils.convert_trades_to_ohlcv',
                                 MagicMock(return_value=[]))
     patch_exchange(mocker)
     mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={}))

From 66b34edc0b8e235fdd31af9e5b4d54d9aa429a72 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 17 Jun 2023 18:03:57 +0200
Subject: [PATCH 106/130] Clarify variable name

---
 freqtrade/data/history/history_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py
index b2b59337d..93e230891 100644
--- a/freqtrade/data/history/history_utils.py
+++ b/freqtrade/data/history/history_utils.py
@@ -505,10 +505,10 @@ def download_data_main(config: Config) -> None:
     # Init exchange
     from freqtrade.resolvers.exchange_resolver import ExchangeResolver
     exchange = ExchangeResolver.load_exchange(config, validate=False)
-    markets = [p for p, m in exchange.markets.items() if market_is_active(m)
-               or config.get('include_inactive')]
+    available_pairs = [p for p, m in exchange.markets.items() if market_is_active(m)
+                       or config.get('include_inactive')]

-    expanded_pairs = dynamic_expand_pairlist(config, markets)
+    expanded_pairs = dynamic_expand_pairlist(config, available_pairs)

     # Manual validations of relevant settings
     if not config['exchange'].get('skip_pair_validation', False):
         exchange.validate_pairs(expanded_pairs)

From 937734365f8e6559c3eb1c6129e11c0bf5c00daa Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 17 Jun 2023 18:04:41 +0200
Subject: [PATCH 107/130] Improve typehint for markets

---
 freqtrade/exchange/exchange.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/exchange/exchange.py b/freqtrade/exchange/exchange.py
index ef3bea537..2cf98c266 100644
--- a/freqtrade/exchange/exchange.py
+++ b/freqtrade/exchange/exchange.py
@@ -301,7 +301,7 @@ class Exchange:
         return list((self._api.timeframes or {}).keys())

     @property
-    def markets(self) -> Dict:
+    def markets(self) -> Dict[str, Any]:
         """exchange ccxt markets"""
         if not self._markets:
             logger.info("Markets were not loaded. Loading them now..")

From 0be4084eac3a2dfbba273795f68c2fe21cb9ad48 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 17 Jun 2023 18:14:58 +0200
Subject: [PATCH 108/130] Don't allow downloading wrong pairs

Prior to this, BTC/USDT:USDT could be downloaded to the spot directory,
as it was filtered improperly.
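A minimal sketch of the corrected filtering (illustrative only; `exchange` is assumed to be a loaded freqtrade `Exchange` instance, and the wrapper function is hypothetical, not part of this patch):

```python
from typing import List


def list_downloadable_pairs(exchange, include_inactive: bool = False) -> List[str]:
    # get_markets(tradable_only=True) also honors the configured trading mode,
    # so a futures market such as BTC/USDT:USDT is no longer offered when
    # downloading spot data; previously only market activity was checked.
    return list(
        exchange.get_markets(
            tradable_only=True, active_only=not include_inactive
        ).keys()
    )
```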
--- freqtrade/data/history/history_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py index 93e230891..e61f59cfa 100644 --- a/freqtrade/data/history/history_utils.py +++ b/freqtrade/data/history/history_utils.py @@ -13,7 +13,7 @@ from freqtrade.data.converter import (clean_ohlcv_dataframe, ohlcv_to_dataframe, from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler from freqtrade.enums import CandleType from freqtrade.exceptions import OperationalException -from freqtrade.exchange import Exchange, market_is_active +from freqtrade.exchange import Exchange from freqtrade.misc import format_ms_time from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist from freqtrade.util.binance_mig import migrate_binance_futures_data @@ -505,8 +505,11 @@ def download_data_main(config: Config) -> None: # Init exchange from freqtrade.resolvers.exchange_resolver import ExchangeResolver exchange = ExchangeResolver.load_exchange(config, validate=False) - available_pairs = [p for p, m in exchange.markets.items() if market_is_active(m) - or config.get('include_inactive')] + available_pairs = [ + p for p in exchange.get_markets( + tradable_only=True, active_only=not config.get('include_inactive') + ).keys() + ] expanded_pairs = dynamic_expand_pairlist(config, available_pairs) From 44a38e8362c42826458778a19a4a230eb779ccfc Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 18:22:47 +0200 Subject: [PATCH 109/130] Update download data tests --- tests/commands/test_commands.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index 5a27b60c8..fccfd8ebb 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -711,7 +711,7 @@ def test_download_data_no_markets(mocker, caplog): dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker, id='binance') - mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) + mocker.patch(f'{EXMS}.get_markets', return_value={}) args = [ "download-data", "--exchange", "binance", @@ -723,11 +723,11 @@ def test_download_data_no_markets(mocker, caplog): assert log_has("Pairs [ETH/BTC,XRP/BTC] not available on exchange Binance.", caplog) -def test_download_data_no_exchange(mocker, caplog): +def test_download_data_no_exchange(mocker): mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data', MagicMock(return_value=["ETH/BTC", "XRP/BTC"])) patch_exchange(mocker) - mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) + mocker.patch(f'{EXMS}.get_markets', return_value={}) args = [ "download-data", ] @@ -792,13 +792,13 @@ def test_download_data_all_pairs(mocker, markets): assert set(dl_mock.call_args_list[0][1]['pairs']) == expected -def test_download_data_trades(mocker, caplog): +def test_download_data_trades(mocker): dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_trades_data', MagicMock(return_value=[])) convert_mock = mocker.patch('freqtrade.data.history.history_utils.convert_trades_to_ohlcv', MagicMock(return_value=[])) patch_exchange(mocker) - mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={})) + mocker.patch(f'{EXMS}.get_markets', return_value={}) args = [ "download-data", "--exchange", "kraken", @@ -829,7 +829,7 @@ def 
test_download_data_trades(mocker, caplog):
 def test_download_data_data_invalid(mocker):
     patch_exchange(mocker, id="kraken")
-    mocker.patch(f'{EXMS}.markets', PropertyMock(return_value={}))
+    mocker.patch(f'{EXMS}.get_markets', return_value={})
     args = [
         "download-data",
         "--exchange", "kraken",

From 7af14d19851b68495db1ac9b5cfdae6545f457cd Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 17 Jun 2023 18:26:08 +0200
Subject: [PATCH 110/130] Fix random test failure

---
 freqtrade/persistence/trade_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/freqtrade/persistence/trade_model.py b/freqtrade/persistence/trade_model.py
index 5dee2a53c..ddc147763 100644
--- a/freqtrade/persistence/trade_model.py
+++ b/freqtrade/persistence/trade_model.py
@@ -1391,7 +1391,10 @@ class Trade(ModelBase, LocalTrade):
         e.g. `(trade_filter=Trade.id == trade_id)`
         :return: unsorted query object
         """
-        return Trade.session.scalars(Trade.get_trades_query(trade_filter, include_orders))
+        query = Trade.get_trades_query(trade_filter, include_orders)
+        # this should remain split. If use_db is False, session is not available and the above will
+        # raise an exception.
+        return Trade.session.scalars(query)

     @staticmethod
     def get_open_order_trades() -> List['Trade']:

From d94f3e7679f9fab37f9b61648151ab55d07eb75e Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 17 Jun 2023 20:00:24 +0200
Subject: [PATCH 111/130] Add explicit tests for download-data (without the
 command part)

---
 tests/data/test_download_data.py | 96 ++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 tests/data/test_download_data.py

diff --git a/tests/data/test_download_data.py b/tests/data/test_download_data.py
new file mode 100644
index 000000000..191dbb7d3
--- /dev/null
+++ b/tests/data/test_download_data.py
@@ -0,0 +1,96 @@
+from unittest.mock import MagicMock, PropertyMock
+
+import pytest
+
+from freqtrade.configuration.config_setup import setup_utils_configuration
+from freqtrade.data.history.history_utils import download_data_main
+from freqtrade.enums import RunMode
+from freqtrade.exceptions import OperationalException
+from tests.conftest import EXMS, log_has, patch_exchange
+
+
+def test_download_data_main_no_markets(mocker, caplog):
+    dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data',
+                           MagicMock(return_value=["ETH/BTC", "XRP/BTC"]))
+    patch_exchange(mocker, id='binance')
+    mocker.patch(f'{EXMS}.get_markets', return_value={})
+    config = setup_utils_configuration({"exchange": "binance"}, RunMode.UTIL_EXCHANGE)
+    config.update({
+        "days": 20,
+        "pairs": ["ETH/BTC", "XRP/BTC"],
+        "timeframes": ["5m", "1h"]
+    })
+    download_data_main(config)
+    assert dl_mock.call_args[1]['timerange'].starttype == "date"
+    assert log_has("Pairs [ETH/BTC,XRP/BTC] not available on exchange Binance.", caplog)
+
+
+def test_download_data_main_all_pairs(mocker, markets):
+
+    dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_ohlcv_data',
+                           MagicMock(return_value=["ETH/BTC", "XRP/BTC"]))
+    patch_exchange(mocker)
+    mocker.patch(f'{EXMS}.markets', PropertyMock(return_value=markets))
+
+    config = setup_utils_configuration({"exchange": "binance"}, RunMode.UTIL_EXCHANGE)
+    config.update({
+        "pairs": [".*/USDT"],
+        "timeframes": ["5m", "1h"]
+    })
+    download_data_main(config)
+    expected = set(['ETH/USDT', 'XRP/USDT', 'NEO/USDT', 'TKN/USDT'])
+    assert set(dl_mock.call_args_list[0][1]['pairs']) == expected
+    assert dl_mock.call_count == 1
+
+
dl_mock.reset_mock() + + config.update({ + "pairs": [".*/USDT"], + "timeframes": ["5m", "1h"], + "include_inactive": True + }) + download_data_main(config) + expected = set(['ETH/USDT', 'LTC/USDT', 'XRP/USDT', 'NEO/USDT', 'TKN/USDT']) + assert set(dl_mock.call_args_list[0][1]['pairs']) == expected + + +def test_download_data_main_trades(mocker): + dl_mock = mocker.patch('freqtrade.data.history.history_utils.refresh_backtest_trades_data', + MagicMock(return_value=[])) + convert_mock = mocker.patch('freqtrade.data.history.history_utils.convert_trades_to_ohlcv', + MagicMock(return_value=[])) + patch_exchange(mocker) + mocker.patch(f'{EXMS}.get_markets', return_value={}) + config = setup_utils_configuration({"exchange": "binance"}, RunMode.UTIL_EXCHANGE) + config.update({ + "days": 20, + "pairs": ["ETH/BTC", "XRP/BTC"], + "timeframes": ["5m", "1h"], + "download_trades": True, + }) + download_data_main(config) + + assert dl_mock.call_args[1]['timerange'].starttype == "date" + assert dl_mock.call_count == 1 + assert convert_mock.call_count == 1 + config.update({ + "download_trades": True, + "trading_mode": "futures", + }) + + with pytest.raises(OperationalException, + match="Trade download not supported for futures."): + download_data_main(config) + + +def test_download_data_main_data_invalid(mocker): + patch_exchange(mocker, id="kraken") + mocker.patch(f'{EXMS}.get_markets', return_value={}) + config = setup_utils_configuration({"exchange": "kraken"}, RunMode.UTIL_EXCHANGE) + config.update({ + "days": 20, + "pairs": ["ETH/BTC", "XRP/BTC"], + "timeframes": ["5m", "1h"], + }) + with pytest.raises(OperationalException, match=r"Historic klines not available for .*"): + download_data_main(config) From 52db6ac7d759dac1fbf0ed1c209324638d535a85 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 17 Jun 2023 20:35:23 +0200 Subject: [PATCH 112/130] Use proper log level --- tests/freqai/test_freqai_interface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 61a7b7346..0d8ffb766 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -1,3 +1,4 @@ +import logging import platform import shutil import sys @@ -540,6 +541,7 @@ def test_get_required_data_timerange(mocker, freqai_conf): def test_download_all_data_for_training(mocker, freqai_conf, caplog, tmpdir): + caplog.set_level(logging.DEBUG) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) pairlist = PairListManager(exchange, freqai_conf) From 7e2f857aa5fcc2273d4d114ff9d0e65ddbcac741 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Sun, 18 Jun 2023 11:30:33 +0200 Subject: [PATCH 113/130] Update BasePyTorchClassifier.py --- freqtrade/freqai/base_models/BasePyTorchClassifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py index 8a4e15308..c47c5069a 100644 --- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py +++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py @@ -91,7 +91,7 @@ class BasePyTorchClassifier(BasePyTorchModel): pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]]) pred_df = pd.concat([pred_df, pred_df_prob], axis=1) - if self.freqai_info.get("DI_threshold", 0) > 0: + if dk.feature_pipeline["di"]: dk.DI_values = dk.feature_pipeline["di"].di_values else: dk.DI_values = np.zeros(len(outliers.index)) 
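The log-level adjustments above rely on a standard pytest pattern: `caplog` only captures records at or above its configured level, so a test asserting on a DEBUG message must lower the capture level first. A minimal, self-contained sketch (the test name and log message are illustrative, not taken from the patches):

```python
import logging


def test_debug_log_is_captured(caplog):
    # Without set_level, DEBUG records are filtered out and the assertion
    # below would fail; this is exactly why the tests above add it.
    caplog.set_level(logging.DEBUG)
    logging.getLogger("freqtrade.demo").debug(
        "Downloading pair ETH/BTC, spot, interval 1m.")
    assert "Downloading pair ETH/BTC" in caplog.text
```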
From cca4fa1178e8186f21e1b90645a82e7fce8ac9c4 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Sun, 18 Jun 2023 11:31:03 +0200 Subject: [PATCH 114/130] Update BaseClassifierModel.py --- freqtrade/freqai/base_models/BaseClassifierModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/base_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py index 0a6100df3..f35b07e66 100644 --- a/freqtrade/freqai/base_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -117,7 +117,7 @@ class BaseClassifierModel(IFreqaiModel): pred_df = pd.concat([pred_df, pred_df_prob], axis=1) - if self.freqai_info.get("DI_threshold", 0) > 0: + if dk.feature_pipeline["di"]: dk.DI_values = dk.feature_pipeline["di"].di_values else: dk.DI_values = np.zeros(len(outliers.index)) From 571dea6e9c74fd27fc3f336c24d278da646f380f Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 18 Jun 2023 15:45:26 +0200 Subject: [PATCH 115/130] Fix wrong final status on bg-tasks --- freqtrade/rpc/api_server/api_background_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/rpc/api_server/api_background_tasks.py b/freqtrade/rpc/api_server/api_background_tasks.py index e5339756b..c13fa31e4 100644 --- a/freqtrade/rpc/api_server/api_background_tasks.py +++ b/freqtrade/rpc/api_server/api_background_tasks.py @@ -70,9 +70,9 @@ def __run_pairlist(job_id: str, config_loc: Config): except (OperationalException, Exception) as e: logger.exception(e) ApiBG.jobs[job_id]['error'] = str(e) + ApiBG.jobs[job_id]['status'] = 'failed' finally: ApiBG.jobs[job_id]['is_running'] = False - ApiBG.jobs[job_id]['status'] = 'failed' ApiBG.pairlist_running = False From 1eb691d4617c3b91715b9697844f5ea64348237d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:56:37 +0000 Subject: [PATCH 116/130] Bump mkdocs-material from 9.1.15 to 9.1.16 Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.1.15 to 9.1.16. - [Release notes](https://github.com/squidfunk/mkdocs-material/releases) - [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG) - [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.1.15...9.1.16) --- updated-dependencies: - dependency-name: mkdocs-material dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- docs/requirements-docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 6c77736f4..5c936a868 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,6 +1,6 @@ markdown==3.3.7 mkdocs==1.4.3 -mkdocs-material==9.1.15 +mkdocs-material==9.1.16 mdx_truly_sane_lists==1.3 pymdown-extensions==10.0.1 jinja2==3.1.2 From ed90e77ea05d71fbe798e21096391ce05a1be9cb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:56:40 +0000 Subject: [PATCH 117/130] Bump nbconvert from 7.4.0 to 7.5.0 Bumps [nbconvert](https://github.com/jupyter/nbconvert) from 7.4.0 to 7.5.0. 
- [Release notes](https://github.com/jupyter/nbconvert/releases) - [Changelog](https://github.com/jupyter/nbconvert/blob/main/CHANGELOG.md) - [Commits](https://github.com/jupyter/nbconvert/compare/v7.4.0...v7.5.0) --- updated-dependencies: - dependency-name: nbconvert dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5239cd993..81ba8c59e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -20,7 +20,7 @@ isort==5.12.0 time-machine==2.9.0 # Convert jupyter notebooks to markdown documents -nbconvert==7.4.0 +nbconvert==7.5.0 # mypy types types-cachetools==5.3.0.5 From 8cc763b6644df6d32672644c2ae4c6f291a1b719 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:56:44 +0000 Subject: [PATCH 118/130] Bump time-machine from 2.9.0 to 2.10.0 Bumps [time-machine](https://github.com/adamchainz/time-machine) from 2.9.0 to 2.10.0. - [Changelog](https://github.com/adamchainz/time-machine/blob/main/CHANGELOG.rst) - [Commits](https://github.com/adamchainz/time-machine/compare/2.9.0...2.10.0) --- updated-dependencies: - dependency-name: time-machine dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5239cd993..f5ce4c292 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,7 +17,7 @@ pytest-mock==3.10.0 pytest-random-order==1.1.0 isort==5.12.0 # For datetime mocking -time-machine==2.9.0 +time-machine==2.10.0 # Convert jupyter notebooks to markdown documents nbconvert==7.4.0 From fc0548ce0bee55c6db33d292d72c64f08fc148d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:57:02 +0000 Subject: [PATCH 119/130] Bump filelock from 3.12.1 to 3.12.2 Bumps [filelock](https://github.com/tox-dev/py-filelock) from 3.12.1 to 3.12.2. - [Release notes](https://github.com/tox-dev/py-filelock/releases) - [Changelog](https://github.com/tox-dev/py-filelock/blob/main/docs/changelog.rst) - [Commits](https://github.com/tox-dev/py-filelock/compare/3.12.1...3.12.2) --- updated-dependencies: - dependency-name: filelock dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-hyperopt.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-hyperopt.txt b/requirements-hyperopt.txt index 20c60afe0..163fee75f 100644 --- a/requirements-hyperopt.txt +++ b/requirements-hyperopt.txt @@ -5,4 +5,4 @@ scipy==1.10.1 scikit-learn==1.1.3 scikit-optimize==0.9.0 -filelock==3.12.1 +filelock==3.12.2 From f82d52c6d34bd07eb0300c201532675b40c261aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:57:17 +0000 Subject: [PATCH 120/130] Bump xgboost from 1.7.5 to 1.7.6 Bumps [xgboost](https://github.com/dmlc/xgboost) from 1.7.5 to 1.7.6. 
- [Release notes](https://github.com/dmlc/xgboost/releases) - [Changelog](https://github.com/dmlc/xgboost/blob/master/NEWS.md) - [Commits](https://github.com/dmlc/xgboost/compare/v1.7.5...v1.7.6) --- updated-dependencies: - dependency-name: xgboost dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-freqai.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 0d88976d0..2eacbaffb 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -7,6 +7,6 @@ scikit-learn==1.1.3 joblib==1.2.0 catboost==1.2; 'arm' not in platform_machine lightgbm==3.3.5 -xgboost==1.7.5 +xgboost==1.7.6 tensorboard==2.13.0 datasieve==0.1.5 From f04598c5e5104fa087892ab6eba6f0c241ab2097 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:57:29 +0000 Subject: [PATCH 121/130] Bump pre-commit from 3.3.2 to 3.3.3 Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 3.3.2 to 3.3.3. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v3.3.2...v3.3.3) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5239cd993..af0df8c83 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ coveralls==3.3.1 ruff==0.0.272 mypy==1.3.0 -pre-commit==3.3.2 +pre-commit==3.3.3 pytest==7.3.2 pytest-asyncio==0.21.0 pytest-cov==4.1.0 From d82a0ad7b568e93a6c6e4a4024f75be45cc2938d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:57:35 +0000 Subject: [PATCH 122/130] Bump ccxt from 3.1.34 to 3.1.44 Bumps [ccxt](https://github.com/ccxt/ccxt) from 3.1.34 to 3.1.44. - [Changelog](https://github.com/ccxt/ccxt/blob/master/exchanges.cfg) - [Commits](https://github.com/ccxt/ccxt/compare/3.1.34...3.1.44) --- updated-dependencies: - dependency-name: ccxt dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6d58dd0f4..ef9b79502 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy==1.24.3 pandas==2.0.2 pandas-ta==0.3.14b -ccxt==3.1.34 +ccxt==3.1.44 cryptography==41.0.1; platform_machine != 'armv7l' cryptography==40.0.1; platform_machine == 'armv7l' aiohttp==3.8.4 From e965b2e4544b3743ad6fc9b92adf5f8ad9e29cbe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:58:02 +0000 Subject: [PATCH 123/130] Bump rich from 13.4.1 to 13.4.2 Bumps [rich](https://github.com/Textualize/rich) from 13.4.1 to 13.4.2. - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.4.1...v13.4.2) --- updated-dependencies: - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6d58dd0f4..eea26d4b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ jinja2==3.1.2 tables==3.8.0 blosc==1.11.1 joblib==1.2.0 -rich==13.4.1 +rich==13.4.2 pyarrow==12.0.0; platform_machine != 'armv7l' # find first, C search in arrays From 6bc3439cb79d855219a4c7439e69ea8b8726867d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 05:08:12 +0000 Subject: [PATCH 124/130] Bump pytest-mock from 3.10.0 to 3.11.1 Bumps [pytest-mock](https://github.com/pytest-dev/pytest-mock) from 3.10.0 to 3.11.1. - [Release notes](https://github.com/pytest-dev/pytest-mock/releases) - [Changelog](https://github.com/pytest-dev/pytest-mock/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-mock/compare/v3.10.0...v3.11.1) --- updated-dependencies: - dependency-name: pytest-mock dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index ecc4600a0..61e78a24a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -13,7 +13,7 @@ pre-commit==3.3.2 pytest==7.3.2 pytest-asyncio==0.21.0 pytest-cov==4.1.0 -pytest-mock==3.10.0 +pytest-mock==3.11.1 pytest-random-order==1.1.0 isort==5.12.0 # For datetime mocking From 859f7ff3de9a3101b7cb3d016bd5c18cc95bf930 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 19 Jun 2023 18:29:37 +0200 Subject: [PATCH 125/130] be explicit when loading pairs file. --- freqtrade/configuration/configuration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index a64eaa0ca..43ede568c 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -568,6 +568,7 @@ class Configuration: # Fall back to /dl_path/pairs.json pairs_file = config['datadir'] / 'pairs.json' if pairs_file.exists(): + logger.info(f'Reading pairs file "{pairs_file}".') config['pairs'] = load_file(pairs_file) if 'pairs' in config and isinstance(config['pairs'], list): config['pairs'].sort() From 8c54036fa539a577627cb0059f2e4ca4addc296b Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 20 Jun 2023 06:45:56 +0200 Subject: [PATCH 126/130] Move Downloading tip from pairs file section --- docs/data-download.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/data-download.md b/docs/data-download.md index a7b1987aa..4ee7aba02 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -83,6 +83,11 @@ Common arguments: ``` +!!! Tip "Downloading all data for one quote currency" + Often, you'll want to download data for all pairs of a specific quote-currency. In such cases, you can use the following shorthand: + `freqtrade download-data --exchange binance --pairs .*/USDT <...>`. The provided "pairs" string will be expanded to contain all active pairs on the exchange. + To also download data for inactive (delisted) pairs, add `--include-inactive-pairs` to the command. + !!! Note "Startup period" `download-data` is a strategy-independent command. The idea is to download a big chunk of data once, and then iteratively increase the amount of data stored. 
@@ -113,11 +118,6 @@ Mixing different stake-currencies is allowed for this file, since it's only used ] ``` -!!! Tip "Downloading all data for one quote currency" - Often, you'll want to download data for all pairs of a specific quote-currency. In such cases, you can use the following shorthand: - `freqtrade download-data --exchange binance --pairs .*/USDT <...>`. The provided "pairs" string will be expanded to contain all active pairs on the exchange. - To also download data for inactive (delisted) pairs, add `--include-inactive-pairs` to the command. - ??? Note "Permission denied errors" If your configuration directory `user_data` was made by docker, you may get the following error: From b0e5fb3940ee267f721df046b2194545ad6f2935 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 20 Jun 2023 06:50:59 +0200 Subject: [PATCH 127/130] Improve structure of download-data documentation --- docs/data-download.md | 94 +++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/docs/data-download.md b/docs/data-download.md index 4ee7aba02..1588d6eb5 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -93,44 +93,6 @@ Common arguments: For that reason, `download-data` does not care about the "startup-period" defined in a strategy. It's up to the user to download additional days if the backtest should start at a specific point in time (while respecting startup period). -### Pairs file - -In alternative to the whitelist from `config.json`, a `pairs.json` file can be used. -If you are using Binance for example: - -- create a directory `user_data/data/binance` and copy or create the `pairs.json` file in that directory. -- update the `pairs.json` file to contain the currency pairs you are interested in. - -```bash -mkdir -p user_data/data/binance -touch user_data/data/binance/pairs.json -``` - -The format of the `pairs.json` file is a simple json list. -Mixing different stake-currencies is allowed for this file, since it's only used for downloading. - -``` json -[ - "ETH/BTC", - "ETH/USDT", - "BTC/USDT", - "XRP/ETH" -] -``` - -??? Note "Permission denied errors" - If your configuration directory `user_data` was made by docker, you may get the following error: - - ``` - cp: cannot create regular file 'user_data/data/binance/pairs.json': Permission denied - ``` - - You can fix the permissions of your user-data directory as follows: - - ``` - sudo chown -R $UID:$GID user_data - ``` - ### Start download Then run: @@ -163,6 +125,19 @@ freqtrade download-data --exchange binance --pairs .*/USDT - Use `--timeframes` to specify what timeframe download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data. - To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with most other options. +??? 
Note "Permission denied errors" + If your configuration directory `user_data` was made by docker, you may get the following error: + + ``` + cp: cannot create regular file 'user_data/data/binance/pairs.json': Permission denied + ``` + + You can fix the permissions of your user-data directory as follows: + + ``` + sudo chown -R $UID:$GID user_data + ``` + #### Download additional data before the current timerange Assuming you downloaded all data from 2022 (`--timerange 20220101-`) - but you'd now like to also backtest with earlier data. @@ -238,7 +213,32 @@ Size has been taken from the BTC/USDT 1m spot combination for the timerange spec To have a best performance/size mix, we recommend the use of either feather or parquet. -#### Sub-command convert data +### Pairs file + +In alternative to the whitelist from `config.json`, a `pairs.json` file can be used. +If you are using Binance for example: + +- create a directory `user_data/data/binance` and copy or create the `pairs.json` file in that directory. +- update the `pairs.json` file to contain the currency pairs you are interested in. + +```bash +mkdir -p user_data/data/binance +touch user_data/data/binance/pairs.json +``` + +The format of the `pairs.json` file is a simple json list. +Mixing different stake-currencies is allowed for this file, since it's only used for downloading. + +``` json +[ + "ETH/BTC", + "ETH/USDT", + "BTC/USDT", + "XRP/ETH" +] +``` + +## Sub-command convert data ``` usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] @@ -290,7 +290,7 @@ Common arguments: ``` -##### Example converting data +### Example converting data The following command will convert all candle (OHLCV) data available in `~/.freqtrade/data/binance` from json to jsongz, saving diskspace in the process. It'll also remove original json data files (`--erase` parameter). @@ -299,7 +299,7 @@ It'll also remove original json data files (`--erase` parameter). freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtrade/data/binance -t 5m 15m --erase ``` -#### Sub-command convert trade data +## Sub-command convert trade data ``` usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] @@ -342,7 +342,7 @@ Common arguments: ``` -##### Example converting trades +### Example converting trades The following command will convert all available trade-data in `~/.freqtrade/data/kraken` from jsongz to json. It'll also remove original jsongz data files (`--erase` parameter). @@ -351,7 +351,7 @@ It'll also remove original jsongz data files (`--erase` parameter). freqtrade convert-trade-data --format-from jsongz --format-to json --datadir ~/.freqtrade/data/kraken --erase ``` -### Sub-command trades to ohlcv +## Sub-command trades to ohlcv When you need to use `--dl-trades` (kraken only) to download data, conversion of trades data to ohlcv data is the last step. This command will allow you to repeat this last step for additional timeframes without re-downloading the data. @@ -400,13 +400,13 @@ Common arguments: ``` -#### Example trade-to-ohlcv conversion +### Example trade-to-ohlcv conversion ``` bash freqtrade trades-to-ohlcv --exchange kraken -t 5m 1h 1d --pairs BTC/EUR ETH/EUR ``` -### Sub-command list-data +## Sub-command list-data You can get a list of downloaded data using the `list-data` sub-command. 
@@ -451,7 +451,7 @@ Common arguments: ``` -#### Example list-data +### Example list-data ```bash > freqtrade list-data --userdir ~/.freqtrade/user_data/ @@ -465,7 +465,7 @@ ETH/BTC 5m, 15m, 30m, 1h, 2h, 4h, 6h, 12h, 1d ETH/USDT 5m, 15m, 30m, 1h, 2h, 4h ``` -### Trades (tick) data +## Trades (tick) data By default, `download-data` sub-command downloads Candles (OHLCV) data. Some exchanges also provide historic trade-data via their API. This data can be useful if you need many different timeframes, since it is only downloaded once, and then resampled locally to the desired timeframes. From 96c2ca67e9010242d93b2bc5a5ed7d37272fee73 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 20 Jun 2023 06:51:11 +0200 Subject: [PATCH 128/130] Add usage note for pairs.json file --- docs/data-download.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/data-download.md b/docs/data-download.md index 1588d6eb5..06ef1a355 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -238,6 +238,10 @@ Mixing different stake-currencies is allowed for this file, since it's only used ] ``` +!!! Note + The `pairs.json` file is only used when no configuration is loaded (implicitly by naming, or via `--config` flag). + You can force the usage of this file via `--pairs-file pairs.json` - however we recommend to use the pairlist from within the configuration, either via `exchange.pair_whitelist` or `pairs` setting in the configuration. + ## Sub-command convert data ``` From 5d60c626454120ed8fdb196342d5e3dae5b3a3ec Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 20 Jun 2023 06:55:19 +0200 Subject: [PATCH 129/130] align list blocks --- docs/data-download.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/data-download.md b/docs/data-download.md index 06ef1a355..7b63e4556 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -117,13 +117,13 @@ freqtrade download-data --exchange binance --pairs .*/USDT ### Other Notes -- To use a different directory than the exchange specific default, use `--datadir user_data/data/some_directory`. -- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.) -- To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`. -- To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days). -- To download historical candle (OHLCV) data from a fixed starting point, use `--timerange 20200101-` - which will download all data from January 1st, 2020. -- Use `--timeframes` to specify what timeframe download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data. -- To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with most other options. +* To use a different directory than the exchange specific default, use `--datadir user_data/data/some_directory`. +* To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.) +* To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`. 
+* To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days).
+* To download historical candle (OHLCV) data from a fixed starting point, use `--timerange 20200101-` - which will download all data from January 1st, 2020.
+* Use `--timeframes` to specify which timeframes to download historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data.
+* To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with most other options.

 ??? Note "Permission denied errors"
     If your configuration directory `user_data` was made by docker, you may get the following error:

     ```
     cp: cannot create regular file 'user_data/data/binance/pairs.json': Permission denied
     ```

     You can fix the permissions of your user-data directory as follows:

     ```
     sudo chown -R $UID:$GID user_data
     ```

-#### Download additional data before the current timerange
+### Download additional data before the current timerange

 Assuming you downloaded all data from 2022 (`--timerange 20220101-`) - but you'd now like to also backtest with earlier data.
 You can do so by using the `--prepend` flag, combined with `--timerange` - specifying an end-date.

From 96c2ca67e9010242d93b2bc5a5ed7d37272fee73 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Tue, 20 Jun 2023 06:57:48 +0200
Subject: [PATCH 130/130] Improve docs wording

---
 docs/data-download.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/data-download.md b/docs/data-download.md
index 7b63e4556..d45c7ef63 100644
--- a/docs/data-download.md
+++ b/docs/data-download.md
@@ -6,7 +6,7 @@ To download data (candles / OHLCV) needed for backtesting and hyperoptimization

 If no additional parameter is specified, freqtrade will download data for `"1m"` and `"5m"` timeframes for the last 30 days.
 Exchange and pairs will come from `config.json` (if specified using `-c/--config`).
-Otherwise `--exchange` becomes mandatory.
+Without a provided configuration, `--exchange` becomes mandatory.

 You can use a relative timerange (`--days 20`) or an absolute starting point (`--timerange 20200101-`). For incremental downloads, the relative approach should be used.

@@ -95,13 +95,13 @@ Common arguments:

 ### Start download

-Then run:
+A very simple command (assuming an available `config.json` file) can look as follows.

 ```bash
 freqtrade download-data --exchange binance
 ```

-This will download historical candle (OHLCV) data for all the currency pairs you defined in `pairs.json`.
+This will download historical candle (OHLCV) data for all the currency pairs defined in the configuration.
Alternatively, specify the pairs directly @@ -109,7 +109,7 @@ Alternatively, specify the pairs directly freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT ``` -or as regex (to download all active USDT pairs) +or as regex (in this case, to download all active USDT pairs) ```bash freqtrade download-data --exchange binance --pairs .*/USDT
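# Illustrative variant (an assumption, not taken from the docs): quoting the
# regex prevents the shell from attempting glob expansion before freqtrade
# parses it, and --timeframes narrows the download to specific intervals.
freqtrade download-data --exchange binance --pairs ".*/USDT" --timeframes 5m 1h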