diff --git a/freqtrade/commands/__init__.py b/freqtrade/commands/__init__.py index b9346fd5f..98e7cb084 100644 --- a/freqtrade/commands/__init__.py +++ b/freqtrade/commands/__init__.py @@ -20,7 +20,8 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_f start_list_timeframes, start_show_trades) from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show, start_edge, start_hyperopt, - start_lookahead_analysis) + start_lookahead_analysis, + start_recursive_analysis) from freqtrade.commands.pairlist_commands import start_test_pairlist from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit from freqtrade.commands.strategy_utils_commands import start_strategy_update diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index 5473e95e1..70b6b9c01 100755 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -122,6 +122,8 @@ ARGS_LOOKAHEAD_ANALYSIS = [ a for a in ARGS_BACKTEST if a not in ("position_stacking", "use_max_market_positions", 'cache') ] + ["minimum_trade_amount", "targeted_trade_amount", "lookahead_analysis_exportfilename"] +ARGS_RECURSIVE_ANALYSIS = ["timeframe", "timerange", "dataformat_ohlcv", "pairs", "startup_candle"] + class Arguments: """ @@ -206,7 +208,7 @@ class Arguments: start_list_strategies, start_list_timeframes, start_lookahead_analysis, start_new_config, start_new_strategy, start_plot_dataframe, start_plot_profit, - start_show_trades, start_strategy_update, + start_recursive_analysis, start_show_trades, start_strategy_update, start_test_pairlist, start_trading, start_webserver) subparsers = self.parser.add_subparsers(dest='command', @@ -467,3 +469,14 @@ class Arguments: self._build_args(optionlist=ARGS_LOOKAHEAD_ANALYSIS, parser=lookahead_analayis_cmd) + + # Add recursive_analysis subcommand + recursive_analayis_cmd = subparsers.add_parser( + 'recursive-analysis', + help="Check for potential look ahead 
def start_recursive_analysis(args: Dict[str, Any]) -> None:
    """
    Entry point of the recursive-analysis subcommand.
    :param args: Cli args from Arguments()
    :return: None
    """
    # Imported inside the function, so the optimize module is only loaded
    # when this subcommand is actually executed.
    from freqtrade.optimize.recursive_analysis_helpers import (
        RecursiveAnalysisSubFunctions as _recursive_helpers)

    _recursive_helpers.start(setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE))


class VarHolder:
    """
    Plain attribute container for one indicator calculation run.

    Nothing is assigned on construction, the fields are only declared here;
    RecursiveAnalysis fills them in its fill_* / prepare_data methods.
    """
    # timerange and data are both taken from backtesting.load_bt_data()
    timerange: TimeRange
    data: DataFrame
    # result of strategy.advise_all_indicators(), looked up by pair name
    indicators: Dict[str, DataFrame]
    # start / end of the run
    from_dt: datetime
    to_dt: datetime
    # timeframe reported by the Backtesting instance
    timeframe: str
    # startup candle count of the run (only assigned for partial recursive runs)
    startup_candle: int
class RecursiveAnalysis:
    """
    Check the indicators of a single strategy for recursive-formula and lookahead bias.

    Indicators are calculated once for the full configured timerange and again for
    shortened timeranges. Rows belonging to the same candle are then compared; a
    difference means the indicator value changed with the amount of data loaded.
    """

    _DT_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self, config: Dict[str, Any], strategy_obj: Dict):
        """
        :param config: Configuration. A deep copy is kept as self.local_config,
            the dict passed in is not modified.
        :param strategy_obj: Strategy description, must contain the key 'name'.
        """
        self.failed_bias_check = True
        self.full_varHolder = VarHolder()
        self.partial_varHolder_array: List[VarHolder] = []
        self.partial_varHolder_lookahead_array: List[VarHolder] = []

        self.entry_varHolders: List[VarHolder] = []
        self.exit_varHolders: List[VarHolder] = []
        self.exchange: Optional[Any] = None

        # pull variables into the scope of the recursive_analysis-instance
        self.local_config = deepcopy(config)
        self.local_config['strategy'] = strategy_obj['name']
        self._startup_candle = config.get('startup_candle', [199, 399, 499, 999, 1999])
        self.strategy_obj = strategy_obj
        # indicator name -> {startup candle count -> formatted difference in percent}
        self.dict_recursive: Dict[str, Dict[int, str]] = {}

    @staticmethod
    def dt_to_timestamp(dt: datetime) -> int:
        """
        Convert a datetime to a unix timestamp in seconds.
        The tzinfo of dt is replaced with UTC (no conversion takes place), so the
        wall-clock values of dt are interpreted as UTC.
        """
        return int(dt.replace(tzinfo=timezone.utc).timestamp())

    @staticmethod
    def _format_timerange(varholder: "VarHolder") -> str:
        """Return 'from-to' of a VarHolder as human readable string for log messages."""
        return (varholder.from_dt.strftime(RecursiveAnalysis._DT_FORMAT) + "-"
                + varholder.to_dt.strftime(RecursiveAnalysis._DT_FORMAT))

    @staticmethod
    def _percent_difference(base_value: Any, other_value: Any) -> str:
        """
        Format the deviation of other_value from base_value in percent ('12.345%').
        A baseline of zero has no finite relative difference: 'inf%', '-inf%' or
        'nan%' is returned in that case instead of raising ZeroDivisionError.
        """
        delta = other_value - base_value
        if base_value == 0:
            if delta > 0:
                difference = float('inf')
            elif delta < 0:
                difference = float('-inf')
            else:
                difference = float('nan')
        else:
            difference = delta / base_value * 100
        return f"{difference:.3f}%"

    def analyze_indicators(self):
        """
        Recursive bias check.
        Compares the last indicator row of the full run with the last row of every
        partial run (first configured pair only) and stores the difference per
        indicator and startup candle count in self.dict_recursive.
        Stops at the first partial run that shows no difference at all.
        """
        pair_to_check = self.local_config['pairs'][0]
        logger.info("Start checking for recursive bias")

        # check and report signals
        base_last_row = self.full_varHolder.indicators[pair_to_check].iloc[-1]
        base_timerange = self._format_timerange(self.full_varHolder)

        for part in self.partial_varHolder_array:
            part_last_row = part.indicators[pair_to_check].iloc[-1]
            part_timerange = self._format_timerange(part)

            logger.info(f"Comparing last row of {base_timerange} backtest")
            logger.info(f"vs {part_timerange} with {part.startup_candle} startup candle")

            # compare() keeps only the labels that differ, with the base value in
            # column 'self' and the partial value in column 'other'.
            compare_df = base_last_row.compare(part_last_row)
            if compare_df.shape[0] == 0:
                logger.info("No difference found. Stop the process.")
                break

            for indicator in compare_df.index:
                per_candle = self.dict_recursive.setdefault(indicator, {})
                per_candle[part.startup_candle] = self._percent_difference(
                    compare_df.loc[indicator, 'self'],
                    compare_df.loc[indicator, 'other'])

    def analyze_indicators_lookahead(self):
        """
        Lookahead bias check.
        Takes the last row of the first shortened run and compares it with the row of
        the same date in the full run (first configured pair only). Every differing
        indicator is logged.
        """
        pair_to_check = self.local_config['pairs'][0]
        logger.info("Start checking for lookahead bias")

        part = self.partial_varHolder_lookahead_array[0]
        part_last_row = part.indicators[pair_to_check].iloc[-1]
        date_to_check = part_last_row['date']

        full_indicators = self.full_varHolder.indicators[pair_to_check]
        matching_rows = full_indicators.loc[full_indicators['date'] == date_to_check]
        if matching_rows.empty:
            # Without a candle of the same date there is nothing to compare against.
            logger.warning(f"No candle with date {date_to_check} found in the full "
                           f"timerange, skipping lookahead check.")
            return
        base_row_to_check = matching_rows.iloc[-1]

        check_time = part.to_dt.strftime(self._DT_FORMAT)
        logger.info(f"Check indicators at {check_time}")

        compare_df = base_row_to_check.compare(part_last_row)
        if compare_df.shape[0] > 0:
            for indicator in compare_df.index:
                logger.info(f"=> found lookahead in indicator {indicator}")
        else:
            logger.info("No lookahead bias found. Stop the process.")

    def prepare_data(self, varholder: "VarHolder", pairs_to_load: List[str]):
        """
        Load the data for varholder.from_dt - varholder.to_dt and calculate indicators.
        Fills varholder.data, .timerange, .timeframe and .indicators.
        :param varholder: VarHolder with from_dt and to_dt already set.
        :param pairs_to_load: Pairs used as pair_whitelist for this run.
        """
        if 'freqai' in self.local_config and 'identifier' in self.local_config['freqai']:
            # purge previous data if the freqai model is defined
            # (to be sure nothing is carried over from older backtests)
            path_to_current_identifier = (
                Path(f"{self.local_config['user_data_dir']}/models/"
                     f"{self.local_config['freqai']['identifier']}").resolve())
            # remove folder and its contents
            if path_to_current_identifier.exists():
                shutil.rmtree(path_to_current_identifier)

        prepare_data_config = deepcopy(self.local_config)
        prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varholder.from_dt)) + "-" +
                                            str(self.dt_to_timestamp(varholder.to_dt)))
        prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load

        # The exchange of the first Backtesting instance is kept and handed to later ones.
        backtesting = Backtesting(prepare_data_config, self.exchange)
        self.exchange = backtesting.exchange
        backtesting._set_strategy(backtesting.strategylist[0])

        varholder.data, varholder.timerange = backtesting.load_bt_data()
        backtesting.load_bt_data_detail()
        varholder.timeframe = backtesting.timeframe

        varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data)

    def fill_full_varholder(self):
        """
        Run the baseline calculation over the whole configured timerange.
        An open start defaults to the unix epoch, an open end to the current time (UTC).
        """
        self.full_varHolder = VarHolder()

        # define datetime in human-readable format
        parsed_timerange = TimeRange.parse_timerange(self.local_config['timerange'])

        if parsed_timerange.startdt is None:
            self.full_varHolder.from_dt = datetime.fromtimestamp(0, tz=timezone.utc)
        else:
            self.full_varHolder.from_dt = parsed_timerange.startdt

        if parsed_timerange.stopdt is None:
            # timezone-aware "now"; datetime.utcnow() is naive and deprecated
            self.full_varHolder.to_dt = datetime.now(timezone.utc)
        else:
            self.full_varHolder.to_dt = parsed_timerange.stopdt

        self.prepare_data(self.full_varHolder, self.local_config['pairs'])

    def fill_partial_varholder(self, start_date, startup_candle):
        """
        Run a calculation from start_date to the end of the full run, using
        startup_candle as startup_candle_count, and append the result to
        self.partial_varHolder_array.
        Note: startup_candle_count stays set in self.local_config afterwards.
        """
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = start_date
        partial_varHolder.to_dt = self.full_varHolder.to_dt
        partial_varHolder.startup_candle = startup_candle

        self.local_config['startup_candle_count'] = startup_candle

        self.prepare_data(partial_varHolder, self.local_config['pairs'])

        self.partial_varHolder_array.append(partial_varHolder)

    def fill_partial_varholder_lookahead(self, end_date):
        """
        Run a calculation from the start of the full run to end_date and append the
        result to self.partial_varHolder_lookahead_array.
        """
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = self.full_varHolder.from_dt
        partial_varHolder.to_dt = end_date

        self.prepare_data(partial_varHolder, self.local_config['pairs'])

        self.partial_varHolder_lookahead_array.append(partial_varHolder)

    def start(self) -> None:
        """
        Run the complete analysis: the full run, one shortened run for the lookahead
        check, one partial run per configured startup candle count, then both comparisons.
        """
        # first make a single backtest
        self.fill_full_varholder()

        reduce_verbosity_for_bias_tester()
        try:
            start_date_full = self.full_varHolder.from_dt
            end_date_full = self.full_varHolder.to_dt

            timeframe_minutes = timeframe_to_minutes(self.full_varHolder.timeframe)

            # lookahead check: data ends 10 candles after the start of the full run
            end_date_partial = start_date_full + timedelta(minutes=int(timeframe_minutes * 10))
            self.fill_partial_varholder_lookahead(end_date_partial)

            # recursive check: data starts one candle before the end of the full run
            start_date_partial = end_date_full - timedelta(minutes=int(timeframe_minutes))
            for startup_candle in self._startup_candle:
                self.fill_partial_varholder(start_date_partial, int(startup_candle))
        finally:
            # Restore verbosity, so it's not too quiet for the next strategy -
            # also when one of the runs above failed.
            restore_verbosity_for_bias_tester()

        self.analyze_indicators()
        self.analyze_indicators_lookahead()
class RecursiveAnalysisSubFunctions:
    """Static helpers that configure, run and report RecursiveAnalysis instances."""

    @staticmethod
    def text_table_recursive_analysis_instances(
            config: Dict[str, Any],
            recursive_instances: List["RecursiveAnalysis"]):
        """
        Print a table with one row per strategy/indicator and one column per
        startup candle count. Missing values are shown as '-'.
        :param config: Configuration (not used by this function).
        :param recursive_instances: Finished analysis instances, must not be empty.
            The startup candle counts of the first instance define the columns.
        :return: Tuple of (table string, headers, row data).
        """
        startups = recursive_instances[0]._startup_candle
        headers = ['strategy', 'indicators', *startups]

        data = [
            [inst.strategy_obj['name'], indicator,
             *(values.get(int(candle), '-') for candle in startups)]
            for inst in recursive_instances
            for indicator, values in inst.dict_recursive.items()
        ]

        from tabulate import tabulate
        table = tabulate(data, headers=headers, tablefmt="orgtbl")
        print(table)
        return table, headers, data

    @staticmethod
    def export_to_csv(config: Dict[str, Any], lookahead_analysis: List["RecursiveAnalysis"]):
        """
        Write/update the csv file config['lookahead_analysis_exportfilename'].
        Rows are identified by filename + strategy; an existing row is updated in place.
        Only instances that carry a 'current_analysis' result are exported. The
        RecursiveAnalysis class of this module does not set that attribute, such
        instances are skipped with a warning instead of raising AttributeError.
        :param config: Configuration with the keys 'lookahead_analysis_exportfilename'
            and 'minimum_trade_amount'.
        :param lookahead_analysis: Analysis instances to export.
        """
        def add_or_update_row(df, row_data):
            matches = (
                (df['filename'] == row_data['filename']) &
                (df['strategy'] == row_data['strategy'])
            )
            if matches.any():
                # Update existing row - column by column, so the result does not
                # depend on the index label of the matched row.
                for column, value in row_data.items():
                    df.loc[matches, column] = value
            else:
                # Add new row
                df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)],
                               ignore_index=True)

            return df

        export_filename = config['lookahead_analysis_exportfilename']
        if Path(export_filename).exists():
            # Read CSV file into a pandas dataframe
            csv_df = pd.read_csv(export_filename)
        else:
            # Create a new empty DataFrame with the desired column names and set the index
            csv_df = pd.DataFrame(columns=[
                'filename', 'strategy', 'has_bias', 'total_signals',
                'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'
            ],
                index=None)

        for inst in lookahead_analysis:
            analysis = getattr(inst, 'current_analysis', None)
            if analysis is None:
                logger.warning(f"Skipping csv export of {inst.strategy_obj['name']}: "
                               f"no 'current_analysis' result available.")
                continue
            # only update if enough signals were found and the check did not fail
            if (analysis.total_signals > config['minimum_trade_amount']
                    and inst.failed_bias_check is not True):
                new_row_data = {'filename': inst.strategy_obj['location'].parts[-1],
                                'strategy': inst.strategy_obj['name'],
                                'has_bias': analysis.has_bias,
                                'total_signals': int(analysis.total_signals),
                                'biased_entry_signals': int(analysis.false_entry_signals),
                                'biased_exit_signals': int(analysis.false_exit_signals),
                                'biased_indicators': ",".join(analysis.false_indicators)}
                csv_df = add_or_update_row(csv_df, new_row_data)

        # Fill NaN values with 0 and convert the signal counters to integers
        for column in ('total_signals', 'biased_entry_signals', 'biased_exit_signals'):
            csv_df[column] = csv_df[column].fillna(0).astype(int)

        logger.info(f"saving {export_filename}")
        csv_df.to_csv(export_filename, index=False)

    @staticmethod
    def calculate_config_overrides(config: "Config"):
        """
        Validate the configuration and adjust it (in place) for the analysis:
        max_open_trades is raised to the number of pairs, dry_run_wallet to at least
        1 billion and backtest_cache is forced to 'none'.
        :param config: Configuration, requires 'timerange', 'pairs', 'max_open_trades'
            and 'dry_run_wallet'. 'targeted_trade_amount' / 'minimum_trade_amount' are
            optional and only validated when both are present.
        :return: The adjusted config (same object).
        :raises OperationalException: If no timerange is set or the trade amounts
            contradict each other.
        """
        if 'timerange' not in config:
            # The analysis reads config['timerange'] unconditionally - fail early
            # with a readable message instead of a KeyError later on.
            raise OperationalException(
                "No timerange specified. Recursive analysis requires the 'timerange' setting."
            )

        targeted_trade_amount = config.get('targeted_trade_amount')
        minimum_trade_amount = config.get('minimum_trade_amount')
        if (targeted_trade_amount is not None and minimum_trade_amount is not None
                and targeted_trade_amount < minimum_trade_amount):
            # this combo doesn't make any sense.
            raise OperationalException(
                "Targeted trade amount can't be smaller than minimum trade amount."
            )
        if len(config['pairs']) > config['max_open_trades']:
            logger.info('Max_open_trades were less than amount of pairs. '
                        'Set max_open_trades to amount of pairs just to avoid false positives.')
            config['max_open_trades'] = len(config['pairs'])

        min_dry_run_wallet = 1000000000
        if config['dry_run_wallet'] < min_dry_run_wallet:
            logger.info('Dry run wallet was not set to 1 billion, pushing it up there '
                        'just to avoid false positives')
            config['dry_run_wallet'] = min_dry_run_wallet

        # enforce cache to be 'none', shift it to 'none' if not already
        # (since the default value is 'day')
        if config.get('backtest_cache') is None:
            config['backtest_cache'] = 'none'
        elif config['backtest_cache'] != 'none':
            logger.info(f"backtest_cache = "
                        f"{config['backtest_cache']} detected. "
                        f"Inside recursive-analysis it is enforced to be 'none'. "
                        f"Changed it to 'none'")
            config['backtest_cache'] = 'none'
        return config

    @staticmethod
    def initialize_single_recursive_analysis(config: "Config", strategy_obj: Dict[str, Any]):
        """
        Create a RecursiveAnalysis for one strategy, run it and log the elapsed time.
        :param config: Configuration passed on to RecursiveAnalysis.
        :param strategy_obj: Strategy description with the keys 'name' and 'location'.
        :return: The finished RecursiveAnalysis instance.
        """
        strategy_file = Path(strategy_obj['location']).name

        logger.info(f"Recursive test of {strategy_file} started.")
        start = time.perf_counter()
        current_instance = RecursiveAnalysis(config, strategy_obj)
        current_instance.start()
        elapsed = time.perf_counter() - start
        logger.info(f"Checking recursive and lookahead bias of indicators "
                    f"of {strategy_file} "
                    f"took {elapsed:.0f} seconds.")
        return current_instance

    @staticmethod
    def start(config: "Config"):
        """
        Run the analysis for every strategy selected via 'strategy' / 'strategy_list'
        that the StrategyResolver can find, then print the result table.
        :param config: Configuration.
        :raises OperationalException: If no strategy is specified or the
            configuration is rejected by calculate_config_overrides.
        """
        config = RecursiveAnalysisSubFunctions.calculate_config_overrides(config)

        strategy_objs = StrategyResolver.search_all_objects(
            config, enum_failed=False, recursive=config.get('recursive_strategy_search', False))

        recursive_instances = []

        # unify --strategy and --strategy_list to one list
        if not (strategy_list := config.get('strategy_list', [])):
            if config.get('strategy') is None:
                raise OperationalException(
                    "No Strategy specified. Please specify a strategy via --strategy or "
                    "--strategy_list"
                )
            strategy_list = [config['strategy']]

        # check if strategies can be properly loaded, only check them if they can be.
        for strat in strategy_list:
            for strategy_obj in strategy_objs:
                if strategy_obj['name'] == strat:
                    recursive_instances.append(
                        RecursiveAnalysisSubFunctions.initialize_single_recursive_analysis(
                            config, strategy_obj))
                    # only the first object with a matching name is analyzed
                    break

        # report the results
        if recursive_instances:
            RecursiveAnalysisSubFunctions.text_table_recursive_analysis_instances(
                config, recursive_instances)
            if config.get('lookahead_analysis_exportfilename') is not None:
                RecursiveAnalysisSubFunctions.export_to_csv(config, recursive_instances)
        else:
            logger.error("There were no strategies specified neither through "
                         "--strategy nor through "
                         "--strategy_list "
                         "or timeframe was not specified.")