diff --git a/docs/recursive-analysis.md b/docs/recursive-analysis.md new file mode 100644 index 000000000..341493f7e --- /dev/null +++ b/docs/recursive-analysis.md @@ -0,0 +1,55 @@ +# Recursive analysis + +This page explains how to validate your strategy against recursive formula issues. + +First of all, what is a recursive formula? A recursive formula is a formula that defines any term of a sequence in terms of its preceding term(s). An example of a recursive formula is $a_n = a_{n-1} + b$. + +The second question is: why does it matter for Freqtrade? It matters because in backtesting, the bot will get the full data of the pairs according to the timerange specified. But in dry/live run, the bot will only have a limited amount of data, limited by what each exchange provides. + +For example, let's say that I want to calculate a very basic indicator called `steps`. The first row's value is always 0, while each following row's value is equal to the previous row's value plus 1. If I were to calculate it using the latest 1000 candles, then the `steps` value of the first row is 0, and the `steps` value at the last closed candle is 999. + +But what if I only calculate it based on the latest 500 candles? Then instead of 999, the `steps` value at the last closed candle is 499. This difference in value means your backtest result can differ from your dry/live run result. + +Recursive-analysis requires historic data to be available. To learn how to get data for the pairs and exchange you're interested in, +head over to the [Data Downloading](data-download.md) section of the documentation. + +This command is built upon backtesting since it internally chains backtests to prepare different lengths of data and calculate indicators based on each of the prepared data. +This is done by not looking at the strategy itself - but at the value of the indicators it returned. 
After multiple backtests are done to calculate the indicators for different startup candle values, the values of the last rows are compared to see how much they differ from the base backtest. + +- `--cache` is forced to "none". +- Since we are only looking at the indicators' values, using more than one pair is redundant. It is recommended to set the pair used in the command using the `-p` flag, preferably using a pair with a high price, such as BTC or ETH, to avoid rounding issues that can make the results inaccurate. If no pair is set on the command, the pair used for this analysis is the first pair in the whitelist. +- It's recommended to set a long timerange (consisting of at least 5000 candles), so that the initial backtest that is going to be used as the benchmark has very small or no recursive issues at all. For example, for a 5m timeframe, a timerange of 5000 candles would be equal to about 18 days. + +Besides the recursive formula check, this command is also going to do a simple lookahead bias check on the indicators' values only. It won't replace [Lookahead-analysis](lookahead-analysis.md), since this check won't check the difference in trades' entries and exits, which is the important effect of lookahead bias. It will only check whether there is any lookahead bias in the indicators if the end of the data is moved. + +## Recursive-analysis command reference + +``` +usage: freqtrade recursive-analysis [-h] [-v] [--logfile FILE] [-V] [-c PATH] + [-d PATH] [--userdir PATH] [-s NAME] + [--strategy-path PATH] + [--recursive-strategy-search] + [--freqaimodel NAME] + [--freqaimodel-path PATH] [-i TIMEFRAME] + [--timerange TIMERANGE] + [--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}] + [-p PAIRS [PAIRS ...]] + [--freqai-backtest-live-models] + +``` + +### Summary + +Checks a given strategy for recursive formula issues via recursive-analysis. +A recursive formula issue means that an indicator's calculation doesn't have enough data to produce the correct value. 
+ +### How does the command work? + +It will start with a backtest using the supplied timerange to generate a baseline for the indicators' values. +After setting the baseline it will then do additional runs for each of the different startup candle values. +When the additional runs are done, it will compare the indicators at the last rows and report the differences in a table. + +### Caveats + +- `recursive-analysis` will only calculate and compare the indicators' values at the last row. If there are any differences, the table will only tell you the percentage differences. Whether they have any real impact on your entries and exits isn't checked. +- The ideal scenario is for your indicators to show no difference at all despite the startup candle count being varied. But in reality, some publicly available formulas are recursive. So the goal isn't to have zero differences, but to have the differences low enough to make sure they won't have any real impact on trading decisions. diff --git a/docs/strategy-customization.md b/docs/strategy-customization.md index 005127715..e23c3cc41 100644 --- a/docs/strategy-customization.md +++ b/docs/strategy-customization.md @@ -168,10 +168,12 @@ Most indicators have an instable startup period, in which they are either not av To account for this, the strategy can be assigned the `startup_candle_count` attribute. This should be set to the maximum number of candles that the strategy requires to calculate stable indicators. In the case where a user includes higher timeframes with informative pairs, the `startup_candle_count` does not necessarily change. The value is the maximum period (in candles) that any of the informatives timeframes need to compute stable indicators. -In this example strategy, this should be set to 100 (`startup_candle_count = 100`), since the longest needed history is 100 candles. +You can use [recursive-analysis](recursive-analysis.md) to check and find the correct `startup_candle_count` to be used. 
+ +In this example strategy, this should be set to 400 (`startup_candle_count = 400`), since the minimum needed history for ema100 calculation to make sure the value is correct is 400 candles. ``` python dataframe['ema100'] = ta.EMA(dataframe, timeperiod=100) ``` By letting the bot know how much history is needed, backtest trades can start at the specified timerange during backtesting and hyperopt. @@ -193,11 +195,11 @@ Let's try to backtest 1 month (January 2019) of 5m candles using an example stra freqtrade backtesting --timerange 20190101-20190201 --timeframe 5m ``` -Assuming `startup_candle_count` is set to 100, backtesting knows it needs 100 candles to generate valid buy signals. It will load data from `20190101 - (100 * 5m)` - which is ~2018-12-31 15:30:00. +Assuming `startup_candle_count` is set to 400, backtesting knows it needs 400 candles to generate valid buy signals. It will load data from `20190101 - (400 * 5m)` - which is ~2018-12-30 14:40:00. If this data is available, indicators will be calculated with this extended timerange. The instable startup period (up to 2019-01-01 00:00:00) will then be removed before starting backtesting. !!! Note - If data for the startup period is not available, then the timerange will be adjusted to account for this startup period - so Backtesting would start at 2019-01-01 08:30:00. + If data for the startup period is not available, then the timerange will be adjusted to account for this startup period - so Backtesting would start at 2019-01-02 09:20:00. 
### Entry signal rules diff --git a/freqtrade/optimize/recursive_analysis.py b/freqtrade/optimize/recursive_analysis.py index 599fc4dda..45c2a457c 100644 --- a/freqtrade/optimize/recursive_analysis.py +++ b/freqtrade/optimize/recursive_analysis.py @@ -129,6 +129,7 @@ class RecursiveAnalysis(BaseAnalysis): varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data) def fill_partial_varholder(self, start_date, startup_candle): + logger.info(f"Calculating indicators using startup candle of {startup_candle}.") partial_varHolder = VarHolder() partial_varHolder.from_dt = start_date @@ -142,6 +143,8 @@ class RecursiveAnalysis(BaseAnalysis): self.partial_varHolder_array.append(partial_varHolder) def fill_partial_varholder_lookahead(self, end_date): + logger.info("Calculating indicators to test lookahead on indicators.") + partial_varHolder = VarHolder() partial_varHolder.from_dt = self.full_varHolder.from_dt diff --git a/freqtrade/optimize/recursive_analysis_helpers.py b/freqtrade/optimize/recursive_analysis_helpers.py index 0b353edfc..b33a24cf4 100644 --- a/freqtrade/optimize/recursive_analysis_helpers.py +++ b/freqtrade/optimize/recursive_analysis_helpers.py @@ -16,10 +16,9 @@ class RecursiveAnalysisSubFunctions: @staticmethod def text_table_recursive_analysis_instances( - config: Dict[str, Any], recursive_instances: List[RecursiveAnalysis]): startups = recursive_instances[0]._startup_candle - headers = ['strategy', 'indicators'] + headers = ['indicators'] for candle in startups: headers.append(candle) @@ -27,7 +26,7 @@ class RecursiveAnalysisSubFunctions: for inst in recursive_instances: if len(inst.dict_recursive) > 0: for indicator, values in inst.dict_recursive.items(): - temp_data = [inst.strategy_obj['name'], indicator] + temp_data = [indicator] for candle in startups: temp_data.append(values.get(int(candle), '-')) data.append(temp_data) @@ -39,12 +38,19 @@ class RecursiveAnalysisSubFunctions: @staticmethod def 
calculate_config_overrides(config: Config): + if 'timerange' not in config: + # setting a timerange is enforced here + raise OperationalException( + "Please set a timerange. " + "A timerange of 20 candles are enough for recursive analysis." + ) + if config.get('backtest_cache') is None: config['backtest_cache'] = 'none' elif config['backtest_cache'] != 'none': logger.info(f"backtest_cache = " f"{config['backtest_cache']} detected. " - f"Inside lookahead-analysis it is enforced to be 'none'. " + f"Inside recursive-analysis it is enforced to be 'none'. " f"Changed it to 'none'") config['backtest_cache'] = 'none' return config @@ -57,7 +63,7 @@ class RecursiveAnalysisSubFunctions: current_instance = RecursiveAnalysis(config, strategy_obj) current_instance.start() elapsed = time.perf_counter() - start - logger.info(f"Checking recursive and lookahead bias of indicators " + logger.info(f"Checking recursive and indicator-only lookahead bias of indicators " f"of {Path(strategy_obj['location']).name} " f"took {elapsed:.0f} seconds.") return current_instance @@ -92,7 +98,7 @@ class RecursiveAnalysisSubFunctions: # report the results if RecursiveAnalysis_instances: RecursiveAnalysisSubFunctions.text_table_recursive_analysis_instances( - config, RecursiveAnalysis_instances) + RecursiveAnalysis_instances) else: logger.error("There were no strategies specified neither through " "--strategy nor through " diff --git a/freqtrade/templates/sample_strategy.py b/freqtrade/templates/sample_strategy.py index fd81570fe..65a6e440e 100644 --- a/freqtrade/templates/sample_strategy.py +++ b/freqtrade/templates/sample_strategy.py @@ -77,7 +77,7 @@ class SampleStrategy(IStrategy): exit_short_rsi = IntParameter(low=1, high=50, default=30, space='buy', optimize=True, load=True) # Number of candles the strategy requires before producing valid signals - startup_candle_count: int = 30 + startup_candle_count: int = 170 # Optional order type mapping. 
order_types = { diff --git a/tests/optimize/test_recursive_analysis.py b/tests/optimize/test_recursive_analysis.py new file mode 100644 index 000000000..6560a4f26 --- /dev/null +++ b/tests/optimize/test_recursive_analysis.py @@ -0,0 +1,186 @@ +# pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument +from copy import deepcopy +from pathlib import Path +from unittest.mock import MagicMock, PropertyMock + +import pytest + +from freqtrade.commands.optimize_commands import start_recursive_analysis +from freqtrade.data.history import get_timerange +from freqtrade.exceptions import OperationalException +from freqtrade.optimize.recursive_analysis import RecursiveAnalysis +from freqtrade.optimize.recursive_analysis_helpers import RecursiveAnalysisSubFunctions +from tests.conftest import get_args, log_has_re, patch_exchange + + +@pytest.fixture +def recursive_conf(default_conf_usdt): + default_conf_usdt['timerange'] = '20220101-20220501' + + default_conf_usdt['strategy_path'] = str( + Path(__file__).parent.parent / "strategy/strats") + default_conf_usdt['strategy'] = 'strategy_test_v3_recursive_issue' + default_conf_usdt['pairs'] = ['UNITTEST/USDT'] + default_conf_usdt['startup_candle'] = [100] + return default_conf_usdt + + +def test_start_recursive_analysis(mocker): + single_mock = MagicMock() + text_table_mock = MagicMock() + mocker.patch.multiple( + 'freqtrade.optimize.recursive_analysis_helpers.RecursiveAnalysisSubFunctions', + initialize_single_recursive_analysis=single_mock, + text_table_recursive_analysis_instances=text_table_mock, + ) + args = [ + "recursive-analysis", + "--strategy", + "strategy_test_v3_recursive_issue", + "--strategy-path", + str(Path(__file__).parent.parent / "strategy/strats"), + "--pairs", + "UNITTEST/BTC", + "--timerange", + "20220101-20220201" + ] + pargs = get_args(args) + pargs['config'] = None + + start_recursive_analysis(pargs) + assert single_mock.call_count == 1 + assert text_table_mock.call_count == 1 + 
+ single_mock.reset_mock() + + # Missing timerange + args = [ + "recursive-analysis", + "--strategy", + "strategy_test_v3_with_recursive_bias", + "--strategy-path", + str(Path(__file__).parent.parent / "strategy/strats"), + "--pairs", + "UNITTEST/BTC" + ] + pargs = get_args(args) + pargs['config'] = None + with pytest.raises(OperationalException, + match=r"Please set a timerange\..*"): + start_recursive_analysis(pargs) + + +def test_recursive_helper_no_strategy_defined(recursive_conf): + conf = deepcopy(recursive_conf) + conf['pairs'] = ['UNITTEST/USDT'] + del conf['strategy'] + with pytest.raises(OperationalException, + match=r"No Strategy specified"): + RecursiveAnalysisSubFunctions.start(conf) + + +def test_recursive_helper_start(recursive_conf, mocker) -> None: + single_mock = MagicMock() + text_table_mock = MagicMock() + mocker.patch.multiple( + 'freqtrade.optimize.recursive_analysis_helpers.RecursiveAnalysisSubFunctions', + initialize_single_recursive_analysis=single_mock, + text_table_recursive_analysis_instances=text_table_mock, + ) + RecursiveAnalysisSubFunctions.start(recursive_conf) + assert single_mock.call_count == 1 + assert text_table_mock.call_count == 1 + + single_mock.reset_mock() + text_table_mock.reset_mock() + + +def test_recursive_helper_text_table_recursive_analysis_instances(recursive_conf): + dict_diff = dict() + dict_diff['rsi'] = {} + dict_diff['rsi'][100] = "0.078%" + + strategy_obj = { + 'name': "strategy_test_v3_recursive_issue", + 'location': Path(recursive_conf['strategy_path'], f"{recursive_conf['strategy']}.py") + } + + instance = RecursiveAnalysis(recursive_conf, strategy_obj) + instance.dict_recursive = dict_diff + table, headers, data = (RecursiveAnalysisSubFunctions. 
+ text_table_recursive_analysis_instances([instance])) + + # check row contents for a try that has too few signals + assert data[0][0] == 'rsi' + assert data[0][1] == '0.078%' + assert len(data[0]) == 2 + + # now check when there is no issue + dict_diff = dict() + instance = RecursiveAnalysis(recursive_conf, strategy_obj) + instance.dict_recursive = dict_diff + table, headers, data = (RecursiveAnalysisSubFunctions. + text_table_recursive_analysis_instances([instance])) + assert len(data) == 0 + + +def test_initialize_single_recursive_analysis(recursive_conf, mocker, caplog): + mocker.patch('freqtrade.data.history.get_timerange', get_timerange) + patch_exchange(mocker) + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['UNITTEST/BTC'])) + recursive_conf['pairs'] = ['UNITTEST/BTC'] + + recursive_conf['timeframe'] = '5m' + recursive_conf['timerange'] = '20180119-20180122' + start_mock = mocker.patch('freqtrade.optimize.recursive_analysis.RecursiveAnalysis.start') + strategy_obj = { + 'name': "strategy_test_v3_recursive_issue", + 'location': Path(recursive_conf['strategy_path'], f"{recursive_conf['strategy']}.py") + } + + instance = RecursiveAnalysisSubFunctions.initialize_single_recursive_analysis( + recursive_conf, strategy_obj) + assert log_has_re(r"Recursive test of .* started\.", caplog) + assert start_mock.call_count == 1 + + assert instance.strategy_obj['name'] == "strategy_test_v3_recursive_issue" + + +@pytest.mark.parametrize('scenario', [ + 'no_bias', 'bias1' +]) +def test_biased_strategy(recursive_conf, mocker, caplog, scenario) -> None: + mocker.patch('freqtrade.data.history.get_timerange', get_timerange) + patch_exchange(mocker) + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['UNITTEST/BTC'])) + recursive_conf['pairs'] = ['UNITTEST/BTC'] + + recursive_conf['timeframe'] = '5m' + recursive_conf['timerange'] = '20180119-20180122' + 
recursive_conf['startup_candle'] = [100] + + # Patch scenario Parameter to allow for easy selection + mocker.patch('freqtrade.strategy.hyper.HyperStrategyMixin.load_params_from_file', + return_value={ + 'params': { + "buy": { + "scenario": scenario + } + } + }) + + strategy_obj = {'name': "strategy_test_v3_recursive_issue"} + instance = RecursiveAnalysis(recursive_conf, strategy_obj) + instance.start() + # Assert init correct + assert log_has_re(f"Strategy Parameter: scenario = {scenario}", caplog) + + diff_pct = abs(float(instance.dict_recursive['rsi'][100].replace("%", ""))) + # check non-biased strategy + if scenario == "no_bias": + assert diff_pct < 0.01 + # check biased strategy + elif scenario == "bias1": + assert diff_pct >= 0.01 diff --git a/tests/strategy/strats/strategy_test_v3_recursive_issue.py b/tests/strategy/strats/strategy_test_v3_recursive_issue.py new file mode 100644 index 000000000..78a9dca61 --- /dev/null +++ b/tests/strategy/strats/strategy_test_v3_recursive_issue.py @@ -0,0 +1,42 @@ +# pragma pylint: disable=missing-docstring, invalid-name, pointless-string-statement +import talib.abstract as ta +from pandas import DataFrame + +from freqtrade.strategy import IStrategy +from freqtrade.strategy.parameters import CategoricalParameter + + +class strategy_test_v3_recursive_issue(IStrategy): + INTERFACE_VERSION = 3 + + # Minimal ROI designed for the strategy + minimal_roi = { + "0": 0.04 + } + + # Optimal stoploss designed for the strategy + stoploss = -0.10 + + # Optimal timeframe for the strategy + timeframe = '5m' + scenario = CategoricalParameter(['no_bias', 'bias1'], default='bias1', space="buy") + + # Number of candles the strategy requires before producing valid signals + startup_candle_count: int = 100 + + def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + # bias is introduced here + if self.scenario.value == 'no_bias': + dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14) + else: + dataframe['rsi'] = 
ta.RSI(dataframe, timeperiod=50) + + return dataframe + + def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + + return dataframe + + def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + + return dataframe