Merge pull request #10527 from freqtrade/feat/bt_generator

Backtesting - dynamic pairlist sorting
This commit is contained in:
Matthias
2024-08-13 09:56:19 +02:00
committed by GitHub
4 changed files with 355 additions and 85 deletions

View File

@@ -530,10 +530,10 @@ You can then load the trades to perform further analysis as shown in the [data a
Since backtesting lacks some detailed information about what happens within a candle, it needs to take a few assumptions: Since backtesting lacks some detailed information about what happens within a candle, it needs to take a few assumptions:
- Exchange [trading limits](#trading-limits-in-backtesting) are respected - Exchange [trading limits](#trading-limits-in-backtesting) are respected
- Entries happen at open-price - Entries happen at open-price unless a custom price logic has been specified
- All orders are filled at the requested price (no slippage) as long as the price is within the candle's high/low range - All orders are filled at the requested price (no slippage) as long as the price is within the candle's high/low range
- Exit-signal exits happen at open-price of the consecutive candle - Exit-signal exits happen at open-price of the consecutive candle
- Exits don't free their trade slot for a new trade until the next candle - Exits free their trade slot for a new trade with a different pair
- Exit-signal is favored over Stoploss, because exit-signals are assumed to trigger on candle's open - Exit-signal is favored over Stoploss, because exit-signals are assumed to trigger on candle's open
- ROI - ROI
- Exits are compared to high - but the ROI value is used (e.g. ROI = 2%, high=5% - so the exit will be at 2%) - Exits are compared to high - but the ROI value is used (e.g. ROI = 2%, high=5% - so the exit will be at 2%)

View File

@@ -401,7 +401,15 @@ def analyze_trade_parallelism(results: pd.DataFrame, timeframe: str) -> pd.DataF
timeframe_freq = timeframe_to_resample_freq(timeframe) timeframe_freq = timeframe_to_resample_freq(timeframe)
dates = [ dates = [
pd.Series(pd.date_range(row[1]["open_date"], row[1]["close_date"], freq=timeframe_freq)) pd.Series(
pd.date_range(
row[1]["open_date"],
row[1]["close_date"],
freq=timeframe_freq,
# Exclude right boundary - the date is the candle open date.
inclusive="left",
)
)
for row in results[["open_date", "close_date"]].iterrows() for row in results[["open_date", "close_date"]].iterrows()
] ]
deltas = [len(x) for x in dates] deltas = [len(x) for x in dates]

View File

@@ -1332,10 +1332,9 @@ class Backtesting:
pair: str, pair: str,
current_time: datetime, current_time: datetime,
end_date: datetime, end_date: datetime,
open_trade_count_start: int,
trade_dir: Optional[LongShort], trade_dir: Optional[LongShort],
is_first: bool = True, is_first: bool = True,
) -> int: ) -> None:
""" """
NOTE: This method is used by Hyperopt at each iteration. Please keep it optimized. NOTE: This method is used by Hyperopt at each iteration. Please keep it optimized.
@@ -1345,7 +1344,6 @@ class Backtesting:
# 1. Manage currently open orders of active trades # 1. Manage currently open orders of active trades
if self.manage_open_orders(t, current_time, row): if self.manage_open_orders(t, current_time, row):
# Close trade # Close trade
open_trade_count_start -= 1
LocalTrade.remove_bt_trade(t) LocalTrade.remove_bt_trade(t)
self.wallets.update() self.wallets.update()
@@ -1361,13 +1359,9 @@ class Backtesting:
and trade_dir is not None and trade_dir is not None
and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir) and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
): ):
if self.trade_slot_available(open_trade_count_start): if self.trade_slot_available(LocalTrade.bt_open_open_trade_count):
trade = self._enter_trade(pair, row, trade_dir) trade = self._enter_trade(pair, row, trade_dir)
if trade: if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
self.wallets.update() self.wallets.update()
else: else:
self._collate_rejected(pair, row) self._collate_rejected(pair, row)
@@ -1386,7 +1380,28 @@ class Backtesting:
order = trade.select_order(trade.exit_side, is_open=True) order = trade.select_order(trade.exit_side, is_open=True)
if order: if order:
self._process_exit_order(order, trade, current_time, row, pair) self._process_exit_order(order, trade, current_time, row, pair)
return open_trade_count_start
def time_pair_generator(
    self, start_date: datetime, end_date: datetime, increment: timedelta, pairs: List[str]
):
    """
    Backtest time and pair generator.

    Walks the backtest timerange one `increment` at a time and, for each
    timestamp, yields one tuple per pair to be processed.

    :param start_date: Start of the timerange. Iteration begins one increment
        after this date (the first candle's close).
    :param end_date: End of the timerange (inclusive).
    :param increment: Step between timestamps - usually the main timeframe
        as a timedelta.
    :param pairs: Base pairlist to iterate at every timestamp.
    :return: Generator yielding (current_time, pair, is_first) tuples, where
        is_first is True only for the first pair yielded at each timestamp
        (so per-candle work can run exactly once).
    """
    current_time = start_date + increment
    # Progress step count is based on the main timeframe, independent of
    # how many pairs are yielded per candle.
    self.progress.init_step(
        BacktestState.BACKTEST, int((end_date - start_date) / self.timeframe_td)
    )
    while current_time <= end_date:
        is_first = True
        # Pairs that have open trades should be processed first.
        # dict.fromkeys() deduplicates while preserving insertion order,
        # so open-trade pairs lead and the remaining pairs keep their order.
        new_pairlist = list(dict.fromkeys([t.pair for t in LocalTrade.bt_trades_open] + pairs))
        for pair in new_pairlist:
            yield current_time, pair, is_first
            is_first = False
        self.progress.increment()
        current_time += increment
def backtest(self, processed: Dict, start_date: datetime, end_date: datetime) -> Dict[str, Any]: def backtest(self, processed: Dict, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
""" """
@@ -1411,82 +1426,70 @@ class Backtesting:
# Indexes per pair, so some pairs are allowed to have a missing start. # Indexes per pair, so some pairs are allowed to have a missing start.
indexes: Dict = defaultdict(int) indexes: Dict = defaultdict(int)
current_time = start_date + self.timeframe_td
self.progress.init_step(
BacktestState.BACKTEST, int((end_date - start_date) / self.timeframe_td)
)
# Loop timerange and get candle for each pair at that point in time # Loop timerange and get candle for each pair at that point in time
while current_time <= end_date: for current_time, pair, is_first in self.time_pair_generator(
open_trade_count_start = LocalTrade.bt_open_open_trade_count start_date, end_date, self.timeframe_td, list(data.keys())
self.check_abort() ):
strategy_safe_wrapper(self.strategy.bot_loop_start, supress_error=True)( if is_first:
current_time=current_time self.check_abort()
) strategy_safe_wrapper(self.strategy.bot_loop_start, supress_error=True)(
for i, pair in enumerate(data): current_time=current_time
row_index = indexes[pair] )
row = self.validate_row(data, pair, row_index, current_time) row_index = indexes[pair]
if not row: row = self.validate_row(data, pair, row_index, current_time)
if not row:
continue
row_index += 1
indexes[pair] = row_index
self.dataprovider._set_dataframe_max_index(self.required_startup + row_index)
self.dataprovider._set_dataframe_max_date(current_time)
current_detail_time: datetime = row[DATE_IDX].to_pydatetime()
trade_dir: Optional[LongShort] = self.check_for_trade_entry(row)
if (
(trade_dir is not None or len(LocalTrade.bt_trades_open_pp[pair]) > 0)
and self.timeframe_detail
and pair in self.detail_data
):
# Spread out into detail timeframe.
# Should only happen when we are either in a trade for this pair
# or when we got the signal for a new trade.
exit_candle_end = current_detail_time + self.timeframe_td
detail_data = self.detail_data[pair]
detail_data = detail_data.loc[
(detail_data["date"] >= current_detail_time)
& (detail_data["date"] < exit_candle_end)
].copy()
if len(detail_data) == 0:
# Fall back to "regular" data if no detail data was found for this candle
self.backtest_loop(row, pair, current_time, end_date, trade_dir)
continue continue
detail_data.loc[:, "enter_long"] = row[LONG_IDX]
row_index += 1 detail_data.loc[:, "exit_long"] = row[ELONG_IDX]
indexes[pair] = row_index detail_data.loc[:, "enter_short"] = row[SHORT_IDX]
self.dataprovider._set_dataframe_max_index(self.required_startup + row_index) detail_data.loc[:, "exit_short"] = row[ESHORT_IDX]
self.dataprovider._set_dataframe_max_date(current_time) detail_data.loc[:, "enter_tag"] = row[ENTER_TAG_IDX]
current_detail_time: datetime = row[DATE_IDX].to_pydatetime() detail_data.loc[:, "exit_tag"] = row[EXIT_TAG_IDX]
trade_dir: Optional[LongShort] = self.check_for_trade_entry(row) is_first = True
current_time_det = current_time
if ( for det_row in detail_data[HEADERS].values.tolist():
(trade_dir is not None or len(LocalTrade.bt_trades_open_pp[pair]) > 0) self.dataprovider._set_dataframe_max_date(current_time_det)
and self.timeframe_detail self.backtest_loop(
and pair in self.detail_data det_row,
): pair,
# Spread out into detail timeframe. current_time_det,
# Should only happen when we are either in a trade for this pair end_date,
# or when we got the signal for a new trade. trade_dir,
exit_candle_end = current_detail_time + self.timeframe_td is_first,
detail_data = self.detail_data[pair]
detail_data = detail_data.loc[
(detail_data["date"] >= current_detail_time)
& (detail_data["date"] < exit_candle_end)
].copy()
if len(detail_data) == 0:
# Fall back to "regular" data if no detail data was found for this candle
open_trade_count_start = self.backtest_loop(
row, pair, current_time, end_date, open_trade_count_start, trade_dir
)
continue
detail_data.loc[:, "enter_long"] = row[LONG_IDX]
detail_data.loc[:, "exit_long"] = row[ELONG_IDX]
detail_data.loc[:, "enter_short"] = row[SHORT_IDX]
detail_data.loc[:, "exit_short"] = row[ESHORT_IDX]
detail_data.loc[:, "enter_tag"] = row[ENTER_TAG_IDX]
detail_data.loc[:, "exit_tag"] = row[EXIT_TAG_IDX]
is_first = True
current_time_det = current_time
for det_row in detail_data[HEADERS].values.tolist():
self.dataprovider._set_dataframe_max_date(current_time_det)
open_trade_count_start = self.backtest_loop(
det_row,
pair,
current_time_det,
end_date,
open_trade_count_start,
trade_dir,
is_first,
)
current_time_det += self.timeframe_detail_td
is_first = False
else:
self.dataprovider._set_dataframe_max_date(current_time)
open_trade_count_start = self.backtest_loop(
row, pair, current_time, end_date, open_trade_count_start, trade_dir
) )
current_time_det += self.timeframe_detail_td
# Move time one configured time_interval ahead. is_first = False
self.progress.increment() else:
current_time += self.timeframe_td self.dataprovider._set_dataframe_max_date(current_time)
self.backtest_loop(row, pair, current_time, end_date, trade_dir)
self.handle_left_open(LocalTrade.bt_trades_open_pp, data=data) self.handle_left_open(LocalTrade.bt_trades_open_pp, data=data)
self.wallets.update() self.wallets.update()

View File

@@ -1,6 +1,7 @@
# pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument
import random import random
from collections import defaultdict
from copy import deepcopy from copy import deepcopy
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path
@@ -15,7 +16,7 @@ from freqtrade.commands.optimize_commands import setup_optimize_configuration, s
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.data import history from freqtrade.data import history
from freqtrade.data.btanalysis import BT_DATA_COLUMNS, evaluate_result_multi from freqtrade.data.btanalysis import BT_DATA_COLUMNS, evaluate_result_multi
from freqtrade.data.converter import clean_ohlcv_dataframe from freqtrade.data.converter import clean_ohlcv_dataframe, ohlcv_fill_up_missing_data
from freqtrade.data.dataprovider import DataProvider from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history import get_timerange from freqtrade.data.history import get_timerange
from freqtrade.enums import CandleType, ExitType, RunMode from freqtrade.enums import CandleType, ExitType, RunMode
@@ -29,6 +30,7 @@ from freqtrade.util.datetime_helpers import dt_utc
from tests.conftest import ( from tests.conftest import (
CURRENT_TEST_STRATEGY, CURRENT_TEST_STRATEGY,
EXMS, EXMS,
generate_test_data,
get_args, get_args,
log_has, log_has,
log_has_re, log_has_re,
@@ -1485,6 +1487,7 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
default_conf["max_open_trades"] = 3 default_conf["max_open_trades"] = 3
backtesting = Backtesting(default_conf) backtesting = Backtesting(default_conf)
vr_spy = mocker.spy(backtesting, "validate_row")
backtesting._set_strategy(backtesting.strategylist[0]) backtesting._set_strategy(backtesting.strategylist[0])
backtesting.strategy.bot_loop_start = MagicMock() backtesting.strategy.bot_loop_start = MagicMock()
backtesting.strategy.advise_entry = _trend_alternate_hold # Override backtesting.strategy.advise_entry = _trend_alternate_hold # Override
@@ -1503,6 +1506,36 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
# bot_loop_start is called once per candle. # bot_loop_start is called once per candle.
assert backtesting.strategy.bot_loop_start.call_count == 499 assert backtesting.strategy.bot_loop_start.call_count == 499
# Validated row once per candle and pair
assert vr_spy.call_count == 2495
# List of calls pair args - in batches of 5 (s)
calls_per_candle = defaultdict(list)
for call in vr_spy.call_args_list:
calls_per_candle[call[0][3]].append(call[0][1])
all_orients = [x for _, x in calls_per_candle.items()]
distinct_calls = [list(x) for x in set(tuple(x) for x in all_orients)]
# All calls must be made for the full pairlist
assert all(len(x) == 5 for x in distinct_calls)
# order varied - and is not always identical
assert not all(
x == ["ADA/BTC", "DASH/BTC", "ETH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls
)
# But some calls should've kept the original ordering
assert any(
x == ["ADA/BTC", "DASH/BTC", "ETH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls
)
assert (
# Ordering can be different, but should be one of the following
any(x == ["ETH/BTC", "ADA/BTC", "DASH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls)
or any(
x == ["ETH/BTC", "LTC/BTC", "ADA/BTC", "DASH/BTC", "NXT/BTC"] for x in distinct_calls
)
)
# Make sure we have parallel trades # Make sure we have parallel trades
assert len(evaluate_result_multi(results["results"], "5m", 2)) > 0 assert len(evaluate_result_multi(results["results"], "5m", 2)) > 0
# make sure we don't have trades with more than configured max_open_trades # make sure we don't have trades with more than configured max_open_trades
@@ -1528,6 +1561,232 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0 assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0
@pytest.mark.parametrize("use_detail", [True, False])
@pytest.mark.parametrize("pair", ["ADA/USDT", "LTC/USDT"])
@pytest.mark.parametrize("tres", [0, 20, 30])
def test_backtest_multi_pair_detail(
    default_conf_usdt,
    fee,
    mocker,
    tres,
    pair,
    use_detail,
):
    """
    Literally the same as test_backtest_multi_pair - but with artificial data
    and (optionally, via `use_detail`) a 1m detail timeframe.
    `tres` trims the start of one pair's data to simulate a late-listing pair.
    """

    def _trend_alternate_hold(dataframe=None, metadata=None):
        """
        Buy every xth candle - sell every other xth -2 (hold on to pairs a bit)
        """
        # Different signal period per pair so entries/exits don't all align.
        if metadata["pair"] in ("ETH/USDT", "LTC/USDT"):
            multi = 20
        else:
            multi = 18
        dataframe["enter_long"] = np.where(dataframe.index % multi == 0, 1, 0)
        dataframe["exit_long"] = np.where((dataframe.index + multi - 2) % multi == 0, 1, 0)
        dataframe["enter_short"] = 0
        dataframe["exit_short"] = 0
        return dataframe

    # Disable stoploss/ROI so only the entry/exit signals drive trades.
    default_conf_usdt.update(
        {
            "runmode": "backtest",
            "stoploss": -1.0,
            "minimal_roi": {"0": 100},
        }
    )
    if use_detail:
        default_conf_usdt["timeframe_detail"] = "1m"

    mocker.patch(f"{EXMS}.get_min_pair_stake_amount", return_value=0.00001)
    mocker.patch(f"{EXMS}.get_max_pair_stake_amount", return_value=float("inf"))
    mocker.patch(f"{EXMS}.get_fee", fee)
    patch_exchange(mocker)

    # Build 5m candles by resampling the 1m data, so the detail data
    # is consistent with the main timeframe data.
    raw_candles_1m = generate_test_data("1m", 2500, "2022-01-03 12:00:00+00:00")
    raw_candles = ohlcv_fill_up_missing_data(raw_candles_1m, "5m", "dummy")

    pairs = ["ADA/USDT", "DASH/USDT", "ETH/USDT", "LTC/USDT", "NXT/USDT"]
    data = {pair: raw_candles for pair in pairs}
    detail_data = {pair: raw_candles_1m for pair in pairs}

    # Only use 500 lines to increase performance
    data = trim_dictlist(data, -500)

    # Remove data for one pair from the beginning of the data
    if tres > 0:
        data[pair] = data[pair][tres:].reset_index()
    default_conf_usdt["timeframe"] = "5m"
    default_conf_usdt["max_open_trades"] = 3

    backtesting = Backtesting(default_conf_usdt)
    # Spies used below to verify per-candle/per-pair call counts.
    vr_spy = mocker.spy(backtesting, "validate_row")
    bl_spy = mocker.spy(backtesting, "backtest_loop")
    backtesting.detail_data = detail_data
    backtesting._set_strategy(backtesting.strategylist[0])
    backtesting.strategy.bot_loop_start = MagicMock()
    backtesting.strategy.advise_entry = _trend_alternate_hold  # Override
    backtesting.strategy.advise_exit = _trend_alternate_hold  # Override

    processed = backtesting.strategy.advise_all_indicators(data)
    min_date, max_date = get_timerange(processed)

    backtest_conf = {
        "processed": deepcopy(processed),
        "start_date": min_date,
        "end_date": max_date,
    }

    results = backtesting.backtest(**backtest_conf)

    # bot_loop_start is called once per candle.
    assert backtesting.strategy.bot_loop_start.call_count == 499
    # Validated row once per candle and pair (499 candles * 5 pairs)
    assert vr_spy.call_count == 2495

    if use_detail:
        # Backtest loop is called once per candle per pair
        # Exact numbers depend on trade state - but should be around 3_800
        assert bl_spy.call_count > 3_800
        assert bl_spy.call_count < 3_900
    else:
        assert bl_spy.call_count < 2495

    # Make sure we have parallel trades
    assert len(evaluate_result_multi(results["results"], "5m", 2)) > 0
    # make sure we don't have trades with more than configured max_open_trades
    assert len(evaluate_result_multi(results["results"], "5m", 3)) == 0

    # Cached data correctly removed amounts
    # With untrimmed data, one candle is always withheld (hence offset 1).
    offset = 1 if tres == 0 else 0
    removed_candles = len(data[pair]) - offset
    assert len(backtesting.dataprovider.get_analyzed_dataframe(pair, "5m")[0]) == removed_candles
    assert (
        len(backtesting.dataprovider.get_analyzed_dataframe("NXT/USDT", "5m")[0])
        == len(data["NXT/USDT"]) - 1
    )

    # Rerun with max_open_trades=1: no parallel trades may occur.
    backtesting.strategy.max_open_trades = 1
    backtesting.config.update({"max_open_trades": 1})
    backtest_conf = {
        "processed": deepcopy(processed),
        "start_date": min_date,
        "end_date": max_date,
    }
    results = backtesting.backtest(**backtest_conf)
    assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0
@pytest.mark.parametrize("use_detail", [True, False])
def test_backtest_multi_pair_long_short_switch(
    default_conf_usdt,
    fee,
    mocker,
    use_detail,
):
    """
    Futures backtest on a single pair whose strategy flips direction:
    each long exit signal doubles as a short entry and vice versa.
    Runs with and without a 1m detail timeframe (`use_detail`).
    """

    def _trend_alternate_hold(dataframe=None, metadata=None):
        """
        Buy every xth candle - sell every other xth -2 (hold on to pairs a bit)
        """
        if metadata["pair"] in ("ETH/USDT", "LTC/USDT"):
            multi = 20
        else:
            multi = 18
        dataframe["enter_long"] = np.where(dataframe.index % multi == 0, 1, 0)
        dataframe["exit_long"] = np.where((dataframe.index + multi - 2) % multi == 0, 1, 0)
        # Mirror the signals: exiting a long enters a short, and the next
        # long entry closes the short - so the strategy alternates direction.
        dataframe["enter_short"] = dataframe["exit_long"]
        dataframe["exit_short"] = dataframe["enter_long"]
        return dataframe

    # Futures mode with stoploss/ROI disabled, single trade slot.
    default_conf_usdt.update(
        {
            "runmode": "backtest",
            "timeframe": "5m",
            "max_open_trades": 1,
            "stoploss": -1.0,
            "minimal_roi": {"0": 100},
            "margin_mode": "isolated",
            "trading_mode": "futures",
        }
    )
    if use_detail:
        default_conf_usdt["timeframe_detail"] = "1m"

    mocker.patch(f"{EXMS}.get_min_pair_stake_amount", return_value=0.00001)
    mocker.patch(f"{EXMS}.get_max_pair_stake_amount", return_value=float("inf"))
    mocker.patch(f"{EXMS}.get_fee", fee)
    patch_exchange(mocker)

    # Build 5m candles from the 1m data so detail and main data agree.
    raw_candles_1m = generate_test_data("1m", 2500, "2022-01-03 12:00:00+00:00")
    raw_candles = ohlcv_fill_up_missing_data(raw_candles_1m, "5m", "dummy")

    pairs = [
        "ETH/USDT:USDT",
    ]
    default_conf_usdt["exchange"]["pair_whitelist"] = pairs

    # Fake whitelist to avoid some mock data issues
    mocker.patch(f"{EXMS}.get_maintenance_ratio_and_amt", return_value=(0.01, 0.01))
    data = {pair: raw_candles for pair in pairs}
    detail_data = {pair: raw_candles_1m for pair in pairs}

    # Only use 500 lines to increase performance
    data = trim_dictlist(data, -500)

    backtesting = Backtesting(default_conf_usdt)
    # Spies used below to verify per-candle call counts.
    vr_spy = mocker.spy(backtesting, "validate_row")
    bl_spy = mocker.spy(backtesting, "backtest_loop")

    backtesting.detail_data = detail_data
    backtesting.funding_fee_timeframe_secs = 3600 * 8  # 8h
    backtesting.futures_data = {pair: pd.DataFrame() for pair in pairs}

    backtesting.strategylist[0].can_short = True
    backtesting._set_strategy(backtesting.strategylist[0])
    backtesting.strategy.bot_loop_start = MagicMock()
    backtesting.strategy.advise_entry = _trend_alternate_hold  # Override
    backtesting.strategy.advise_exit = _trend_alternate_hold  # Override

    processed = backtesting.strategy.advise_all_indicators(data)
    min_date, max_date = get_timerange(processed)

    backtest_conf = {
        "processed": deepcopy(processed),
        "start_date": min_date,
        "end_date": max_date,
    }

    results = backtesting.backtest(**backtest_conf)

    # bot_loop_start is called once per candle.
    assert backtesting.strategy.bot_loop_start.call_count == 499
    # Validated row once per candle and pair (single pair here)
    assert vr_spy.call_count == 499

    if use_detail:
        # With detail data, backtest_loop runs per detail candle when in
        # (or entering) a trade - hence more calls than main candles.
        assert bl_spy.call_count == 1071
    else:
        assert bl_spy.call_count == 479

    # At least one trade open at any given time (threshold 0).
    assert len(evaluate_result_multi(results["results"], "5m", 0)) > 0
    # make sure we don't have trades with more than configured max_open_trades
    assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0

    # Expect 30 trades overall with this signal setup.
    assert len(results["results"]) == 30
def test_backtest_start_timerange(default_conf, mocker, caplog, testdatadir): def test_backtest_start_timerange(default_conf, mocker, caplog, testdatadir):
patch_exchange(mocker) patch_exchange(mocker)
mocker.patch("freqtrade.optimize.backtesting.Backtesting.backtest") mocker.patch("freqtrade.optimize.backtesting.Backtesting.backtest")