fix: Improved caching

closes #11008
Author: Matthias
Date:   2024-12-06 06:38:49 +01:00
Parent: 9d07f5dc2e
Commit: 621dfc136e
2 changed files with 20 additions and 42 deletions

@@ -68,18 +68,17 @@ def _calculate_ohlcv_candle_start_and_end(df: pd.DataFrame, timeframe: str):
 def populate_dataframe_with_trades(
-    cached_grouped_trades: OrderedDict[tuple[datetime, datetime], pd.DataFrame],
+    cached_grouped_trades: pd.DataFrame | None,
     config: Config,
     dataframe: pd.DataFrame,
     trades: pd.DataFrame,
-) -> tuple[pd.DataFrame, OrderedDict[tuple[datetime, datetime], pd.DataFrame]]:
+) -> tuple[pd.DataFrame, pd.DataFrame]:
     """
     Populates a dataframe with trades
     :param dataframe: Dataframe to populate
     :param trades: Trades to populate with
     :return: Dataframe with trades populated
     """
-    from freqtrade.exchange import timeframe_to_next_date
     timeframe = config["timeframe"]
     config_orderflow = config["orderflow"]
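
This hunk changes the shape of the cache that is threaded through populate_dataframe_with_trades: the per-candle OrderedDict keyed by (candle_start, candle_end) windows becomes a single DataFrame of recently processed candles, or None before the first call. A minimal sketch of the two shapes, not taken from the commit; the date and total_trades columns appear in the diff, the values are made up:

    from collections import OrderedDict
    from datetime import datetime, timedelta

    import pandas as pd

    start = datetime(2024, 12, 6, 6, 0)
    end = start + timedelta(minutes=5)

    # Old shape: one single-row DataFrame cached per (candle_start, candle_end) window.
    old_cache: OrderedDict[tuple[datetime, datetime], pd.DataFrame] = OrderedDict()
    old_cache[(start, end)] = pd.DataFrame({"date": [start], "total_trades": [42]})

    # New shape: one DataFrame holding all recently processed candles at once;
    # None simply means "nothing cached yet for this pair".
    new_cache: pd.DataFrame | None = pd.DataFrame({"date": [start, end], "total_trades": [42, 17]})
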
@@ -101,34 +100,27 @@ def populate_dataframe_with_trades(
         trades = trades.loc[trades["candle_start"] >= start_date]
         trades.reset_index(inplace=True, drop=True)
         # group trades by candle start
         trades_grouped_by_candle_start = trades.groupby("candle_start", group_keys=False)
         candle_start: datetime
         for candle_start, trades_grouped_df in trades_grouped_by_candle_start:
             is_between = candle_start == dataframe["date"]
             if is_between.any():
-                candle_next = timeframe_to_next_date(timeframe, candle_start)
-                if candle_next not in trades_grouped_by_candle_start.groups:
-                    logger.warning(
-                        f"candle at {candle_start} with {len(trades_grouped_df)} trades "
-                        f"might be unfinished, because no finished trades at {candle_next}"
-                    )
-                # Use caching mechanism
-                if (candle_start, candle_next) in cached_grouped_trades:
-                    cache_entry = cached_grouped_trades[(candle_start, candle_next)]
-                    # dataframe.loc[is_between] = cache_entry # doesn't take, so we need workaround:
-                    # Create a dictionary of the column values to be assigned
-                    update_dict = {c: cache_entry[c].iat[0] for c in cache_entry.columns}
-                    # Assign the values using the update_dict
-                    dataframe.loc[is_between, update_dict.keys()] = pd.DataFrame(
-                        [update_dict], index=dataframe.loc[is_between].index
-                    )
-                    continue
                 # there can only be one row with the same date
                 index = dataframe.index[is_between][0]
+                if (
+                    cached_grouped_trades is not None
+                    and (candle_start == cached_grouped_trades["date"]).any()
+                ):
+                    logger.info(f"Using cached orderflow data for {candle_start}")
+                    # Check if the trades are already in the cache
+                    for col in ADDED_COLUMNS:
+                        dataframe.at[index, col] = cached_grouped_trades.loc[
+                            (cached_grouped_trades["date"] == candle_start), col
+                        ].values
+                    continue
                 dataframe.at[index, "trades"] = trades_grouped_df.drop(
                     columns=["candle_start", "candle_end"]
                 ).to_dict(orient="records")
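
The cache-hit branch added above finds an already-processed candle by matching its date in the cached DataFrame and copies the precomputed columns across (ADDED_COLUMNS is the module-level list of columns this function fills in; its exact contents are not shown in this diff). A self-contained sketch of that lookup with made-up columns; the diff copies whole .values arrays, while this toy version grabs the scalar with .iat[0] to keep the frames simple:

    from datetime import datetime

    import pandas as pd

    ADDED_COLUMNS = ["total_trades", "delta"]  # stand-in for the real constant

    candle_start = datetime(2024, 12, 6, 6, 0)
    dataframe = pd.DataFrame({"date": [candle_start], "total_trades": [None], "delta": [None]})
    cached_grouped_trades = pd.DataFrame({"date": [candle_start], "total_trades": [42], "delta": [-3.5]})

    is_between = candle_start == dataframe["date"]
    index = dataframe.index[is_between][0]

    # A date match means this candle was already processed: reuse the cached columns
    # instead of recomputing the orderflow aggregates from raw trades.
    if (candle_start == cached_grouped_trades["date"]).any():
        for col in ADDED_COLUMNS:
            dataframe.at[index, col] = cached_grouped_trades.loc[
                cached_grouped_trades["date"] == candle_start, col
            ].iat[0]
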
@@ -176,21 +168,11 @@ def populate_dataframe_with_trades(
                 )
                 dataframe.at[index, "total_trades"] = len(trades_grouped_df)
-                # Cache the result
-                cached_grouped_trades[(candle_start, candle_next)] = dataframe.loc[
-                    is_between
-                ].copy()
-                # Maintain cache size
-                if (
-                    config.get("runmode") in (RunMode.DRY_RUN, RunMode.LIVE)
-                    and len(cached_grouped_trades) > config_orderflow["cache_size"]
-                ):
-                    cached_grouped_trades.popitem(last=False)
             else:
                 logger.debug(f"Found NO candles for trades starting with {candle_start}")
         logger.debug(f"trades.groups_keys in {time.time() - start_time} seconds")
+        # Cache the entire dataframe
+        cached_grouped_trades = dataframe.tail(config_orderflow["cache_size"]).copy()
     except Exception as e:
         logger.exception("Error populating dataframe with trades")
         raise DependencyException(e)
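
This is the core of the caching change: the old code cached one single-row frame per candle and, in dry/live runs, evicted the oldest (candle_start, candle_next) entry with popitem(last=False) once the OrderedDict grew past cache_size; the new code simply keeps the last cache_size rows of the fully populated dataframe, once per call. A toy comparison of the two eviction styles (integer keys stand in for the old candle-window tuples, cache_size is invented):

    from collections import OrderedDict

    import pandas as pd

    cache_size = 3  # stand-in for config["orderflow"]["cache_size"]

    # Old style: bounded OrderedDict, dropping the oldest candle window on overflow.
    old_cache: OrderedDict[int, pd.DataFrame] = OrderedDict()
    for candle in range(5):
        old_cache[candle] = pd.DataFrame({"total_trades": [candle]})
        if len(old_cache) > cache_size:
            old_cache.popitem(last=False)
    print(list(old_cache))  # [2, 3, 4]

    # New style: the cache is just the tail of the enriched dataframe.
    dataframe = pd.DataFrame({"date": range(5), "total_trades": range(5)})
    cached_grouped_trades = dataframe.tail(cache_size).copy()
    print(cached_grouped_trades["date"].tolist())  # [2, 3, 4]
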

@@ -141,9 +141,7 @@ class IStrategy(ABC, HyperStrategyMixin):
     market_direction: MarketDirection = MarketDirection.NONE
     # Global cache dictionary
-    _cached_grouped_trades_per_pair: dict[
-        str, OrderedDict[tuple[datetime, datetime], DataFrame]
-    ] = {}
+    _cached_grouped_trades_per_pair: dict[str, DataFrame] = {}
     def __init__(self, config: Config) -> None:
         self.config = config
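
Because the cache no longer nests an OrderedDict per candle, the class-level per-pair mapping flattens to dict[str, DataFrame]. A small illustration of the access pattern the new type implies (class and pair names are hypothetical, not from the diff):

    import pandas as pd
    from pandas import DataFrame

    class CacheSketch:
        # Mirrors the flattened class attribute: one cached DataFrame per pair.
        _cached_grouped_trades_per_pair: dict[str, DataFrame] = {}

    cache = CacheSketch._cached_grouped_trades_per_pair
    cache["BTC/USDT"] = pd.DataFrame({"date": pd.to_datetime(["2024-12-06 06:00"]), "total_trades": [42]})

    print(cache.get("ETH/USDT"))        # None, which matches the new `DataFrame | None` parameter
    print(cache.get("BTC/USDT").shape)  # (1, 2)
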
@@ -1608,9 +1606,7 @@ class IStrategy(ABC, HyperStrategyMixin):
             config["timeframe"] = self.timeframe
             pair = metadata["pair"]
             # TODO: slice trades to size of dataframe for faster backtesting
-            cached_grouped_trades: OrderedDict[tuple[datetime, datetime], DataFrame] = (
-                self._cached_grouped_trades_per_pair.get(pair, OrderedDict())
-            )
+            cached_grouped_trades: DataFrame | None = self._cached_grouped_trades_per_pair.get(pair)
             dataframe, cached_grouped_trades = populate_dataframe_with_trades(
                 cached_grouped_trades, config, dataframe, trades
             )
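
At the call site the strategy now fetches the pair's cached DataFrame (None on first use), passes it through populate_dataframe_with_trades, and receives the refreshed cache back along with the enriched dataframe; presumably it is written back into _cached_grouped_trades_per_pair just below the context shown here. A hedged end-to-end sketch with a dummy populate function (the stub name, pair, and cache_size below are invented, not freqtrade APIs):

    from __future__ import annotations

    import pandas as pd
    from pandas import DataFrame

    def populate_stub(
        cached: DataFrame | None, dataframe: DataFrame, cache_size: int = 3
    ) -> tuple[DataFrame, DataFrame]:
        # Stand-in honouring the new contract: accept DataFrame | None and return
        # the enriched dataframe plus the new cache (its tail).
        dataframe = dataframe.copy()
        dataframe["total_trades"] = 1  # pretend orderflow columns were computed
        return dataframe, dataframe.tail(cache_size).copy()

    _cached_grouped_trades_per_pair: dict[str, DataFrame] = {}
    pair = "BTC/USDT"  # hypothetical pair
    dataframe = pd.DataFrame({"date": pd.date_range("2024-12-06", periods=5, freq="5min")})

    cached = _cached_grouped_trades_per_pair.get(pair)  # None on the first call
    dataframe, cached = populate_stub(cached, dataframe)
    _cached_grouped_trades_per_pair[pair] = cached  # persist the refreshed per-pair cache
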