feat: Only load trades needed for specified timerange

This commit is contained in:
Maxime Pagnoulle
2025-08-23 20:24:10 +02:00
parent 87b0a6d3f3
commit 0233c38711
2 changed files with 28 additions and 5 deletions

View File

@@ -498,7 +498,12 @@ class DataProvider:
return DataFrame() return DataFrame()
def trades( def trades(
self, pair: str, timeframe: str | None = None, copy: bool = True, candle_type: str = "" self,
pair: str,
timeframe: str | None = None,
copy: bool = True,
candle_type: str = "",
timerange: TimeRange | None = None,
) -> DataFrame: ) -> DataFrame:
""" """
Get candle (TRADES) data for the given pair as DataFrame Get candle (TRADES) data for the given pair as DataFrame
@@ -526,7 +531,7 @@ class DataProvider:
self._config["datadir"], data_format=self._config["dataformat_trades"] self._config["datadir"], data_format=self._config["dataformat_trades"]
) )
trades_df = data_handler.trades_load( trades_df = data_handler.trades_load(
pair, self._config.get("trading_mode", TradingMode.SPOT) pair, self._config.get("trading_mode", TradingMode.SPOT), timerange=timerange
) )
return trades_df return trades_df

View File

@@ -1,6 +1,7 @@
import logging import logging
from pandas import DataFrame, read_feather, to_datetime from pandas import DataFrame, read_feather, to_datetime
from pyarrow import dataset
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
@@ -116,16 +117,33 @@ class FeatherDataHandler(IDataHandler):
) -> DataFrame: ) -> DataFrame:
""" """
Load a pair's trades from its feather file Load a pair's trades from its feather file
# TODO: respect timerange ...
:param pair: Load trades for this pair :param pair: Load trades for this pair
:param trading_mode: Trading mode to use (used to determine the filename) :param trading_mode: Trading mode to use (used to determine the filename)
:param timerange: Timerange to load trades for - currently not implemented :param timerange: Timerange to load trades for - filters data to this range if provided
:return: Dataframe containing trades :return: Dataframe containing trades
""" """
filename = self._pair_trades_filename(self._datadir, pair, trading_mode) filename = self._pair_trades_filename(self._datadir, pair, trading_mode)
if not filename.exists(): if not filename.exists():
return DataFrame(columns=DEFAULT_TRADES_COLUMNS) return DataFrame(columns=DEFAULT_TRADES_COLUMNS)
# Load trades data with optional timerange filtering
if timerange is None:
# No timerange filter - load entire file
logger.debug(f"Loading entire trades file for {pair}")
tradesdata = read_feather(filename)
else:
# Use Arrow dataset with predicate pushdown for efficient filtering
try:
dataset_reader = dataset.dataset(filename, format="feather")
time_filter = (dataset.field("timestamp") >= timerange.startts) & (
dataset.field("timestamp") <= timerange.stopts
)
tradesdata = dataset_reader.to_table(filter=time_filter).to_pandas()
logger.debug(f"Loaded {len(tradesdata)} trades for {pair}")
except (ImportError, AttributeError, ValueError) as e:
# Fallback: load entire file
logger.debug(f"Unable to use Arrow filtering, loading entire trades file: {e}")
tradesdata = read_feather(filename) tradesdata = read_feather(filename)
return tradesdata return tradesdata