def trades_remove_duplicates(trades: List[List]) -> List[List]:
    """
    Removes duplicates from the trades list without converting to pandas.
    The result is ordered by timestamp (first column); among rows sharing a
    timestamp, the first occurrence of each distinct row is kept in input order.
    :param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns.
        Row values must be hashable (numbers, strings, None).
    :return: same format as above, but with duplicates removed
    """
    by_timestamp = itemgetter(0)
    unique: List[List] = []
    # sorted() is stable, so sorting on the timestamp alone does NOT make identical
    # rows adjacent when several different trades share one timestamp
    # (e.g. [a, b, a, b]). Comparing neighbours is therefore not enough: rows are
    # bucketed per timestamp and de-duplicated inside each bucket.
    ordered = sorted(trades, key=by_timestamp)
    for _, bucket in itertools.groupby(ordered, key=by_timestamp):
        # The set is scoped to a single timestamp, which keeps it small.
        seen = set()
        for trade in bucket:
            fingerprint = tuple(trade)
            if fingerprint not in seen:
                seen.add(fingerprint)
                unique.append(trade)
    return unique
def trades_load_aslist(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
    """
    Load the trades of a pair and return them as a list of rows.
    :param pair: Pair to load trades for
    :param timerange: Timerange passed through to the handler's _trades_load
    :return: De-duplicated trades, converted from the DataFrame via values.tolist()
    """
    raw_trades = self._trades_load(pair, timerange=timerange)
    deduplicated = trades_df_remove_duplicates(raw_trades)
    return deduplicated.values.tolist()
def test_trades_remove_duplicates(trades_history):
    """A tripled trades list must collapse back to the original rows, in order."""
    tripled = trades_history * 3
    assert len(tripled) == len(trades_history) * 3

    deduplicated = trades_remove_duplicates(tripled)
    assert len(deduplicated) == len(trades_history)
    for expected, actual in zip(trades_history, deduplicated):
        assert actual == expected


def test_trades_df_remove_duplicates(trades_history_df):
    """A tripled trades DataFrame must collapse back to the original frame."""
    tripled = pd.concat([trades_history_df] * 3).reset_index(drop=True)
    assert len(tripled) == len(trades_history_df) * 3

    deduplicated = trades_df_remove_duplicates(tripled)
    assert len(deduplicated) == len(trades_history_df)
    assert deduplicated.equals(trades_history_df)