Keep existing trades_remove_duplicates for now

This commit is contained in:
Matthias
2023-08-17 17:22:17 +02:00
parent 53db254cba
commit 0be2250cf5
3 changed files with 30 additions and 8 deletions

View File

@@ -195,7 +195,18 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
return frame
def trades_remove_duplicates(trades: pd.DataFrame) -> pd.DataFrame:
def trades_remove_duplicates(trades: List[List]) -> List[List]:
"""
Removes duplicates from the trades list.
Uses itertools.groupby to avoid converting to pandas.
Tests show it as being pretty efficient on lists of 4M Lists.
:param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns
:return: same format as above, but with duplicates removed
"""
return [i for i, _ in itertools.groupby(sorted(trades, key=itemgetter(0)))]
def trades_df_remove_duplicates(trades: pd.DataFrame) -> pd.DataFrame:
"""
Removes duplicates from the trades DataFrame.
Uses pandas.DataFrame.drop_duplicates to remove duplicates based on the 'timestamp' column.

View File

@@ -16,7 +16,8 @@ from pandas import DataFrame, to_datetime
from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.constants import ListPairsWithTimeframes, TradeList
from freqtrade.data.converter import clean_ohlcv_dataframe, trades_remove_duplicates, trim_dataframe
from freqtrade.data.converter import (clean_ohlcv_dataframe, trades_df_remove_duplicates,
trim_dataframe)
from freqtrade.enums import CandleType, TradingMode
from freqtrade.exchange import timeframe_to_seconds
@@ -216,12 +217,12 @@ class IDataHandler(ABC):
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
"""
trades = trades_remove_duplicates(self._trades_load(pair, timerange=timerange))
trades = trades_df_remove_duplicates(self._trades_load(pair, timerange=timerange))
trades['timestamp'] = to_datetime(trades['timestamp'], unit='ms', utc=True)
return trades
def trades_load_aslist(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
trades = trades_remove_duplicates(self._trades_load(pair, timerange=timerange))
trades = trades_df_remove_duplicates(self._trades_load(pair, timerange=timerange))
return trades.values.tolist()
@classmethod

View File

@@ -10,8 +10,9 @@ import pytest
from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format,
ohlcv_fill_up_missing_data, ohlcv_to_dataframe,
reduce_dataframe_footprint, trades_dict_to_list,
trades_remove_duplicates, trades_to_ohlcv, trim_dataframe)
reduce_dataframe_footprint, trades_df_remove_duplicates,
trades_dict_to_list, trades_remove_duplicates,
trades_to_ohlcv, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data, load_pair_history,
validate_backtest_data)
from freqtrade.data.history.idatahandler import IDataHandler
@@ -298,11 +299,20 @@ def test_trim_dataframe(testdatadir) -> None:
assert all(data_modify.iloc[0] == data.iloc[25])
def test_trades_remove_duplicates(trades_history_df):
def test_trades_remove_duplicates(trades_history):
trades_history1 = trades_history * 3
assert len(trades_history1) == len(trades_history) * 3
res = trades_remove_duplicates(trades_history1)
assert len(res) == len(trades_history)
for i, t in enumerate(res):
assert t == trades_history[i]
def test_trades_df_remove_duplicates(trades_history_df):
trades_history1 = pd.concat([trades_history_df, trades_history_df, trades_history_df]
).reset_index(drop=True)
assert len(trades_history1) == len(trades_history_df) * 3
res = trades_remove_duplicates(trades_history1)
res = trades_df_remove_duplicates(trades_history1)
assert len(res) == len(trades_history_df)
assert res.equals(trades_history_df)