Merge pull request #9065 from freqtrade/trades_data_handling

Improve Trades data handling
This commit is contained in:
Matthias
2023-08-18 18:04:38 +02:00
committed by GitHub
14 changed files with 210 additions and 211 deletions

View File

@@ -14,7 +14,7 @@ import pytest
from freqtrade import constants
from freqtrade.commands import Arguments
from freqtrade.data.converter import ohlcv_to_dataframe
from freqtrade.data.converter import ohlcv_to_dataframe, trades_list_to_df
from freqtrade.edge import PairInfo
from freqtrade.enums import CandleType, MarginMode, RunMode, SignalDirection, TradingMode
from freqtrade.exchange import Exchange
@@ -2346,7 +2346,15 @@ def trades_history():
[1565798399629, '1261813bb30', None, 'buy', 0.019627, 0.244, 0.004788987999999999],
[1565798399752, '1261813cc31', None, 'sell', 0.019626, 0.011, 0.00021588599999999999],
[1565798399862, '126181cc332', None, 'sell', 0.019626, 0.011, 0.00021588599999999999],
[1565798399872, '1261aa81333', None, 'sell', 0.019626, 0.011, 0.00021588599999999999]]
[1565798399862, '126181cc333', None, 'sell', 0.019626, 0.012, 0.00021588599999999999],
[1565798399872, '1261aa81334', None, 'sell', 0.019626, 0.011, 0.00021588599999999999]]
@pytest.fixture(scope="function")
def trades_history_df(trades_history):
trades = trades_list_to_df(trades_history)
trades['date'] = pd.to_datetime(trades['timestamp'], unit='ms', utc=True)
return trades
@pytest.fixture(scope="function")

View File

@@ -4,13 +4,14 @@ from pathlib import Path
from shutil import copyfile
import numpy as np
import pandas as pd
import pytest
from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format,
ohlcv_fill_up_missing_data, ohlcv_to_dataframe,
reduce_dataframe_footprint, trades_dict_to_list,
trades_remove_duplicates, trades_to_ohlcv, trim_dataframe)
reduce_dataframe_footprint, trades_df_remove_duplicates,
trades_dict_to_list, trades_to_ohlcv, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data, load_pair_history,
validate_backtest_data)
from freqtrade.data.history.idatahandler import IDataHandler
@@ -34,13 +35,13 @@ def test_ohlcv_to_dataframe(ohlcv_history_list, caplog):
assert log_has('Converting candle (OHLCV) data to dataframe for pair UNITTEST/BTC.', caplog)
def test_trades_to_ohlcv(trades_history, caplog):
def test_trades_to_ohlcv(trades_history_df, caplog):
caplog.set_level(logging.DEBUG)
with pytest.raises(ValueError, match="Trade-list empty."):
trades_to_ohlcv([], '1m')
trades_to_ohlcv(pd.DataFrame(columns=trades_history_df.columns), '1m')
df = trades_to_ohlcv(trades_history, '1m')
df = trades_to_ohlcv(trades_history_df, '1m')
assert not df.empty
assert len(df) == 1
assert 'open' in df.columns
@@ -297,13 +298,13 @@ def test_trim_dataframe(testdatadir) -> None:
assert all(data_modify.iloc[0] == data.iloc[25])
def test_trades_remove_duplicates(trades_history):
trades_history1 = trades_history * 3
assert len(trades_history1) == len(trades_history) * 3
res = trades_remove_duplicates(trades_history1)
assert len(res) == len(trades_history)
for i, t in enumerate(res):
assert t == trades_history[i]
def test_trades_df_remove_duplicates(trades_history_df):
trades_history1 = pd.concat([trades_history_df, trades_history_df, trades_history_df]
).reset_index(drop=True)
assert len(trades_history1) == len(trades_history_df) * 3
res = trades_df_remove_duplicates(trades_history1)
assert len(res) == len(trades_history_df)
assert res.equals(trades_history_df)
def test_trades_dict_to_list(fetch_trades_result):

View File

@@ -6,7 +6,8 @@ from pathlib import Path
from unittest.mock import MagicMock
import pytest
from pandas import DataFrame
from pandas import DataFrame, Timestamp
from pandas.testing import assert_frame_equal
from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
@@ -117,12 +118,6 @@ def test_datahandler_ohlcv_get_available_data(testdatadir):
assert set(paircombs) == {('UNITTEST/BTC', '5m', CandleType.SPOT)}
def test_jsondatahandler_trades_get_pairs(testdatadir):
pairs = JsonGzDataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH', 'XRP/OLD'}
def test_jsondatahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
@@ -246,8 +241,10 @@ def test_datahandler__check_empty_df(testdatadir, caplog):
assert log_has_re(expected_text, caplog)
@pytest.mark.parametrize('datahandler', ['parquet'])
# @pytest.mark.parametrize('datahandler', [])
@pytest.mark.skip("All datahandlers currently support trades data.")
def test_datahandler_trades_not_supported(datahandler, testdatadir, ):
# Currently disabled. Reenable should a new provider not support trades data.
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_load('UNITTEST/ETH')
@@ -266,18 +263,6 @@ def test_jsondatahandler_trades_load(testdatadir, caplog):
assert log_has(logmsg, caplog)
def test_jsondatahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
@@ -291,79 +276,48 @@ def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
def test_datahandler_trades_append(datahandler, testdatadir):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_append('UNITTEST/ETH', [])
dh.trades_append('UNITTEST/ETH', DataFrame())
def test_hdf5datahandler_trades_get_pairs(testdatadir):
pairs = HDF5DataHandler.trades_get_pairs(testdatadir)
@pytest.mark.parametrize('datahandler,expected', [
('jsongz', {'XRP/ETH', 'XRP/OLD'}),
('hdf5', {'XRP/ETH'}),
('feather', {'XRP/ETH'}),
('parquet', {'XRP/ETH'}),
])
def test_datahandler_trades_get_pairs(testdatadir, datahandler, expected):
pairs = get_datahandlerclass(datahandler).trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH'}
assert set(pairs) == expected
def test_hdf5datahandler_trades_load(testdatadir):
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
assert isinstance(trades, DataFrame)
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
assert isinstance(trades1, DataFrame)
assert trades1.empty
# data goes from 2019-10-11 - 2019-10-13
timerange = TimeRange.parse_timerange('20191011-20191012')
trades2 = dh._trades_load('XRP/ETH', timerange)
assert len(trades) > len(trades2)
# Check that ID is None (If it's nan, it's wrong)
assert trades2[0][2] is None
assert trades2.iloc[0]['type'] is None
# unfiltered load has trades before starttime
assert len([t for t in trades if t[0] < timerange.startts * 1000]) >= 0
assert len(trades.loc[trades['timestamp'] < timerange.startts * 1000]) >= 0
# filtered list does not have trades before starttime
assert len([t for t in trades2 if t[0] < timerange.startts * 1000]) == 0
assert len(trades2.loc[trades2['timestamp'] < timerange.startts * 1000]) == 0
# unfiltered load has trades after endtime
assert len([t for t in trades if t[0] > timerange.stopts * 1000]) > 0
assert len(trades.loc[trades['timestamp'] > timerange.stopts * 1000]) >= 0
# filtered list does not have trades after endtime
assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
def test_hdf5datahandler_trades_store(testdatadir, tmpdir):
tmpdir1 = Path(tmpdir)
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
dh1 = get_datahandler(tmpdir1, 'hdf5')
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.h5'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_hdf5datahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'hdf5')
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
assert len(trades2.loc[trades2['timestamp'] > timerange.stopts * 1000]) == 0
# assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
@@ -490,50 +444,42 @@ def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
assert unlinkmock.call_count == 2
def test_featherdatahandler_trades_load(testdatadir):
dh = get_datahandler(testdatadir, 'feather')
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_load(testdatadir, datahandler):
dh = get_datahandler(testdatadir, datahandler)
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
assert trades[0][0] == 1570752011620
assert trades[-1][-1] == 0.1986231
assert isinstance(trades, DataFrame)
assert trades.iloc[0]['timestamp'] == 1570752011620
assert trades.iloc[0]['date'] == Timestamp('2019-10-11 00:00:11.620000+0000')
assert trades.iloc[-1]['cost'] == 0.1986231
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
assert isinstance(trades, DataFrame)
assert trades1.empty
def test_featherdatahandler_trades_store(testdatadir, tmpdir):
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_store(testdatadir, tmpdir, datahandler):
tmpdir1 = Path(tmpdir)
dh = get_datahandler(testdatadir, 'feather')
dh = get_datahandler(testdatadir, datahandler)
trades = dh.trades_load('XRP/ETH')
dh1 = get_datahandler(tmpdir1, 'feather')
dh1 = get_datahandler(tmpdir1, datahandler)
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.feather'
file = tmpdir1 / f'XRP_NEW-trades.{dh1._get_file_extension()}'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert_frame_equal(trades, trades_new, check_exact=True)
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_featherdatahandler_trades_purge(mocker, testdatadir):
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_purge(mocker, testdatadir, datahandler):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'feather')
dh = get_datahandler(testdatadir, datahandler)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0

View File

@@ -581,7 +581,7 @@ def test_download_trades_history(trades_history, mocker, default_conf, testdatad
assert _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='ETH/BTC')
assert log_has("New Amount of trades: 5", caplog)
assert log_has("New Amount of trades: 6", caplog)
assert file1.is_file()
ght_mock.reset_mock()
@@ -651,10 +651,10 @@ def test_convert_trades_to_ohlcv(testdatadir, tmpdir, caplog):
assert_frame_equal(dfbak_1m, df_1m, check_exact=True)
assert_frame_equal(dfbak_5m, df_5m, check_exact=True)
assert not log_has('Could not convert NoDatapair to OHLCV.', caplog)
msg = 'Could not convert NoDatapair to OHLCV.'
assert not log_has(msg, caplog)
convert_trades_to_ohlcv(['NoDatapair'], timeframes=['1m', '5m'],
data_format_trades='jsongz',
datadir=tmpdir1, timerange=tr, erase=True)
assert log_has('Could not convert NoDatapair to OHLCV.', caplog)
assert log_has(msg, caplog)

BIN
tests/testdata/XRP_ETH-trades.parquet vendored Normal file

Binary file not shown.