Merge pull request #9065 from freqtrade/trades_data_handling

Improve Trades data handling
This commit is contained in:
Matthias
2023-08-18 18:04:38 +02:00
committed by GitHub
14 changed files with 210 additions and 211 deletions

View File

@@ -441,7 +441,7 @@ AVAILABLE_CLI_OPTIONS = {
"dataformat_trades": Arg(
'--data-format-trades',
help='Storage format for downloaded trades data. (default: `feather`).',
choices=constants.AVAILABLE_DATAHANDLERS_TRADES,
choices=constants.AVAILABLE_DATAHANDLERS,
),
"show_timerange": Arg(
'--show-timerange',

View File

@@ -38,8 +38,7 @@ AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList', 'ProducerPairList', '
'ShuffleFilter', 'SpreadFilter', 'VolatilityFilter']
AVAILABLE_PROTECTIONS = ['CooldownPeriod',
'LowProfitPairs', 'MaxDrawdown', 'StoplossGuard']
AVAILABLE_DATAHANDLERS_TRADES = ['json', 'jsongz', 'hdf5', 'feather']
AVAILABLE_DATAHANDLERS = AVAILABLE_DATAHANDLERS_TRADES + ['parquet']
AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5', 'feather', 'parquet']
BACKTEST_BREAKDOWNS = ['day', 'week', 'month']
BACKTEST_CACHE_AGE = ['none', 'day', 'week', 'month']
BACKTEST_CACHE_DEFAULT = 'day'
@@ -50,6 +49,15 @@ DEFAULT_DATAFRAME_COLUMNS = ['date', 'open', 'high', 'low', 'close', 'volume']
# Don't modify sequence of DEFAULT_TRADES_COLUMNS
# it has wide consequences for stored trades files
DEFAULT_TRADES_COLUMNS = ['timestamp', 'id', 'type', 'side', 'price', 'amount', 'cost']
# Target dtype for each trades column; the keys mirror DEFAULT_TRADES_COLUMNS.
# Used with DataFrame.astype when converting trades dataframes.
TRADES_DTYPES = {
'timestamp': 'int64',
'id': 'str',
'type': 'str',
'side': 'str',
'price': 'float64',
'amount': 'float64',
'cost': 'float64',
}
TRADING_MODES = ['spot', 'margin', 'futures']
MARGIN_MODES = ['cross', 'isolated', '']
@@ -450,7 +458,7 @@ CONF_SCHEMA = {
},
'dataformat_trades': {
'type': 'string',
'enum': AVAILABLE_DATAHANDLERS_TRADES,
'enum': AVAILABLE_DATAHANDLERS,
'default': 'feather'
},
'position_adjustment_enable': {'type': 'boolean'},

View File

@@ -1,16 +1,15 @@
"""
Functions to convert data from one format to another
"""
import itertools
import logging
from operator import itemgetter
from typing import Dict, List
import numpy as np
import pandas as pd
from pandas import DataFrame, to_datetime
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, Config, TradeList
from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TRADES_DTYPES,
Config, TradeList)
from freqtrade.enums import CandleType, TradingMode
@@ -195,15 +194,14 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
return frame
def trades_remove_duplicates(trades: List[List]) -> List[List]:
def trades_df_remove_duplicates(trades: pd.DataFrame) -> pd.DataFrame:
"""
Removes duplicates from the trades list.
Uses itertools.groupby to avoid converting to pandas.
Tests show it as being pretty efficient on lists of 4M Lists.
:param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns
:return: same format as above, but with duplicates removed
Removes duplicates from the trades DataFrame.
Uses pandas.DataFrame.drop_duplicates to remove rows that share both 'timestamp' and 'id'.
:param trades: DataFrame with the columns constants.DEFAULT_TRADES_COLUMNS
:return: DataFrame with duplicates removed based on the 'timestamp' and 'id' columns
"""
return [i for i, _ in itertools.groupby(sorted(trades, key=itemgetter(0)))]
return trades.drop_duplicates(subset=['timestamp', 'id'])
def trades_dict_to_list(trades: List[Dict]) -> TradeList:
@@ -215,7 +213,32 @@ def trades_dict_to_list(trades: List[Dict]) -> TradeList:
return [[t[col] for col in DEFAULT_TRADES_COLUMNS] for t in trades]
def trades_to_ohlcv(trades: TradeList, timeframe: str) -> DataFrame:
def trades_convert_types(trades: DataFrame) -> DataFrame:
    """
    Cast the trades columns to the dtypes listed in TRADES_DTYPES and add a 'date' column.
    :param trades: DataFrame containing the columns named in TRADES_DTYPES
    :return: DataFrame with converted dtypes and a UTC 'date' column derived from
        the millisecond 'timestamp' column
    """
    converted = trades.astype(TRADES_DTYPES)
    # 'timestamp' is interpreted as milliseconds since epoch.
    converted['date'] = to_datetime(converted['timestamp'], unit='ms', utc=True)
    return converted
def trades_list_to_df(trades: TradeList, convert: bool = True):
    """
    Build a trades DataFrame from a list of trades.
    :param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns
    :param convert: When True, pass the resulting frame through trades_convert_types
    :return: DataFrame with DEFAULT_TRADES_COLUMNS as columns, converted if requested
    """
    if trades:
        frame = DataFrame(trades, columns=DEFAULT_TRADES_COLUMNS)
    else:
        # Empty input: still return a frame that carries the expected columns.
        frame = DataFrame(columns=DEFAULT_TRADES_COLUMNS)

    return trades_convert_types(frame) if convert else frame
def trades_to_ohlcv(trades: DataFrame, timeframe: str) -> DataFrame:
"""
Converts trades list to OHLCV list
:param trades: List of trades, as returned by ccxt.fetch_trades.
@@ -225,12 +248,9 @@ def trades_to_ohlcv(trades: TradeList, timeframe: str) -> DataFrame:
"""
from freqtrade.exchange import timeframe_to_minutes
timeframe_minutes = timeframe_to_minutes(timeframe)
if not trades:
if trades.empty:
raise ValueError('Trade-list empty.')
df = pd.DataFrame(trades, columns=DEFAULT_TRADES_COLUMNS)
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms',
utc=True,)
df = df.set_index('timestamp')
df = trades.set_index('date', drop=True)
df_new = df['price'].resample(f'{timeframe_minutes}min').ohlc()
df_new['volume'] = df['amount'].resample(f'{timeframe_minutes}min').sum()

View File

@@ -4,7 +4,7 @@ from typing import Optional
from pandas import DataFrame, read_feather, to_datetime
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TradeList
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
@@ -82,43 +82,41 @@ class FeatherDataHandler(IDataHandler):
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
def _trades_store(self, pair: str, data: DataFrame) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
filename = self._pair_trades_filename(self._datadir, pair)
self.create_dir_if_needed(filename)
data.reset_index(drop=True).to_feather(filename, compression_level=9, compression='lz4')
tradesdata = DataFrame(data, columns=DEFAULT_TRADES_COLUMNS)
tradesdata.to_feather(filename, compression_level=9, compression='lz4')
def trades_append(self, pair: str, data: TradeList):
def trades_append(self, pair: str, data: DataFrame):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> DataFrame:
"""
Load a pair from file, either .json.gz or .json
# TODO: respect timerange ...
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
:return: Dataframe containing trades
"""
filename = self._pair_trades_filename(self._datadir, pair)
if not filename.exists():
return []
return DataFrame(columns=DEFAULT_TRADES_COLUMNS)
tradesdata = read_feather(filename)
return tradesdata.values.tolist()
return tradesdata
@classmethod
def _get_file_extension(cls):

View File

@@ -5,7 +5,7 @@ import numpy as np
import pandas as pd
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TradeList
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
@@ -100,42 +100,42 @@ class HDF5DataHandler(IDataHandler):
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
def _trades_store(self, pair: str, data: pd.DataFrame) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
key = self._pair_trades_key(pair)
pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS).to_hdf(
data.to_hdf(
self._pair_trades_filename(self._datadir, pair), key,
mode='a', complevel=9, complib='blosc',
format='table', data_columns=['timestamp']
)
def trades_append(self, pair: str, data: TradeList):
def trades_append(self, pair: str, data: pd.DataFrame):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> pd.DataFrame:
"""
Load a pair from h5 file.
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
:return: Dataframe containing trades
"""
key = self._pair_trades_key(pair)
filename = self._pair_trades_filename(self._datadir, pair)
if not filename.exists():
return []
return pd.DataFrame(columns=DEFAULT_TRADES_COLUMNS)
where = []
if timerange:
if timerange.starttype == 'date':
@@ -145,7 +145,7 @@ class HDF5DataHandler(IDataHandler):
trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where)
trades[['id', 'type']] = trades[['id', 'type']].replace({np.nan: None})
return trades.values.tolist()
return trades
@classmethod
def _get_file_extension(cls):

View File

@@ -10,14 +10,16 @@ from freqtrade.configuration import TimeRange
from freqtrade.constants import (DATETIME_PRINT_FORMAT, DEFAULT_DATAFRAME_COLUMNS,
DL_DATA_TIMEFRAMES, Config)
from freqtrade.data.converter import (clean_ohlcv_dataframe, ohlcv_to_dataframe,
trades_remove_duplicates, trades_to_ohlcv)
trades_df_remove_duplicates, trades_list_to_df,
trades_to_ohlcv)
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler
from freqtrade.enums import CandleType
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import Exchange
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
from freqtrade.util import format_ms_time
from freqtrade.util import dt_ts, format_ms_time
from freqtrade.util.binance_mig import migrate_binance_futures_data
from freqtrade.util.datetime_helpers import dt_now
logger = logging.getLogger(__name__)
@@ -349,24 +351,26 @@ def _download_trades_history(exchange: Exchange,
# DEFAULT_TRADES_COLUMNS: 0 -> timestamp
# DEFAULT_TRADES_COLUMNS: 1 -> id
if trades and since < trades[0][0]:
if not trades.empty and since < trades.iloc[0]['timestamp']:
# since is before the first trade
logger.info(f"Start earlier than available data. Redownloading trades for {pair}...")
trades = []
trades = trades_list_to_df([])
if not since:
since = int((datetime.now() - timedelta(days=new_pairs_days)).timestamp()) * 1000
since = dt_ts(dt_now() - timedelta(days=new_pairs_days))
from_id = trades[-1][1] if trades else None
if trades and since < trades[-1][0]:
from_id = trades.iloc[-1]['id'] if not trades.empty else None
if not trades.empty and since < trades.iloc[-1]['timestamp']:
# Reset since to the last available point
# - 5 seconds (to ensure we're getting all trades)
since = trades[-1][0] - (5 * 1000)
since = trades.iloc[-1]['timestamp'] - (5 * 1000)
logger.info(f"Using last trade date -5s - Downloading trades for {pair} "
f"since: {format_ms_time(since)}.")
logger.debug(f"Current Start: {format_ms_time(trades[0][0]) if trades else 'None'}")
logger.debug(f"Current End: {format_ms_time(trades[-1][0]) if trades else 'None'}")
logger.debug("Current Start: %s", 'None' if trades.empty else
f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}")
logger.debug("Current End: %s", 'None' if trades.empty else
f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}")
logger.info(f"Current Amount of trades: {len(trades)}")
# Default since_ms to 30 days if nothing is given
@@ -375,13 +379,16 @@ def _download_trades_history(exchange: Exchange,
until=until,
from_id=from_id,
)
trades.extend(new_trades[1])
new_trades_df = trades_list_to_df(new_trades[1])
trades = concat([trades, new_trades_df], axis=0)
# Remove duplicates to make sure we're not storing data we don't need
trades = trades_remove_duplicates(trades)
trades = trades_df_remove_duplicates(trades)
data_handler.trades_store(pair, data=trades)
logger.debug(f"New Start: {format_ms_time(trades[0][0])}")
logger.debug(f"New End: {format_ms_time(trades[-1][0])}")
logger.debug("New Start: %s", 'None' if trades.empty else
f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}")
logger.debug("New End: %s", 'None' if trades.empty else
f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}")
logger.info(f"New Amount of trades: {len(trades)}")
return True

View File

@@ -15,8 +15,9 @@ from pandas import DataFrame
from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.constants import ListPairsWithTimeframes, TradeList
from freqtrade.data.converter import clean_ohlcv_dataframe, trades_remove_duplicates, trim_dataframe
from freqtrade.constants import DEFAULT_TRADES_COLUMNS, ListPairsWithTimeframes
from freqtrade.data.converter import (clean_ohlcv_dataframe, trades_convert_types,
trades_df_remove_duplicates, trim_dataframe)
from freqtrade.enums import CandleType, TradingMode
from freqtrade.exchange import timeframe_to_seconds
@@ -170,32 +171,42 @@ class IDataHandler(ABC):
return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]
@abstractmethod
def trades_store(self, pair: str, data: TradeList) -> None:
def _trades_store(self, pair: str, data: DataFrame) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
@abstractmethod
def trades_append(self, pair: str, data: TradeList):
def trades_append(self, pair: str, data: DataFrame):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
@abstractmethod
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> DataFrame:
"""
Load a pair from file, either .json.gz or .json
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
:return: Dataframe containing trades
"""
def trades_store(self, pair: str, data: DataFrame) -> None:
    """
    Store trades data to file by delegating to the handler's _trades_store.
    :param pair: Pair - used for filename
    :param data: Dataframe containing trades
                 column sequence as in DEFAULT_TRADES_COLUMNS
    """
    # Keep only the expected columns (this drops the derived date column).
    trades_only = data[DEFAULT_TRADES_COLUMNS]
    self._trades_store(pair, trades_only)
def trades_purge(self, pair: str) -> bool:
"""
Remove data for this pair
@@ -208,7 +219,7 @@ class IDataHandler(ABC):
return True
return False
def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> DataFrame:
"""
Load a pair from file, either .json.gz or .json
Removes duplicates in the process.
@@ -216,7 +227,10 @@ class IDataHandler(ABC):
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
"""
return trades_remove_duplicates(self._trades_load(pair, timerange=timerange))
trades = trades_df_remove_duplicates(self._trades_load(pair, timerange=timerange))
trades = trades_convert_types(trades)
return trades
@classmethod
def create_dir_if_needed(cls, datadir: Path):

View File

@@ -6,8 +6,8 @@ from pandas import DataFrame, read_json, to_datetime
from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.data.converter import trades_dict_to_list
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
from freqtrade.data.converter import trades_dict_to_list, trades_list_to_df
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
@@ -94,45 +94,46 @@ class JsonDataHandler(IDataHandler):
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
def _trades_store(self, pair: str, data: DataFrame) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
filename = self._pair_trades_filename(self._datadir, pair)
misc.file_dump_json(filename, data, is_zip=self._use_zip)
trades = data.values.tolist()
misc.file_dump_json(filename, trades, is_zip=self._use_zip)
def trades_append(self, pair: str, data: TradeList):
def trades_append(self, pair: str, data: DataFrame):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> DataFrame:
"""
Load a pair from file, either .json.gz or .json
# TODO: respect timerange ...
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
:return: Dataframe containing trades
"""
filename = self._pair_trades_filename(self._datadir, pair)
tradesdata = misc.file_load_json(filename)
if not tradesdata:
return []
return DataFrame(columns=DEFAULT_TRADES_COLUMNS)
if isinstance(tradesdata[0], dict):
# Convert trades dict to list
logger.info("Old trades format detected - converting")
tradesdata = trades_dict_to_list(tradesdata)
pass
return tradesdata
return trades_list_to_df(tradesdata, convert=False)
@classmethod
def _get_file_extension(cls):

View File

@@ -4,7 +4,7 @@ from typing import Optional
from pandas import DataFrame, read_parquet, to_datetime
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TradeList
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
@@ -81,25 +81,22 @@ class ParquetDataHandler(IDataHandler):
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
def _trades_store(self, pair: str, data: DataFrame) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
# filename = self._pair_trades_filename(self._datadir, pair)
filename = self._pair_trades_filename(self._datadir, pair)
self.create_dir_if_needed(filename)
data.reset_index(drop=True).to_parquet(filename)
raise NotImplementedError()
# array = pa.array(data)
# array
# feather.write_feather(data, filename)
def trades_append(self, pair: str, data: TradeList):
def trades_append(self, pair: str, data: DataFrame):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
:param data: Dataframe containing trades
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
@@ -112,14 +109,13 @@ class ParquetDataHandler(IDataHandler):
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
"""
raise NotImplementedError()
# filename = self._pair_trades_filename(self._datadir, pair)
# tradesdata = misc.file_load_json(filename)
filename = self._pair_trades_filename(self._datadir, pair)
if not filename.exists():
return DataFrame(columns=DEFAULT_TRADES_COLUMNS)
# if not tradesdata:
# return []
tradesdata = read_parquet(filename)
# return tradesdata
return tradesdata
@classmethod
def _get_file_extension(cls):

View File

@@ -14,7 +14,7 @@ import pytest
from freqtrade import constants
from freqtrade.commands import Arguments
from freqtrade.data.converter import ohlcv_to_dataframe
from freqtrade.data.converter import ohlcv_to_dataframe, trades_list_to_df
from freqtrade.edge import PairInfo
from freqtrade.enums import CandleType, MarginMode, RunMode, SignalDirection, TradingMode
from freqtrade.exchange import Exchange
@@ -2346,7 +2346,15 @@ def trades_history():
[1565798399629, '1261813bb30', None, 'buy', 0.019627, 0.244, 0.004788987999999999],
[1565798399752, '1261813cc31', None, 'sell', 0.019626, 0.011, 0.00021588599999999999],
[1565798399862, '126181cc332', None, 'sell', 0.019626, 0.011, 0.00021588599999999999],
[1565798399872, '1261aa81333', None, 'sell', 0.019626, 0.011, 0.00021588599999999999]]
[1565798399862, '126181cc333', None, 'sell', 0.019626, 0.012, 0.00021588599999999999],
[1565798399872, '1261aa81334', None, 'sell', 0.019626, 0.011, 0.00021588599999999999]]
@pytest.fixture(scope="function")
def trades_history_df(trades_history):
    """Return the trades_history fixture as a DataFrame with a UTC 'date' column."""
    frame = trades_list_to_df(trades_history)
    frame['date'] = pd.to_datetime(frame['timestamp'], unit='ms', utc=True)
    return frame
@pytest.fixture(scope="function")

View File

@@ -4,13 +4,14 @@ from pathlib import Path
from shutil import copyfile
import numpy as np
import pandas as pd
import pytest
from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format,
ohlcv_fill_up_missing_data, ohlcv_to_dataframe,
reduce_dataframe_footprint, trades_dict_to_list,
trades_remove_duplicates, trades_to_ohlcv, trim_dataframe)
reduce_dataframe_footprint, trades_df_remove_duplicates,
trades_dict_to_list, trades_to_ohlcv, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data, load_pair_history,
validate_backtest_data)
from freqtrade.data.history.idatahandler import IDataHandler
@@ -34,13 +35,13 @@ def test_ohlcv_to_dataframe(ohlcv_history_list, caplog):
assert log_has('Converting candle (OHLCV) data to dataframe for pair UNITTEST/BTC.', caplog)
def test_trades_to_ohlcv(trades_history, caplog):
def test_trades_to_ohlcv(trades_history_df, caplog):
caplog.set_level(logging.DEBUG)
with pytest.raises(ValueError, match="Trade-list empty."):
trades_to_ohlcv([], '1m')
trades_to_ohlcv(pd.DataFrame(columns=trades_history_df.columns), '1m')
df = trades_to_ohlcv(trades_history, '1m')
df = trades_to_ohlcv(trades_history_df, '1m')
assert not df.empty
assert len(df) == 1
assert 'open' in df.columns
@@ -297,13 +298,13 @@ def test_trim_dataframe(testdatadir) -> None:
assert all(data_modify.iloc[0] == data.iloc[25])
def test_trades_remove_duplicates(trades_history):
trades_history1 = trades_history * 3
assert len(trades_history1) == len(trades_history) * 3
res = trades_remove_duplicates(trades_history1)
assert len(res) == len(trades_history)
for i, t in enumerate(res):
assert t == trades_history[i]
def test_trades_df_remove_duplicates(trades_history_df):
    """Tripling the trades frame and de-duplicating it must give back the original frame."""
    tripled = pd.concat([trades_history_df] * 3).reset_index(drop=True)
    assert len(tripled) == len(trades_history_df) * 3

    deduplicated = trades_df_remove_duplicates(tripled)
    assert len(deduplicated) == len(trades_history_df)
    assert deduplicated.equals(trades_history_df)
def test_trades_dict_to_list(fetch_trades_result):

View File

@@ -6,7 +6,8 @@ from pathlib import Path
from unittest.mock import MagicMock
import pytest
from pandas import DataFrame
from pandas import DataFrame, Timestamp
from pandas.testing import assert_frame_equal
from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
@@ -117,12 +118,6 @@ def test_datahandler_ohlcv_get_available_data(testdatadir):
assert set(paircombs) == {('UNITTEST/BTC', '5m', CandleType.SPOT)}
def test_jsondatahandler_trades_get_pairs(testdatadir):
pairs = JsonGzDataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH', 'XRP/OLD'}
def test_jsondatahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
@@ -246,8 +241,10 @@ def test_datahandler__check_empty_df(testdatadir, caplog):
assert log_has_re(expected_text, caplog)
@pytest.mark.parametrize('datahandler', ['parquet'])
# @pytest.mark.parametrize('datahandler', [])
@pytest.mark.skip("All datahandlers currently support trades data.")
def test_datahandler_trades_not_supported(datahandler, testdatadir, ):
# Currently disabled. Reenable should a new provider not support trades data.
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_load('UNITTEST/ETH')
@@ -266,18 +263,6 @@ def test_jsondatahandler_trades_load(testdatadir, caplog):
assert log_has(logmsg, caplog)
def test_jsondatahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
@@ -291,79 +276,48 @@ def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
def test_datahandler_trades_append(datahandler, testdatadir):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_append('UNITTEST/ETH', [])
dh.trades_append('UNITTEST/ETH', DataFrame())
def test_hdf5datahandler_trades_get_pairs(testdatadir):
pairs = HDF5DataHandler.trades_get_pairs(testdatadir)
@pytest.mark.parametrize('datahandler,expected', [
('jsongz', {'XRP/ETH', 'XRP/OLD'}),
('hdf5', {'XRP/ETH'}),
('feather', {'XRP/ETH'}),
('parquet', {'XRP/ETH'}),
])
def test_datahandler_trades_get_pairs(testdatadir, datahandler, expected):
pairs = get_datahandlerclass(datahandler).trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH'}
assert set(pairs) == expected
def test_hdf5datahandler_trades_load(testdatadir):
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
assert isinstance(trades, DataFrame)
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
assert isinstance(trades1, DataFrame)
assert trades1.empty
# data goes from 2019-10-11 - 2019-10-13
timerange = TimeRange.parse_timerange('20191011-20191012')
trades2 = dh._trades_load('XRP/ETH', timerange)
assert len(trades) > len(trades2)
# Check that ID is None (If it's nan, it's wrong)
assert trades2[0][2] is None
assert trades2.iloc[0]['type'] is None
# unfiltered load has trades before starttime
assert len([t for t in trades if t[0] < timerange.startts * 1000]) >= 0
assert len(trades.loc[trades['timestamp'] < timerange.startts * 1000]) >= 0
# filtered list does not have trades before starttime
assert len([t for t in trades2 if t[0] < timerange.startts * 1000]) == 0
assert len(trades2.loc[trades2['timestamp'] < timerange.startts * 1000]) == 0
# unfiltered load has trades after endtime
assert len([t for t in trades if t[0] > timerange.stopts * 1000]) > 0
assert len(trades.loc[trades['timestamp'] > timerange.stopts * 1000]) >= 0
# filtered list does not have trades after endtime
assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
def test_hdf5datahandler_trades_store(testdatadir, tmpdir):
tmpdir1 = Path(tmpdir)
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
dh1 = get_datahandler(tmpdir1, 'hdf5')
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.h5'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_hdf5datahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'hdf5')
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
assert len(trades2.loc[trades2['timestamp'] > timerange.stopts * 1000]) == 0
# assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
@@ -490,50 +444,42 @@ def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
assert unlinkmock.call_count == 2
def test_featherdatahandler_trades_load(testdatadir):
dh = get_datahandler(testdatadir, 'feather')
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_load(testdatadir, datahandler):
dh = get_datahandler(testdatadir, datahandler)
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
assert trades[0][0] == 1570752011620
assert trades[-1][-1] == 0.1986231
assert isinstance(trades, DataFrame)
assert trades.iloc[0]['timestamp'] == 1570752011620
assert trades.iloc[0]['date'] == Timestamp('2019-10-11 00:00:11.620000+0000')
assert trades.iloc[-1]['cost'] == 0.1986231
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
assert isinstance(trades, DataFrame)
assert trades1.empty
def test_featherdatahandler_trades_store(testdatadir, tmpdir):
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_store(testdatadir, tmpdir, datahandler):
tmpdir1 = Path(tmpdir)
dh = get_datahandler(testdatadir, 'feather')
dh = get_datahandler(testdatadir, datahandler)
trades = dh.trades_load('XRP/ETH')
dh1 = get_datahandler(tmpdir1, 'feather')
dh1 = get_datahandler(tmpdir1, datahandler)
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.feather'
file = tmpdir1 / f'XRP_NEW-trades.{dh1._get_file_extension()}'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert_frame_equal(trades, trades_new, check_exact=True)
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_featherdatahandler_trades_purge(mocker, testdatadir):
@pytest.mark.parametrize('datahandler', ['jsongz', 'hdf5', 'feather', 'parquet'])
def test_datahandler_trades_purge(mocker, testdatadir, datahandler):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'feather')
dh = get_datahandler(testdatadir, datahandler)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0

View File

@@ -581,7 +581,7 @@ def test_download_trades_history(trades_history, mocker, default_conf, testdatad
assert _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='ETH/BTC')
assert log_has("New Amount of trades: 5", caplog)
assert log_has("New Amount of trades: 6", caplog)
assert file1.is_file()
ght_mock.reset_mock()
@@ -651,10 +651,10 @@ def test_convert_trades_to_ohlcv(testdatadir, tmpdir, caplog):
assert_frame_equal(dfbak_1m, df_1m, check_exact=True)
assert_frame_equal(dfbak_5m, df_5m, check_exact=True)
assert not log_has('Could not convert NoDatapair to OHLCV.', caplog)
msg = 'Could not convert NoDatapair to OHLCV.'
assert not log_has(msg, caplog)
convert_trades_to_ohlcv(['NoDatapair'], timeframes=['1m', '5m'],
data_format_trades='jsongz',
datadir=tmpdir1, timerange=tr, erase=True)
assert log_has('Could not convert NoDatapair to OHLCV.', caplog)
assert log_has(msg, caplog)

BIN
tests/testdata/XRP_ETH-trades.parquet vendored Normal file

Binary file not shown.