From 7e1e388b9ce492d390671df30b70ceabd5415e06 Mon Sep 17 00:00:00 2001
From: Matthias <xmatthias@outlook.com>
Date: Fri, 23 Sep 2022 18:24:30 +0200
Subject: [PATCH] Add feather/parquet docs

---
 docs/data-download.md                        | 44 ++++++++++++++++++--
 freqtrade/data/history/featherdatahandler.py |  9 ++--
 freqtrade/data/history/parquetdatahandler.py |  9 ++--
 3 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/docs/data-download.md b/docs/data-download.md
index 2b76d4f74..60e3f5efe 100644
--- a/docs/data-download.md
+++ b/docs/data-download.md
@@ -179,9 +179,11 @@ freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT --
 
-Freqtrade currently supports 3 data-formats for both OHLCV and trades data:
+Freqtrade currently supports the following data-formats for both OHLCV and trades data:
 
-* `json` (plain "text" json files)
-* `jsongz` (a gzip-zipped version of json files)
-* `hdf5` (a high performance datastore)
+* `json` - plain "text" json files
+* `jsongz` - a gzip-zipped version of json files
+* `hdf5` - a high performance datastore
+* `feather` - a dataformat based on Apache Arrow
+* `parquet` - a columnar datastore
 
 By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data.
 
@@ -200,6 +202,42 @@ If the default data-format has been changed during download, then the keys `data
 !!! Note
     You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods.
 
+#### Dataformat comparison
+
+The comparisons below were made with the following data, using the Linux `time` command.
+
+```
+Found 6 pair / timeframe combinations.
++----------+-------------+--------+---------------------+---------------------+
+|     Pair |   Timeframe |   Type |                From |                  To |
+|----------+-------------+--------+---------------------+---------------------|
+| BTC/USDT |          5m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:25:00 |
+| ETH/USDT |          1m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:26:00 |
+| BTC/USDT |          1m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:30:00 |
+| XRP/USDT |          5m |   spot | 2018-05-04 08:10:00 | 2022-09-13 19:15:00 |
+| XRP/USDT |          1m |   spot | 2018-05-04 08:11:00 | 2022-09-13 19:22:00 |
+| ETH/USDT |          5m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:20:00 |
++----------+-------------+--------+---------------------+---------------------+
+```
+
+Timings have been taken in a not very scientific way with the following command, which forces reading the data into memory.
+
+``` bash
+time freqtrade list-data --show-timerange --data-format-ohlcv <dataformat>
+```
+
+| Format | Size | Timing |
+|------------|-------------|-------------|
+| `json` | 149MB | 25.6s |
+| `jsongz` | 39MB | 27s |
+| `hdf5` | 145MB | 3.9s |
+| `feather` | 72MB | 3.5s |
+| `parquet` | 83MB | 3.8s |
+
+Size has been taken from the BTC/USDT 1m spot combination for the timerange specified above.
+
+To have the best performance/size mix, we recommend using either feather or parquet.
+
 #### Sub-command convert data
 
 ```
diff --git a/freqtrade/data/history/featherdatahandler.py b/freqtrade/data/history/featherdatahandler.py
index dfb818ca8..22a6805e7 100644
--- a/freqtrade/data/history/featherdatahandler.py
+++ b/freqtrade/data/history/featherdatahandler.py
@@ -58,12 +58,9 @@ class FeatherDataHandler(IDataHandler):
                 self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
             if not filename.exists():
                 return DataFrame(columns=self._columns)
-        try:
-            pairdata = read_feather(filename)
-            pairdata.columns = self._columns
-        except ValueError:
-            logger.error(f"Could not load data for {pair}.")
-            return DataFrame(columns=self._columns)
+
+        pairdata = read_feather(filename)
+        pairdata.columns = self._columns
         pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                           'low': 'float', 'close': 'float', 'volume': 'float'})
         pairdata['date'] = to_datetime(pairdata['date'],
diff --git a/freqtrade/data/history/parquetdatahandler.py b/freqtrade/data/history/parquetdatahandler.py
index 283d90ec0..57581861d 100644
--- a/freqtrade/data/history/parquetdatahandler.py
+++ b/freqtrade/data/history/parquetdatahandler.py
@@ -57,12 +57,9 @@ class ParquetDataHandler(IDataHandler):
                 self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
             if not filename.exists():
                 return DataFrame(columns=self._columns)
-        try:
-            pairdata = read_parquet(filename)
-            pairdata.columns = self._columns
-        except ValueError:
-            logger.error(f"Could not load data for {pair}.")
-            return DataFrame(columns=self._columns)
+
+        pairdata = read_parquet(filename)
+        pairdata.columns = self._columns
         pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                           'low': 'float', 'close': 'float', 'volume': 'float'})
         pairdata['date'] = to_datetime(pairdata['date'],