From b2d664c63c061e06ed6b136792146d57c8ffabe8 Mon Sep 17 00:00:00 2001 From: elintornquist <107926911+elintornquist@users.noreply.github.com> Date: Fri, 26 Aug 2022 18:57:27 +0200 Subject: [PATCH 1/9] Change MinPts calculation --- freqtrade/freqai/data_kitchen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index e480ab135..102a4857e 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -635,7 +635,7 @@ class FreqaiDataKitchen: cos(angle) * (point[1] - origin[1]) return (x, y) - MinPts = len(self.data_dictionary['train_features'].columns) * 2 + MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) # measure pairwise distances to train_features.shape[1]*2 nearest neighbours neighbors = NearestNeighbors( n_neighbors=MinPts, n_jobs=self.thread_count) From 86c5ac44e4f9803dd302c2154320008037f5c50f Mon Sep 17 00:00:00 2001 From: elintornquist <107926911+elintornquist@users.noreply.github.com> Date: Fri, 26 Aug 2022 23:05:07 +0200 Subject: [PATCH 2/9] Add outlier percentage check --- freqtrade/freqai/data_kitchen.py | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 102a4857e..c58cfa75a 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -513,6 +513,19 @@ class FreqaiDataKitchen: return avg_mean_dist + def get_outlier_percentage(self, dropped_pts: npt.ArrayLike) -> float: + """ + Check if more than X% of points werer dropped during outlier detection. 
+ """ + outlier_protection_pct = self.freqai_config["feature_parameters"].get( + "outlier_protection_percentage", 30) + outlier_pct = dropped_pts.sum() / len(dropped_pts) + if outlier_pct >= outlier_protection_pct: + self.svm_model = None + return outlier_pct + else: + return 0.0 + def use_SVM_to_remove_outliers(self, predict: bool) -> None: """ Build/inference a Support Vector Machine to detect outliers @@ -552,6 +565,14 @@ class FreqaiDataKitchen: y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) dropped_points = np.where(y_pred == -1, 0, y_pred) # keep_index = np.where(y_pred == 1) + outlier_ptc = self.get_outlier_percentage(dropped_points) + if outlier_ptc: + logger.warning( + f"SVM detected >{outlier_ptc}% of the points as outliers." + f"Keeping original dataset." + ) + return + self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ (y_pred == 1) ] @@ -667,6 +688,14 @@ class FreqaiDataKitchen: self.data['DBSCAN_min_samples'] = MinPts dropped_points = np.where(clustering.labels_ == -1, 1, 0) + outlier_ptc = self.get_outlier_percentage(dropped_points) + if outlier_ptc: + logger.warning( + f"DBSCAN detected >{outlier_ptc}% of the points as outliers." + f"Keeping original dataset." + ) + return + self.data_dictionary['train_features'] = self.data_dictionary['train_features'][ (clustering.labels_ != -1) ] @@ -722,6 +751,14 @@ class FreqaiDataKitchen: 0, ) + outlier_ptc = self.get_outlier_percentage(1 - do_predict) + if outlier_ptc: + logger.warning( + f"DBSCAN detected >{outlier_ptc}% of the points as outliers." + f"Keeping original dataset." 
+ ) + return + if (len(do_predict) - do_predict.sum()) > 0: logger.info( f"DI tossed {len(do_predict) - do_predict.sum()} predictions for " From 71f7d687832316ae5524c3a87491f50ddbfa28f9 Mon Sep 17 00:00:00 2001 From: th0rntwig Date: Sat, 27 Aug 2022 12:44:55 +0200 Subject: [PATCH 3/9] Fixed mypy error --- freqtrade/freqai/data_kitchen.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index c58cfa75a..eb9f6beb7 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -513,7 +513,7 @@ class FreqaiDataKitchen: return avg_mean_dist - def get_outlier_percentage(self, dropped_pts: npt.ArrayLike) -> float: + def get_outlier_percentage(self, dropped_pts: npt.NDArray) -> float: """ Check if more than X% of points werer dropped during outlier detection. """ @@ -568,7 +568,7 @@ class FreqaiDataKitchen: outlier_ptc = self.get_outlier_percentage(dropped_points) if outlier_ptc: logger.warning( - f"SVM detected >{outlier_ptc}% of the points as outliers." + f"SVM detected > {outlier_ptc}% of the points as outliers." f"Keeping original dataset." ) return @@ -691,7 +691,7 @@ class FreqaiDataKitchen: outlier_ptc = self.get_outlier_percentage(dropped_points) if outlier_ptc: logger.warning( - f"DBSCAN detected >{outlier_ptc}% of the points as outliers." + f"DBSCAN detected > {outlier_ptc}% of the points as outliers." f"Keeping original dataset." ) return @@ -754,7 +754,7 @@ class FreqaiDataKitchen: outlier_ptc = self.get_outlier_percentage(1 - do_predict) if outlier_ptc: logger.warning( - f"DBSCAN detected >{outlier_ptc}% of the points as outliers." + f"DI detected > {outlier_ptc}% of the points as outliers." f"Keeping original dataset." 
) return From 22b42e91f3d1875e73780d8ce4dcbbedeac01b92 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 11:53:24 +0200 Subject: [PATCH 4/9] add new parameter to freqai doc --- docs/freqai.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/freqai.md b/docs/freqai.md index bba6faaea..78d71e2f8 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -113,6 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Boolean. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Dictionary. | `use_DBSCAN_to_remove_outliers` | Cluster data using DBSCAN to identify and remove outliers from training and prediction data. See details about how it works [here](#removing-outliers-with-dbscan).
**Datatype:** Boolean. +| `outlier_protection_percentage` | If more than `outlier_protection_percentage` fraction of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `0.3` | | **Data split parameters** | `data_split_parameters` | Include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website).
**Datatype:** Dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** Positive float < 1. From 1e41c773a067beaf523c7bd3c67717b27fd5e732 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 12:11:29 +0200 Subject: [PATCH 5/9] fix outlier protection --- docs/freqai.md | 2 +- freqtrade/freqai/data_kitchen.py | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 78d71e2f8..c0f764953 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -113,7 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Boolean. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Dictionary. | `use_DBSCAN_to_remove_outliers` | Cluster data using DBSCAN to identify and remove outliers from training and prediction data. See details about how it works [here](#removing-outliers-with-dbscan).
**Datatype:** Boolean. -| `outlier_protection_percentage` | If more than `outlier_protection_percentage` fraction of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `0.3` +| `outlier_protection_percentage` | If more than `outlier_protection_percentage` % of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `30` | | **Data split parameters** | `data_split_parameters` | Include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website).
**Datatype:** Dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** Positive float < 1. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index eb9f6beb7..ed3990de0 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -519,7 +519,7 @@ class FreqaiDataKitchen: """ outlier_protection_pct = self.freqai_config["feature_parameters"].get( "outlier_protection_percentage", 30) - outlier_pct = dropped_pts.sum() / len(dropped_pts) + outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 if outlier_pct >= outlier_protection_pct: self.svm_model = None return outlier_pct @@ -563,12 +563,12 @@ class FreqaiDataKitchen: self.data_dictionary["train_features"] ) y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) - dropped_points = np.where(y_pred == -1, 0, y_pred) + kept_points = np.where(y_pred == -1, 0, y_pred) # keep_index = np.where(y_pred == 1) - outlier_ptc = self.get_outlier_percentage(dropped_points) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(1 - kept_points) + if outlier_pct: logger.warning( - f"SVM detected > {outlier_ptc}% of the points as outliers." + f"SVM detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." ) return @@ -584,7 +584,7 @@ class FreqaiDataKitchen: ] logger.info( - f"SVM tossed {len(y_pred) - dropped_points.sum()}" + f"SVM tossed {len(y_pred) - kept_points.sum()}" f" train points from {len(y_pred)} total points." 
) @@ -593,7 +593,7 @@ class FreqaiDataKitchen: # to reduce code duplication if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) - dropped_points = np.where(y_pred == -1, 0, y_pred) + kept_points = np.where(y_pred == -1, 0, y_pred) self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ (y_pred == 1) ] @@ -604,7 +604,7 @@ class FreqaiDataKitchen: ] logger.info( - f"SVM tossed {len(y_pred) - dropped_points.sum()}" + f"SVM tossed {len(y_pred) - kept_points.sum()}" f" test points from {len(y_pred)} total points." ) @@ -688,10 +688,10 @@ class FreqaiDataKitchen: self.data['DBSCAN_min_samples'] = MinPts dropped_points = np.where(clustering.labels_ == -1, 1, 0) - outlier_ptc = self.get_outlier_percentage(dropped_points) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(dropped_points) + if outlier_pct: logger.warning( - f"DBSCAN detected > {outlier_ptc}% of the points as outliers." + f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." ) return @@ -751,10 +751,10 @@ class FreqaiDataKitchen: 0, ) - outlier_ptc = self.get_outlier_percentage(1 - do_predict) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(1 - do_predict) + if outlier_pct: logger.warning( - f"DI detected > {outlier_ptc}% of the points as outliers." + f"DI detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." 
) return From dd628eb525acc407fd0643aba2548d5eac6f3800 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 12:56:39 +0200 Subject: [PATCH 6/9] add tests for outlier detection and removal functions --- freqtrade/freqai/data_drawer.py | 3 +- freqtrade/freqai/data_kitchen.py | 2 +- tests/freqai/conftest.py | 47 ++++++++++++++++++++++++- tests/freqai/test_freqai_datakitchen.py | 31 ++++++++++++++-- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index b3060deff..477b9e098 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -566,7 +566,7 @@ class FreqaiDataDrawer: for training according to user defined train_period_days metadata: dict = strategy furnished pair metadata """ - + import pytest with self.history_lock: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} @@ -576,6 +576,7 @@ class FreqaiDataDrawer: ) for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): + # pytest.set_trace() base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf]) if pairs: for p in pairs: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ed3990de0..8e68c9a38 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -657,7 +657,7 @@ class FreqaiDataKitchen: return (x, y) MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) - # measure pairwise distances to train_features.shape[1]*2 nearest neighbours + # measure pairwise distances to nearest neighbours neighbors = NearestNeighbors( n_neighbors=MinPts, n_jobs=self.thread_count) neighbors_fit = neighbors.fit(self.data_dictionary['train_features']) diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 6ace13677..98f086ec9 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -2,7 +2,7 @@ from copy import deepcopy from pathlib import Path 
import pytest - +from unittest.mock import MagicMock from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.freqai.data_drawer import FreqaiDataDrawer @@ -81,6 +81,51 @@ def get_patched_freqaimodel(mocker, freqaiconf): return freqaimodel +def make_data_dictionary(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai.dk.pair = "ADA/BTC" + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( + data_load_timerange, freqai.dk.pair, freqai.dk + ) + + unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( + strategy, corr_dataframes, base_dataframes, freqai.dk.pair + ) + + unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) + + freqai.dk.find_features(unfiltered_dataframe) + + features_filtered, labels_filtered = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered) + + data_dictionary = freqai.dk.normalize_data(data_dictionary) + + return freqai + + def get_freqai_live_analyzed_dataframe(mocker, freqaiconf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) diff --git 
a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 9f2a2f71e..581286715 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -5,8 +5,8 @@ from pathlib import Path import pytest from freqtrade.exceptions import OperationalException -from tests.freqai.conftest import get_patched_data_kitchen - +from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary +from tests.conftest import log_has_re @pytest.mark.parametrize( "timerange, train_period_days, expected_result", @@ -66,3 +66,30 @@ def test_check_if_model_expired(mocker, freqai_conf, timestamp, expected): dk = get_patched_data_kitchen(mocker, freqai_conf) assert dk.check_if_model_expired(timestamp) == expected shutil.rmtree(Path(dk.full_path)) + + +def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + # freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 1}) + freqai.dk.use_DBSCAN_to_remove_outliers(predict=False) + assert log_has_re( + "DBSCAN found eps of 2.42.", + caplog, + ) + + +def test_compute_distances(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) + avg_mean_dist = freqai.dk.compute_distances() + assert round(avg_mean_dist, 2) == 2.56 + + +def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) + freqai.dk.use_SVM_to_remove_outliers(predict=False) + assert log_has_re( + "SVM detected 8.46%", + caplog, + ) From fcb5d1cb5a5419f723d94edde80abe6173d33916 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 13:01:39 +0200 Subject: [PATCH 7/9] remove debugging flag --- freqtrade/freqai/data_drawer.py | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 477b9e098..b6a1a15d7 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -566,7 +566,6 @@ class FreqaiDataDrawer: for training according to user defined train_period_days metadata: dict = strategy furnished pair metadata """ - import pytest with self.history_lock: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} @@ -576,7 +575,6 @@ class FreqaiDataDrawer: ) for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): - # pytest.set_trace() base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf]) if pairs: for p in pairs: From 6634229cc198f1e264b1fe3a56d4bd919c7216c0 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 13:21:29 +0200 Subject: [PATCH 8/9] appease the flake8 gods --- tests/freqai/test_freqai_datakitchen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 581286715..6ef2971ee 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -8,6 +8,7 @@ from freqtrade.exceptions import OperationalException from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary from tests.conftest import log_has_re + @pytest.mark.parametrize( "timerange, train_period_days, expected_result", [ From a44a235b563fa96ca893cbd96f3e64d86aa7ff31 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 13:47:01 +0200 Subject: [PATCH 9/9] isort imports in tests/freqai --- tests/freqai/conftest.py | 3 ++- tests/freqai/test_freqai_datakitchen.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 98f086ec9..dd148da77 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -1,8 +1,9 @@ from copy import deepcopy from pathlib import 
Path +from unittest.mock import MagicMock import pytest -from unittest.mock import MagicMock + from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.freqai.data_drawer import FreqaiDataDrawer diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 6ef2971ee..9ef955695 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -5,8 +5,8 @@ from pathlib import Path import pytest from freqtrade.exceptions import OperationalException -from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary from tests.conftest import log_has_re +from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary @pytest.mark.parametrize(