Merge pull request #8692 from freqtrade/feat/outsource-data-pipeline

Outsource data pipeline handling to improve flexibility
This commit is contained in:
Matthias
2023-06-18 13:39:36 +02:00
committed by GitHub
21 changed files with 587 additions and 995 deletions

View File

@@ -9,9 +9,9 @@ from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from tests.conftest import get_patched_exchange, log_has_re
from tests.conftest import get_patched_exchange
from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy,
make_data_dictionary, make_unfiltered_dataframe)
make_unfiltered_dataframe)
from tests.freqai.test_freqai_interface import is_mac
@@ -72,68 +72,6 @@ def test_check_if_model_expired(mocker, freqai_conf):
shutil.rmtree(Path(dk.full_path))
def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog):
    """Call DBSCAN outlier removal with ``predict=False`` and check the logged eps value."""
    interface = make_data_dictionary(mocker, freqai_conf)
    kitchen = interface.dk
    kitchen.use_DBSCAN_to_remove_outliers(predict=False)

    # The eps is matched loosely on its second decimal (1.7x).
    expected_log = r"DBSCAN found eps of 1\.7\d\."
    assert log_has_re(expected_log, caplog)
def test_compute_distances(mocker, freqai_conf):
    """Check the value returned by ``compute_distances`` once ``DI_threshold`` is set to 1."""
    interface = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params.update(DI_threshold=1)

    result = interface.dk.compute_distances()
    assert round(result, 2) == 1.98
def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog):
    """Call SVM outlier removal with ``predict=False`` and check the logged detection rate."""
    interface = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params.update(outlier_protection_percentage=0.1)

    interface.dk.use_SVM_to_remove_outliers(predict=False)

    expected_log = "SVM detected 7.83%"
    assert log_has_re(expected_log, caplog)
def test_compute_inlier_metric(mocker, freqai_conf, caplog):
    """Compute the inlier metric on the 'train' set and check the completion log message."""
    interface = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params.update(inlier_metric_window=10)

    interface.dk.compute_inlier_metric(set_='train')

    expected_log = "Inlier metric computed and added to features."
    assert log_has_re(expected_log, caplog)
def test_add_noise_to_training_features(mocker, freqai_conf):
    """Smoke test: ``add_noise_to_training_features`` runs with a noise std of 0.1 set."""
    interface = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params.update(noise_standard_deviation=0.1)

    # No assertion: the call only has to complete without raising.
    kitchen = interface.dk
    kitchen.add_noise_to_training_features()
def test_remove_beginning_points_from_data_dict(mocker, freqai_conf):
    """Smoke test: ``remove_beginning_points_from_data_dict`` runs on the 'train' set."""
    interface = make_data_dictionary(mocker, freqai_conf)

    # No assertion: the call only has to complete without raising.
    kitchen = interface.dk
    kitchen.remove_beginning_points_from_data_dict(set_='train')
def test_principal_component_analysis(mocker, freqai_conf, caplog):
    """Run ``principal_component_analysis`` and check that a dimension reduction is logged."""
    interface = make_data_dictionary(mocker, freqai_conf)
    kitchen = interface.dk
    kitchen.principal_component_analysis()

    expected_log = "reduced feature dimension by"
    assert log_has_re(expected_log, caplog)
def test_normalize_data(mocker, freqai_conf):
    """Normalize the data dictionary and check '_max' / '_min' entries appear in ``dk.data``."""
    interface = make_data_dictionary(mocker, freqai_conf)
    kitchen = interface.dk
    kitchen.normalize_data(kitchen.data_dictionary)

    # Both suffixes must show up in at least one key of the kitchen's data store.
    for suffix in ('_max', '_min'):
        assert any(suffix in key for key in kitchen.data.keys())
def test_filter_features(mocker, freqai_conf):
freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
freqai.dk.find_features(unfiltered_dataframe)

View File

@@ -38,21 +38,22 @@ def can_run_model(model: str) -> None:
pytest.skip("Reinforcement learning / PyTorch module not available on intel based Mac OS.")
@pytest.mark.parametrize('model, pca, dbscan, float32, can_short, shuffle, buffer', [
('LightGBMRegressor', True, False, True, True, False, 0),
('XGBoostRegressor', False, True, False, True, False, 10),
('XGBoostRFRegressor', False, False, False, True, False, 0),
('CatboostRegressor', False, False, False, True, True, 0),
('PyTorchMLPRegressor', False, False, False, False, False, 0),
('PyTorchTransformerRegressor', False, False, False, False, False, 0),
('ReinforcementLearner', False, True, False, True, False, 0),
('ReinforcementLearner_multiproc', False, False, False, True, False, 0),
('ReinforcementLearner_test_3ac', False, False, False, False, False, 0),
('ReinforcementLearner_test_3ac', False, False, False, True, False, 0),
('ReinforcementLearner_test_4ac', False, False, False, True, False, 0),
@pytest.mark.parametrize('model, pca, dbscan, float32, can_short, shuffle, buffer, noise', [
('LightGBMRegressor', True, False, True, True, False, 0, 0),
('XGBoostRegressor', False, True, False, True, False, 10, 0.05),
('XGBoostRFRegressor', False, False, False, True, False, 0, 0),
('CatboostRegressor', False, False, False, True, True, 0, 0),
('PyTorchMLPRegressor', False, False, False, False, False, 0, 0),
('PyTorchTransformerRegressor', False, False, False, False, False, 0, 0),
('ReinforcementLearner', False, True, False, True, False, 0, 0),
('ReinforcementLearner_multiproc', False, False, False, True, False, 0, 0),
('ReinforcementLearner_test_3ac', False, False, False, False, False, 0, 0),
('ReinforcementLearner_test_3ac', False, False, False, True, False, 0, 0),
('ReinforcementLearner_test_4ac', False, False, False, True, False, 0, 0),
])
def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
dbscan, float32, can_short, shuffle, buffer):
dbscan, float32, can_short, shuffle,
buffer, noise):
can_run_model(model)
@@ -69,12 +70,14 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
freqai_conf.update({"reduce_df_footprint": float32})
freqai_conf['freqai']['feature_parameters'].update({"shuffle_after_split": shuffle})
freqai_conf['freqai']['feature_parameters'].update({"buffer_train_data_candles": buffer})
freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": noise})
if 'ReinforcementLearner' in model:
model_save_ext = 'zip'
freqai_conf = make_rl_config(freqai_conf)
# test the RL guardrails
freqai_conf['freqai']['feature_parameters'].update({"use_SVM_to_remove_outliers": True})
freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 2})
freqai_conf['freqai']['data_split_parameters'].update({'shuffle': True})
if 'test_3ac' in model or 'test_4ac' in model:
@@ -163,7 +166,6 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, s
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file()
assert len(freqai.dk.data['training_features_list']) == 14
shutil.rmtree(Path(freqai.dk.full_path))
@@ -219,7 +221,6 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
f"{freqai.dk.model_filename}_model{model_file_extension}").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
shutil.rmtree(Path(freqai.dk.full_path))
@@ -284,9 +285,6 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog)
_, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
df = base_df[freqai_conf["timeframe"]]
for i in range(5):
df[f'%-constant_{i}'] = i
metadata = {"pair": "LTC/BTC"}
freqai.dk.set_paths('LTC/BTC', None)
freqai.start_backtesting(df, metadata, freqai.dk, strategy)
@@ -294,14 +292,6 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog)
assert len(model_folders) == num_files
Trade.use_db = True
assert log_has_re(
"Removed features ",
caplog,
)
assert log_has_re(
"Removed 5 features from prediction features, ",
caplog,
)
Backtesting.cleanup()
shutil.rmtree(Path(freqai.dk.full_path))
@@ -426,36 +416,6 @@ def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
shutil.rmtree(Path(freqai.dk.full_path))
def test_principal_component_analysis(mocker, freqai_conf):
    """
    Train a model for ADA/BTC with PCA switched on and verify the PCA object is saved.

    Fixes over the previous version of this test:
    * the config key was misspelled ("princpial_component_analysis"), so PCA was
      never actually enabled;
    * the final assertion only built a ``Path`` object (always truthy) instead of
      checking that the file exists on disk.
    """
    freqai_conf.update({"timerange": "20180110-20180130"})
    # Index directly instead of chaining .get(..., {}): with .get a missing section
    # would silently update a throwaway dict and leave the config untouched.
    freqai_conf['freqai']['feature_parameters'].update(
        {"principal_component_analysis": True})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    freqai.dk.live = True

    timerange = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(timerange, freqai.dk)

    # pair_dict is replaced by a mock after the histories are loaded.
    freqai.dd.pair_dict = MagicMock()

    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")
    freqai.dk.set_paths('ADA/BTC', None)

    freqai.extract_data_and_train_model(
        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    pca_file = Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl")
    assert pca_file.is_file()

    # Clean up the model directory created by this test.
    shutil.rmtree(Path(freqai.dk.full_path))
def test_plot_feature_importance(mocker, freqai_conf):
from freqtrade.freqai.utils import plot_feature_importance