- Added a first version of docs (needs checking)

- optimized pairs for entry_varholder and exit_varholder to only check a single pair instead of all pairs.
- bias-check of freqai strategies now possible
- added condition to not crash when compared_df is empty (meaning no differences have been found)
This commit is contained in:
hippocritical
2023-04-16 23:47:10 +02:00
parent d5c98a3c39
commit 2b416d3b62
3 changed files with 71 additions and 19 deletions

View File

@@ -999,3 +999,36 @@ Common arguments:
Path to userdata directory.
```
### Backtest lookahead bias checker
#### Summary
Checks a given strategy for look ahead bias.
Look ahead bias means that the backtest uses data from future candles, which makes the strategy not viable beyond backtesting
and produces false hopes for the person running the backtest.
#### Introduction:
Many strategies - without the programmer knowing - have fallen prey to look ahead bias.
Any backtest will populate the full dataframe including all time stamps at the beginning.
If the programmer is not careful, or is unaware of how things work internally
(which can sometimes be really hard to find out), then the strategy will simply look into the future, making it appear amazing
but not realistic.
This tool tries to verify the validity of a strategy by checking it for the aforementioned look ahead bias.
#### How does the command work?
It will not look at the strategy or any contents itself but instead will run multiple backtests
by using precisely cut timeranges and analyzing the results each time, comparing to the full timerange.
At first, it starts a backtest over the whole duration
and then repeats backtests from the same starting point to the respective points to watch.
In addition, it compares the dataframes from the overall backtest with those from the cut ones.
At the end, it will print a result table in the terminal.
Hint:
If an entry or exit condition is only triggered rarely, or the timerange was chosen
so that only a few entry conditions are met,
then the bias checker is unable to catch the biased entry or exit condition.
In the end it only checks which entry and exit signals have been triggered.
---Flow chart here for better understanding---

View File

@@ -91,6 +91,12 @@ def start_backtest_lookahead_bias_checker(args: Dict[str, Any]) -> None:
for filtered_strategy_obj in filtered_strategy_objs:
bias_checker_instances.append(
initialize_single_lookahead_bias_checker(filtered_strategy_obj, config, args))
elif 'strategy' in args and args['strategy'] is not None:
for strategy_obj in strategy_objs:
if strategy_obj['name'] == args['strategy']:
bias_checker_instances.append(
initialize_single_lookahead_bias_checker(strategy_obj, config, args))
break
else:
processed_locations = set()
for strategy_obj in strategy_objs:

View File

@@ -1,4 +1,6 @@
import copy
import pathlib
import shutil
from copy import deepcopy
from datetime import datetime, timedelta, timezone
@@ -45,8 +47,11 @@ class BacktestLookaheadBiasChecker:
self.current_analysis = None
self.local_config = None
self.full_varHolder = None
self.entry_varHolder = None
self.exit_varHolder = None
self.entry_varHolders = []
self.exit_varHolders = []
self.backtesting = None
self.minimum_trade_amount = None
self.targeted_trade_amount = None
@@ -105,29 +110,36 @@ class BacktestLookaheadBiasChecker:
if cut_df_cut.shape[0] != 0:
compare_df = full_df_cut.compare(cut_df_cut)
# skippedColumns = ["date", "open", "high", "low", "close", "volume"]
for col_name, values in compare_df.items():
col_idx = compare_df.columns.get_loc(col_name)
compare_df_row = compare_df.iloc[0]
# compare_df now comprises tuples with [1] having either 'self' or 'other'
if 'other' in col_name[1]:
continue
self_value = compare_df_row[col_idx]
other_value = compare_df_row[col_idx + 1]
if compare_df.shape[0] > 0:
for col_name, values in compare_df.items():
col_idx = compare_df.columns.get_loc(col_name)
compare_df_row = compare_df.iloc[0]
# compare_df now comprises tuples with [1] having either 'self' or 'other'
if 'other' in col_name[1]:
continue
self_value = compare_df_row[col_idx]
other_value = compare_df_row[col_idx + 1]
# output differences
if self_value != other_value:
# output differences
if self_value != other_value:
if not self.current_analysis.false_indicators.__contains__(col_name[0]):
self.current_analysis.false_indicators.append(col_name[0])
print(f"=> found look ahead bias in indicator {col_name[0]}. " +
f"{str(self_value)} != {str(other_value)}")
if not self.current_analysis.false_indicators.__contains__(col_name[0]):
self.current_analysis.false_indicators.append(col_name[0])
print(f"=> found look ahead bias in indicator {col_name[0]}. " +
f"{str(self_value)} != {str(other_value)}")
def prepare_data(self, varHolder, pairs_to_load):
# purge previous data
abs_folder_path = pathlib.Path("user_data/models/uniqe-id").resolve()
# remove folder and its contents
if pathlib.Path.exists(abs_folder_path):
shutil.rmtree(abs_folder_path)
prepare_data_config = copy.deepcopy(self.local_config)
prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varHolder.from_dt)) + "-" +
str(self.dt_to_timestamp(varHolder.to_dt)))
prepare_data_config['pairs'] = pairs_to_load
prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load
self.backtesting = Backtesting(prepare_data_config)
self.backtesting._set_strategy(self.backtesting.strategylist[0])
@@ -137,9 +149,6 @@ class BacktestLookaheadBiasChecker:
varHolder.indicators = self.backtesting.strategy.advise_all_indicators(varHolder.data)
varHolder.result = self.get_result(self.backtesting, varHolder.indicators)
def update_output_file(self):
pass
def start(self, config, strategy_obj: dict, args) -> None:
# deepcopy so we can change the pairs for the 2ndary runs
@@ -195,6 +204,8 @@ class BacktestLookaheadBiasChecker:
self.entry_varHolder = VarHolder()
self.exit_varHolder = VarHolder()
self.entry_varHolders.append(self.entry_varHolder)
self.exit_varHolders.append(self.exit_varHolder)
self.entry_varHolder.from_dt = self.full_varHolder.from_dt
self.entry_varHolder.compared_dt = result_row['open_date']
@@ -224,6 +235,8 @@ class BacktestLookaheadBiasChecker:
self.exit_varHolder.result, "close_date", self.exit_varHolder.compared_dt):
self.current_analysis.false_exit_signals += 1
if len(self.entry_varHolders) >= 10:
pass
# check if the indicators themselves contain biased data
self.analyze_indicators(self.full_varHolder, self.entry_varHolder, result_row['pair'])
self.analyze_indicators(self.full_varHolder, self.exit_varHolder, result_row['pair'])