mirror of
https://github.com/freqtrade/freqtrade.git
synced 2026-02-04 05:10:24 +00:00
Merge pull request #12126 from stash86/main-stash
Improve lookahead analysis to use full dataframe comparison instead of just the last row
This commit is contained in:
@@ -38,7 +38,7 @@ Many strategies, without the programmer knowing, have fallen prey to lookahead b
|
|||||||
This typically makes the strategy backtest look profitable, sometimes to extremes, but this is not realistic as the strategy is "cheating" by looking at data it would not have in dry or live modes.
|
This typically makes the strategy backtest look profitable, sometimes to extremes, but this is not realistic as the strategy is "cheating" by looking at data it would not have in dry or live modes.
|
||||||
|
|
||||||
The reason why strategies can "cheat" is because the freqtrade backtesting process populates the full dataframe including all candle timestamps at the outset.
|
The reason why strategies can "cheat" is because the freqtrade backtesting process populates the full dataframe including all candle timestamps at the outset.
|
||||||
If the programmer is not careful or is oblivious to how things work internally
|
If the programmer is not careful or is oblivious to how things work internally
|
||||||
(which can sometimes be really hard to find out), then the strategy will look into the future.
|
(which can sometimes be really hard to find out), then the strategy will look into the future.
|
||||||
|
|
||||||
This command tries to verify the validity of a strategy with respect to the aforementioned lookahead bias.
|
This command tries to verify the validity of a strategy with respect to the aforementioned lookahead bias.
|
||||||
@@ -50,8 +50,7 @@ After this initial backtest runs, it will look if the `minimum-trade-amount` is
|
|||||||
If this happens, use a wider timerange to get more trades for the analysis, or use a timerange where more trades occur.
|
If this happens, use a wider timerange to get more trades for the analysis, or use a timerange where more trades occur.
|
||||||
|
|
||||||
After setting the baseline it will then do additional backtest runs for every entry and exit separately.
|
After setting the baseline it will then do additional backtest runs for every entry and exit separately.
|
||||||
When these verification backtests complete, it will compare the indicators at the signal candles (both entry or exit)
|
When these verification backtests complete, it will compare both dataframes (baseline and sliced) for any differences in column values and report the bias.
|
||||||
and report the bias.
|
|
||||||
After all signals have been verified or falsified a result table will be generated for the user to see.
|
After all signals have been verified or falsified a result table will be generated for the user to see.
|
||||||
|
|
||||||
### How to find and remove bias? How can I salvage a biased strategy?
|
### How to find and remove bias? How can I salvage a biased strategy?
|
||||||
@@ -98,8 +97,8 @@ If the strategy has many different signals / signal types, it's up to you to sel
|
|||||||
This would lead to a false-negative, i.e. the strategy will be reported as non-biased.
|
This would lead to a false-negative, i.e. the strategy will be reported as non-biased.
|
||||||
- `lookahead-analysis` has access to the same backtesting options and this can introduce problems.
|
- `lookahead-analysis` has access to the same backtesting options and this can introduce problems.
|
||||||
Please don't use any options like enabling position stacking as this will distort the number of checked signals.
|
Please don't use any options like enabling position stacking as this will distort the number of checked signals.
|
||||||
If you decide to do so, then make doubly sure that you won't ever run out of `max_open_trades` slots,
|
If you decide to do so, then make doubly sure that you won't ever run out of `max_open_trades` slots,
|
||||||
and that you have enough capital in the backtest wallet configuration.
|
and that you have enough capital in the backtest wallet configuration.
|
||||||
- In the results table, the `biased_indicators` column
|
- In the results table, the `biased_indicators` column
|
||||||
will falsely flag FreqAI target indicators defined in `set_freqai_targets()` as biased.
|
will falsely flag FreqAI target indicators defined in `set_freqai_targets()` as biased.
|
||||||
**These are not biased and can safely be ignored.**
|
**These are not biased and can safely be ignored.**
|
||||||
|
|||||||
@@ -70,34 +70,29 @@ class LookaheadAnalysis(BaseAnalysis):
|
|||||||
cut_df: DataFrame = cut_vars.indicators[current_pair]
|
cut_df: DataFrame = cut_vars.indicators[current_pair]
|
||||||
full_df: DataFrame = full_vars.indicators[current_pair]
|
full_df: DataFrame = full_vars.indicators[current_pair]
|
||||||
|
|
||||||
# cut longer dataframe to length of the shorter
|
# trim full_df to the same index and length as cut_df
|
||||||
full_df_cut = full_df[(full_df.date == cut_vars.compared_dt)].reset_index(drop=True)
|
cut_full_df = full_df.loc[cut_df.index]
|
||||||
cut_df_cut = cut_df[(cut_df.date == cut_vars.compared_dt)].reset_index(drop=True)
|
compare_df = cut_full_df.compare(cut_df)
|
||||||
|
|
||||||
# check if dataframes are not empty
|
if compare_df.shape[0] > 0:
|
||||||
if full_df_cut.shape[0] != 0 and cut_df_cut.shape[0] != 0:
|
for col_name in compare_df:
|
||||||
# compare dataframes
|
col_idx = compare_df.columns.get_loc(col_name)
|
||||||
compare_df = full_df_cut.compare(cut_df_cut)
|
compare_df_row = compare_df.iloc[0]
|
||||||
|
# compare_df now comprises tuples with [1] having either 'self' or 'other'
|
||||||
|
if "other" in col_name[1]:
|
||||||
|
continue
|
||||||
|
self_value = compare_df_row.iloc[col_idx]
|
||||||
|
other_value = compare_df_row.iloc[col_idx + 1]
|
||||||
|
|
||||||
if compare_df.shape[0] > 0:
|
# output differences
|
||||||
for col_name, values in compare_df.items():
|
if self_value != other_value:
|
||||||
col_idx = compare_df.columns.get_loc(col_name)
|
if not self.current_analysis.false_indicators.__contains__(col_name[0]):
|
||||||
compare_df_row = compare_df.iloc[0]
|
self.current_analysis.false_indicators.append(col_name[0])
|
||||||
# compare_df now comprises tuples with [1] having either 'self' or 'other'
|
logger.info(
|
||||||
if "other" in col_name[1]:
|
f"=> found look ahead bias in column "
|
||||||
continue
|
f"{col_name[0]}. "
|
||||||
self_value = compare_df_row.iloc[col_idx]
|
f"{str(self_value)} != {str(other_value)}"
|
||||||
other_value = compare_df_row.iloc[col_idx + 1]
|
)
|
||||||
|
|
||||||
# output differences
|
|
||||||
if self_value != other_value:
|
|
||||||
if not self.current_analysis.false_indicators.__contains__(col_name[0]):
|
|
||||||
self.current_analysis.false_indicators.append(col_name[0])
|
|
||||||
logger.info(
|
|
||||||
f"=> found look ahead bias in indicator "
|
|
||||||
f"{col_name[0]}. "
|
|
||||||
f"{str(self_value)} != {str(other_value)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def prepare_data(self, varholder: VarHolder, pairs_to_load: list[DataFrame]):
|
def prepare_data(self, varholder: VarHolder, pairs_to_load: list[DataFrame]):
|
||||||
if "freqai" in self.local_config and "identifier" in self.local_config["freqai"]:
|
if "freqai" in self.local_config and "identifier" in self.local_config["freqai"]:
|
||||||
@@ -132,7 +127,13 @@ class LookaheadAnalysis(BaseAnalysis):
|
|||||||
varholder.data, varholder.timerange = backtesting.load_bt_data()
|
varholder.data, varholder.timerange = backtesting.load_bt_data()
|
||||||
varholder.timeframe = backtesting.timeframe
|
varholder.timeframe = backtesting.timeframe
|
||||||
|
|
||||||
varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data)
|
temp_indicators = backtesting.strategy.advise_all_indicators(varholder.data)
|
||||||
|
filled_indicators = dict()
|
||||||
|
for pair, dataframe in temp_indicators.items():
|
||||||
|
filled_indicators[pair] = backtesting.strategy.ft_advise_signals(
|
||||||
|
dataframe, {"pair": pair}
|
||||||
|
)
|
||||||
|
varholder.indicators = filled_indicators
|
||||||
varholder.result = self.get_result(backtesting, varholder.indicators)
|
varholder.result = self.get_result(backtesting, varholder.indicators)
|
||||||
|
|
||||||
def fill_entry_and_exit_varHolders(self, result_row):
|
def fill_entry_and_exit_varHolders(self, result_row):
|
||||||
|
|||||||
Reference in New Issue
Block a user