fix: Improve stacked imbalance detection in orderflow converter

This commit is contained in:
Joe Schr
2025-01-03 16:30:19 +01:00
parent ea96abecd0
commit 11976f11b0
2 changed files with 22 additions and 19 deletions

View File

@@ -265,20 +265,23 @@ def stacked_imbalance(
"""
imbalance = df[f"{label}_imbalance"]
int_series = pd.Series(np.where(imbalance, 1, 0))
stacked = int_series * (
int_series.groupby((int_series != int_series.shift()).cumsum()).cumcount() + 1
)
stacked_imbalance_idx = stacked.index[stacked >= stacked_imbalance_range]
stacked_imbalance_prices = []
# Label each run of consecutive equal values, then keep a running count of True values within each run
groups = (int_series != int_series.shift()).cumsum()
counts = int_series.groupby(groups).cumsum()
if not stacked_imbalance_idx.empty:
indices = (
stacked_imbalance_idx
# Find indices where count meets or exceeds the range requirement
valid_indices = counts[counts >= stacked_imbalance_range].index
stacked_imbalance_prices = []
if not valid_indices.empty:
# For each valid index, step back (stacked_imbalance_range - 1) rows to get the price where that stacked range begins
valid_prices = [imbalance.index.values[idx-(stacked_imbalance_range-1)] for idx in valid_indices]
# Sort prices according to direction
stacked_imbalance_prices = (
sorted(valid_prices)
if not should_reverse
else np.flipud(stacked_imbalance_idx)
else sorted(valid_prices, reverse=True)
)
stacked_imbalance_prices = [float(imbalance.index[idx]) for idx in indices]
return stacked_imbalance_prices if stacked_imbalance_prices else [np.nan]

View File

@@ -193,8 +193,8 @@ def test_public_trades_mock_populate_dataframe_with_trades__check_orderflow(
assert pytest.approx(results["delta"]) == -20.862
assert pytest.approx(results["min_delta"]) == -54.559999
assert 82.842 == results["max_delta"]
assert results["stacked_imbalances_bid"] == [234.99]
assert results["stacked_imbalances_ask"] == [234.96]
assert results["stacked_imbalances_bid"] == [234.97]
assert results["stacked_imbalances_ask"] == [234.94]
# Repeat assertions for the last row
results = df.iloc[-1]
@@ -586,22 +586,22 @@ def test_stacked_imbalances_multiple_prices():
# Create a sample DataFrame with known imbalances
df = pd.DataFrame(
{
'bid_imbalance': [True, True, True, False, False, True, True, False],
'ask_imbalance': [False, False, True, True, True, False, False, True]
'bid_imbalance': [True, True, True, False, False, True, True, False, True],
'ask_imbalance': [False, False, True, True, True, False, False, True, True]
},
index=[234.95, 234.96, 234.97, 234.98, 234.99, 235.00, 235.01, 235.02]
index=[234.95, 234.96, 234.97, 234.98, 234.99, 235.00, 235.01, 235.02, 235.03]
)
# Test bid imbalances (should return prices in ascending order)
bid_prices = stacked_imbalance(df, "bid", stacked_imbalance_range=2, should_reverse=False)
assert bid_prices == [234.95, 234.96, 234.97, 235.00, 235.01]
assert bid_prices == [234.95, 234.96, 235.00]
# Test ask imbalances (should return prices in descending order)
ask_prices = stacked_imbalance(df, "ask", stacked_imbalance_range=2, should_reverse=True)
assert ask_prices == [235.02, 234.99, 234.98, 234.97]
assert ask_prices == [235.02, 234.98, 234.97]
# Test with higher stacked_imbalance_range
bid_prices_higher = stacked_imbalance(df, "bid", stacked_imbalance_range=3, should_reverse=False)
assert bid_prices_higher == [234.95, 234.96, 234.97]
assert bid_prices_higher == [234.95]