mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-05-13 15:45:26 +00:00
fix(events): skip replay budget INCR when no snapshot work possible
_allow_replay incremented the per-user counter on every /api/events GET, including no-op connects from a fresh client with no cursor against an empty backlog. React StrictMode dev double-mounts plus a few tabs trivially tripped the default 30-per-60s budget on idle reconnects. XLEN pre-check: when last_event_id is None and the user stream is empty, the connect can't do snapshot work — return True without INCR. Cursor-bearing connects still INCR unconditionally (probing the cursor's relationship to stream contents would require a redundant XRANGE).
This commit is contained in:
@@ -120,7 +120,9 @@ def _oldest_retained_id(redis_client, user_id: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _allow_replay(redis_client, user_id: str) -> bool:
|
||||
def _allow_replay(
|
||||
redis_client, user_id: str, last_event_id: Optional[str]
|
||||
) -> bool:
|
||||
"""Per-user sliding-window snapshot-replay budget.
|
||||
|
||||
Increments a Redis counter under
|
||||
@@ -129,12 +131,39 @@ def _allow_replay(redis_client, user_id: str) -> bool:
|
||||
Redis is unavailable, fails open — the existing per-user
|
||||
concurrency cap still bounds parallel enumeration. Returns
|
||||
``True`` when the budget setting is 0 (disabled) or non-positive.
|
||||
|
||||
Budget is only consumed when the connect can plausibly do snapshot
|
||||
work. A fresh client (``last_event_id is None``) connecting to an
|
||||
empty backlog (``XLEN == 0``) returns ``True`` without INCR'ing —
|
||||
this catches the React-StrictMode dev-burst case where double
|
||||
mounts of an empty-backlog user would otherwise trip 429 in 5
|
||||
seconds and lock out further connects until the window rolls.
|
||||
"""
|
||||
budget = int(settings.EVENTS_REPLAY_BUDGET_REQUESTS_PER_WINDOW)
|
||||
if budget <= 0:
|
||||
return True
|
||||
if redis_client is None:
|
||||
return True
|
||||
|
||||
# Cheap pre-check: only INCR when we might actually replay. XLEN
|
||||
# is one Redis op; the alternative (INCR every connect) is two
|
||||
# ops AND wrongly counts no-op probes. The check is conservative:
|
||||
# if ``last_event_id`` is set we always INCR, even if the cursor
|
||||
# has already overtaken the latest entry — that case is rare and
|
||||
# short-lived, and probing further would mean a redundant XRANGE.
|
||||
if last_event_id is None:
|
||||
try:
|
||||
if int(redis_client.xlen(stream_key(user_id))) == 0:
|
||||
return True
|
||||
except Exception:
|
||||
# XLEN probe failed; fall through to the INCR path so a
|
||||
# transient Redis hiccup can't bypass the budget.
|
||||
logger.debug(
|
||||
"XLEN probe failed for replay budget check user=%s; "
|
||||
"proceeding to INCR",
|
||||
user_id,
|
||||
)
|
||||
|
||||
window = max(1, int(settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS))
|
||||
key = replay_budget_key(user_id)
|
||||
try:
|
||||
@@ -326,7 +355,7 @@ def stream_events() -> Response:
|
||||
# cursor pinned and lets the frontend back off until the window
|
||||
# slides (eventStreamClient.ts treats 429 as escalated backoff).
|
||||
if push_enabled and redis_client is not None and not _allow_replay(
|
||||
redis_client, user_id
|
||||
redis_client, user_id, last_event_id
|
||||
):
|
||||
if counted:
|
||||
try:
|
||||
|
||||
@@ -440,7 +440,7 @@ class TestReplayRateLimit:
|
||||
with patch("application.api.events.routes.settings") as mock_settings:
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_REQUESTS_PER_WINDOW = 0
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS = 60
|
||||
assert _allow_replay(MagicMock(), "alice") is True
|
||||
assert _allow_replay(MagicMock(), "alice", "1735682400000-0") is True
|
||||
|
||||
def test_allow_replay_returns_true_when_redis_unavailable(self):
|
||||
from application.api.events.routes import _allow_replay
|
||||
@@ -448,7 +448,45 @@ class TestReplayRateLimit:
|
||||
with patch("application.api.events.routes.settings") as mock_settings:
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_REQUESTS_PER_WINDOW = 5
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS = 60
|
||||
assert _allow_replay(None, "alice") is True
|
||||
assert _allow_replay(None, "alice", "1735682400000-0") is True
|
||||
|
||||
def test_allow_replay_skips_incr_when_no_cursor_and_empty_backlog(self):
|
||||
"""Fresh client with no cursor and an empty user stream cannot
|
||||
do snapshot work — INCR'ing the counter would needlessly
|
||||
burn budget. Catches the React-StrictMode dev-burst case where
|
||||
double-mounted components would otherwise 429 in 5 connects.
|
||||
"""
|
||||
from application.api.events.routes import _allow_replay
|
||||
|
||||
with patch("application.api.events.routes.settings") as mock_settings:
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_REQUESTS_PER_WINDOW = 3
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS = 60
|
||||
redis = MagicMock()
|
||||
redis.xlen.return_value = 0
|
||||
|
||||
# 5 connects in a row, all with no cursor — none consume
|
||||
# budget because the backlog is empty.
|
||||
for _ in range(5):
|
||||
assert _allow_replay(redis, "alice", None) is True
|
||||
|
||||
redis.xlen.assert_called()
|
||||
redis.incr.assert_not_called()
|
||||
|
||||
def test_allow_replay_incrs_when_no_cursor_but_backlog_present(self):
|
||||
"""A no-cursor connect against a non-empty backlog *will* do
|
||||
snapshot work, so it consumes budget normally.
|
||||
"""
|
||||
from application.api.events.routes import _allow_replay
|
||||
|
||||
with patch("application.api.events.routes.settings") as mock_settings:
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_REQUESTS_PER_WINDOW = 5
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS = 60
|
||||
redis = MagicMock()
|
||||
redis.xlen.return_value = 42
|
||||
redis.incr.return_value = 1
|
||||
|
||||
assert _allow_replay(redis, "alice", None) is True
|
||||
redis.incr.assert_called_once()
|
||||
|
||||
def test_allow_replay_passes_until_budget_exhausted(self):
|
||||
from application.api.events.routes import _allow_replay
|
||||
@@ -465,12 +503,14 @@ class TestReplayRateLimit:
|
||||
|
||||
redis.incr.side_effect = _incr
|
||||
|
||||
# Cursor set → XLEN short-circuit doesn't fire, INCR always runs.
|
||||
cursor = "1735682400000-0"
|
||||
# First three pass.
|
||||
assert _allow_replay(redis, "alice") is True
|
||||
assert _allow_replay(redis, "alice") is True
|
||||
assert _allow_replay(redis, "alice") is True
|
||||
assert _allow_replay(redis, "alice", cursor) is True
|
||||
assert _allow_replay(redis, "alice", cursor) is True
|
||||
assert _allow_replay(redis, "alice", cursor) is True
|
||||
# Fourth refused.
|
||||
assert _allow_replay(redis, "alice") is False
|
||||
assert _allow_replay(redis, "alice", cursor) is False
|
||||
# TTL only seeded on the first INCR (when count == 1).
|
||||
redis.expire.assert_called_once()
|
||||
|
||||
@@ -482,7 +522,7 @@ class TestReplayRateLimit:
|
||||
mock_settings.EVENTS_REPLAY_BUDGET_WINDOW_SECONDS = 60
|
||||
redis = MagicMock()
|
||||
redis.incr.side_effect = Exception("redis down")
|
||||
assert _allow_replay(redis, "alice") is True
|
||||
assert _allow_replay(redis, "alice", "1735682400000-0") is True
|
||||
|
||||
def test_replay_backlog_passes_count_to_xrange(self):
|
||||
from application.api.events.routes import _replay_backlog
|
||||
|
||||
Reference in New Issue
Block a user