mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-05-21 21:05:05 +00:00
fix: mini fixes
This commit is contained in:
@@ -52,8 +52,9 @@ def normalize_remote_data(source_type, remote_data):
|
||||
remote_data: The stored ``remote_data`` (dict, list, str, or None).
|
||||
|
||||
Returns:
|
||||
``source_data`` for the loader: a URL string for url/crawler/
|
||||
sitemap/github, a JSON string for reddit, a dict for s3.
|
||||
Loader input: a URL string or list for url/crawler/sitemap/github,
|
||||
a JSON string for reddit, a dict for s3; ``None`` when the row has
|
||||
nothing syncable.
|
||||
"""
|
||||
if remote_data is None:
|
||||
return None
|
||||
@@ -65,6 +66,8 @@ def normalize_remote_data(source_type, remote_data):
|
||||
try:
|
||||
remote_data = json.loads(stripped)
|
||||
except json.JSONDecodeError:
|
||||
# Not actually JSON — leave remote_data as the original
|
||||
# string; the per-loader branches below handle a string.
|
||||
pass
|
||||
|
||||
loader = (source_type or "").lower()
|
||||
|
||||
@@ -1446,6 +1446,11 @@ def sync_worker(self, frequency):
|
||||
continue
|
||||
|
||||
source_data = normalize_remote_data(source_type, doc.get("remote_data"))
|
||||
if not source_data:
|
||||
# No syncable URL/config — skip instead of dispatching a sync
|
||||
# that can only fail (and emit a spurious failed event).
|
||||
sync_counts["sync_skipped"] += 1
|
||||
continue
|
||||
|
||||
resp = sync(
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
|
||||
@@ -240,6 +240,38 @@ class TestSyncWorker:
|
||||
"loader must receive the URL string, not the remote_data dict"
|
||||
)
|
||||
|
||||
def test_unsyncable_remote_data_is_skipped(
    self,
    pg_conn,
    patch_worker_db,
    task_self,
    monkeypatch,
):
    """Skip, rather than dispatch, a URL source that has nothing to sync.

    The stored ``remote_data`` dict of the seeded source carries no URL
    key. ``sync_worker`` is expected to count that row as skipped and
    never call ``sync()``, so neither a success nor a failure is recorded.
    """
    from application import worker

    def _fail_if_called(*args, **kwargs):
        raise AssertionError("sync() must not run for unsyncable sources")

    # Seed one monthly URL source whose remote_data holds only a provider
    # tag and no URL.
    source_fields = {
        "user_id": "frank",
        "type": "url",
        "retriever": "classic",
        "sync_frequency": "monthly",
        "remote_data": {"provider": "url"},
    }
    SourcesRepository(pg_conn).create("broken-feed", **source_fields)

    # From here on, any dispatch to sync() fails the test immediately.
    monkeypatch.setattr(worker, "sync", _fail_if_called)

    counts = worker.sync_worker(task_self, "monthly")

    # The row is seen exactly once and lands in the "skipped" bucket only.
    expected_counts = {
        "total_sync_count": 1,
        "sync_skipped": 1,
        "sync_failure": 0,
        "sync_success": 0,
    }
    for counter_name, expected_value in expected_counts.items():
        assert counts[counter_name] == expected_value
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestRemoteWorkerPathTraversal:
|
||||
|
||||
Reference in New Issue
Block a user