mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
feat: add chunking endpoints (#353)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -62,3 +62,60 @@ async def test_convert_url(async_client):
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("include_converted_doc", [False, True])
|
||||
async def test_chunk_url(async_client, include_converted_doc: bool):
|
||||
"""Test chunk URL"""
|
||||
|
||||
example_docs = [
|
||||
"https://arxiv.org/pdf/2311.18481",
|
||||
]
|
||||
|
||||
base_url = "http://localhost:5001/v1"
|
||||
payload = {
|
||||
"sources": [{"kind": "http", "url": random.choice(example_docs)}],
|
||||
"include_converted_doc": include_converted_doc,
|
||||
}
|
||||
|
||||
response = await async_client.post(
|
||||
f"{base_url}/chunk/hybrid/source/async", json=payload
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
|
||||
print(json.dumps(task, indent=2))
|
||||
|
||||
while task["task_status"] not in ("success", "failure"):
|
||||
response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
task = response.json()
|
||||
print(f"{task['task_status']=}")
|
||||
print(f"{task['task_position']=}")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
|
||||
result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
|
||||
assert result_resp.status_code == 200, "Response should be 200 OK"
|
||||
result = result_resp.json()
|
||||
print("Got result.")
|
||||
|
||||
assert "chunks" in result
|
||||
assert len(result["chunks"]) > 0
|
||||
|
||||
assert "documents" in result
|
||||
assert len(result["documents"]) > 0
|
||||
assert result["documents"][0]["status"] == "success"
|
||||
|
||||
if include_converted_doc:
|
||||
assert result["documents"][0]["content"]["json_content"] is not None
|
||||
assert (
|
||||
result["documents"][0]["content"]["json_content"]["schema_name"]
|
||||
== "DoclingDocument"
|
||||
)
|
||||
else:
|
||||
assert result["documents"][0]["content"]["json_content"] is None
|
||||
|
||||
Reference in New Issue
Block a user