mirror of
https://github.com/docling-project/docling-serve.git
synced 2026-04-29 05:20:17 +00:00
feat: Async api (#60)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -89,7 +89,7 @@ async def test_convert_file(async_client):
|
||||
check.is_in(
|
||||
'{"schema_name": "DoclingDocument"',
|
||||
json.dumps(data["document"]["json_content"]),
|
||||
msg=f"JSON document should contain '{{\\n \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
|
||||
msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
|
||||
)
|
||||
# HTML check
|
||||
check.is_in(
|
||||
|
||||
@@ -83,7 +83,7 @@ async def test_convert_url(async_client):
|
||||
check.is_in(
|
||||
'{"schema_name": "DoclingDocument"',
|
||||
json.dumps(data["document"]["json_content"]),
|
||||
msg=f"JSON document should contain '{{\\n \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
|
||||
msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
|
||||
)
|
||||
# HTML check
|
||||
check.is_in(
|
||||
|
||||
48
tests/test_1-url-async-ws.py
Normal file
48
tests/test_1-url-async-ws.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from websockets.sync.client import connect
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_url(async_client: httpx.AsyncClient):
|
||||
"""Test convert URL to all outputs"""
|
||||
|
||||
doc_filename = Path("tests/2408.09869v5.pdf")
|
||||
encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
|
||||
|
||||
base_url = "http://localhost:5001/v1alpha"
|
||||
payload = {
|
||||
"options": {
|
||||
"to_formats": ["md", "json"],
|
||||
"image_export_mode": "placeholder",
|
||||
"ocr": True,
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
},
|
||||
# "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
|
||||
"file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
|
||||
}
|
||||
# print(json.dumps(payload, indent=2))
|
||||
|
||||
for n in range(5):
|
||||
response = await async_client.post(
|
||||
f"{base_url}/convert/source/async", json=payload
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
|
||||
uri = f"ws://localhost:5001/v1alpha/status/ws/{task['task_id']}"
|
||||
with connect(uri) as websocket:
|
||||
for message in websocket:
|
||||
print(message)
|
||||
60
tests/test_1-url-async.py
Normal file
60
tests/test_1-url-async.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_url(async_client):
|
||||
"""Test convert URL to all outputs"""
|
||||
|
||||
example_docs = [
|
||||
"https://arxiv.org/pdf/2411.19710",
|
||||
"https://arxiv.org/pdf/2501.17887",
|
||||
"https://www.nature.com/articles/s41467-024-50779-y.pdf",
|
||||
"https://arxiv.org/pdf/2306.12802",
|
||||
"https://arxiv.org/pdf/2311.18481",
|
||||
]
|
||||
|
||||
base_url = "http://localhost:5001/v1alpha"
|
||||
payload = {
|
||||
"options": {
|
||||
"to_formats": ["md", "json"],
|
||||
"image_export_mode": "placeholder",
|
||||
"ocr": True,
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
},
|
||||
"http_sources": [{"url": random.choice(example_docs)}],
|
||||
}
|
||||
print(json.dumps(payload, indent=2))
|
||||
|
||||
for n in range(5):
|
||||
response = await async_client.post(
|
||||
f"{base_url}/convert/source/async", json=payload
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
|
||||
print(json.dumps(task, indent=2))
|
||||
|
||||
while task["task_status"] not in ("success", "failure"):
|
||||
response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
task = response.json()
|
||||
print(f"{task['task_status']=}")
|
||||
print(f"{task['task_position']=}")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
@@ -57,18 +57,18 @@ async def test_convert_file(async_client):
|
||||
content_disposition = response.headers.get("content-disposition")
|
||||
|
||||
with check:
|
||||
assert (
|
||||
content_disposition is not None
|
||||
), "Content-Disposition header should be present"
|
||||
assert content_disposition is not None, (
|
||||
"Content-Disposition header should be present"
|
||||
)
|
||||
with check:
|
||||
assert "attachment" in content_disposition, "Response should be an attachment"
|
||||
with check:
|
||||
assert (
|
||||
'filename="converted_docs.zip"' in content_disposition
|
||||
), "Attachment filename should be 'converted_docs.zip'"
|
||||
assert 'filename="converted_docs.zip"' in content_disposition, (
|
||||
"Attachment filename should be 'converted_docs.zip'"
|
||||
)
|
||||
|
||||
content_type = response.headers.get("content-type")
|
||||
with check:
|
||||
assert (
|
||||
content_type == "application/zip"
|
||||
), "Content-Type should be 'application/zip'"
|
||||
assert content_type == "application/zip", (
|
||||
"Content-Type should be 'application/zip'"
|
||||
)
|
||||
|
||||
@@ -50,18 +50,18 @@ async def test_convert_url(async_client):
|
||||
content_disposition = response.headers.get("content-disposition")
|
||||
|
||||
with check:
|
||||
assert (
|
||||
content_disposition is not None
|
||||
), "Content-Disposition header should be present"
|
||||
assert content_disposition is not None, (
|
||||
"Content-Disposition header should be present"
|
||||
)
|
||||
with check:
|
||||
assert "attachment" in content_disposition, "Response should be an attachment"
|
||||
with check:
|
||||
assert (
|
||||
'filename="converted_docs.zip"' in content_disposition
|
||||
), "Attachment filename should be 'converted_docs.zip'"
|
||||
assert 'filename="converted_docs.zip"' in content_disposition, (
|
||||
"Attachment filename should be 'converted_docs.zip'"
|
||||
)
|
||||
|
||||
content_type = response.headers.get("content-type")
|
||||
with check:
|
||||
assert (
|
||||
content_type == "application/zip"
|
||||
), "Content-Type should be 'application/zip'"
|
||||
assert content_type == "application/zip", (
|
||||
"Content-Type should be 'application/zip'"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user