Files
docling-serve/tests/test_2-urls-async-all-outputs.py
2025-04-28 11:18:19 +02:00

89 lines
2.7 KiB
Python

import json
import time
import httpx
import pytest
import pytest_asyncio
from pytest_check import check
@pytest_asyncio.fixture
async def async_client():
async with httpx.AsyncClient(timeout=60.0) as client:
yield client
@pytest.mark.asyncio
async def test_convert_url(async_client):
"""Test convert URL to all outputs"""
base_url = "http://localhost:5001/v1alpha"
payload = {
"options": {
"from_formats": [
"docx",
"pptx",
"html",
"image",
"pdf",
"asciidoc",
"md",
"xlsx",
],
"to_formats": ["md", "json", "html", "text", "doctags"],
"image_export_mode": "placeholder",
"ocr": True,
"force_ocr": False,
"ocr_engine": "easyocr",
"ocr_lang": ["en"],
"pdf_backend": "dlparse_v2",
"table_mode": "fast",
"abort_on_error": False,
"return_as_file": False,
},
"http_sources": [
{"url": "https://arxiv.org/pdf/2206.01062"},
{"url": "https://arxiv.org/pdf/2408.09869"},
],
}
response = await async_client.post(f"{base_url}/convert/source/async", json=payload)
assert response.status_code == 200, "Response should be 200 OK"
task = response.json()
print(json.dumps(task, indent=2))
while task["task_status"] not in ("success", "failure"):
response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
assert response.status_code == 200, "Response should be 200 OK"
task = response.json()
print(f"{task['task_status']=}")
print(f"{task['task_position']=}")
time.sleep(2)
assert task["task_status"] == "success"
result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
assert result_resp.status_code == 200, "Response should be 200 OK"
# Check for zip file attachment
content_disposition = result_resp.headers.get("content-disposition")
with check:
assert content_disposition is not None, (
"Content-Disposition header should be present"
)
with check:
assert "attachment" in content_disposition, "Response should be an attachment"
with check:
assert 'filename="converted_docs.zip"' in content_disposition, (
"Attachment filename should be 'converted_docs.zip'"
)
content_type = result_resp.headers.get("content-type")
with check:
assert content_type == "application/zip", (
"Content-Type should be 'application/zip'"
)