feat: add chunking endpoints (#353)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-09-09 08:38:54 +02:00
committed by GitHub
parent a4fed2d965
commit 9d6def0ec8
9 changed files with 910 additions and 310 deletions

View File

@@ -4,7 +4,8 @@ import logging
from fastapi import BackgroundTasks, Response
from docling_jobkit.datamodel.result import (
ConvertDocumentResult,
ChunkedDocumentResult,
DoclingTaskResult,
ExportResult,
RemoteTargetResult,
ZipArchiveResult,
@@ -14,6 +15,7 @@ from docling_jobkit.orchestrators.base_orchestrator import (
)
from docling_serve.datamodel.responses import (
ChunkDocumentResponse,
ConvertDocumentResponse,
PresignedUrlConvertDocumentResponse,
)
@@ -24,11 +26,16 @@ _log = logging.getLogger(__name__)
async def prepare_response(
task_id: str,
task_result: ConvertDocumentResult,
task_result: DoclingTaskResult,
orchestrator: BaseOrchestrator,
background_tasks: BackgroundTasks,
):
response: Response | ConvertDocumentResponse | PresignedUrlConvertDocumentResponse
response: (
Response
| ConvertDocumentResponse
| PresignedUrlConvertDocumentResponse
| ChunkDocumentResponse
)
if isinstance(task_result.result, ExportResult):
response = ConvertDocumentResponse(
document=task_result.result.content,
@@ -52,6 +59,12 @@ async def prepare_response(
num_succeeded=task_result.num_succeeded,
num_failed=task_result.num_failed,
)
elif isinstance(task_result.result, ChunkedDocumentResult):
response = ChunkDocumentResponse(
chunks=task_result.result.chunks,
documents=task_result.result.documents,
processing_time=task_result.processing_time,
)
else:
raise ValueError("Unknown result type")