mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
382d675631 | ||
|
|
c65f3c654c | ||
|
|
829effec1a | ||
|
|
494d66f992 | ||
|
|
14bafb2628 | ||
|
|
37e2e1ad09 | ||
|
|
71c5fae505 | ||
|
|
91956cbf4e | ||
|
|
4c9571a052 | ||
|
|
41624af09f | ||
|
|
26bef5bec0 | ||
|
|
40bb21d347 | ||
|
|
ee89ee4dae | ||
|
|
6b3d281f02 | ||
|
|
b598872e5c | ||
|
|
087417e5c2 | ||
|
|
57f9073bc0 | ||
|
|
525a43ff6f | ||
|
|
c1ce4719c9 | ||
|
|
5dfb75d3b9 | ||
|
|
420162e674 |
@@ -3,7 +3,7 @@ config:
|
||||
no-emphasis-as-header: false
|
||||
first-line-heading: false
|
||||
MD033:
|
||||
allowed_elements: ["details", "summary", "br", "a", "p", "img"]
|
||||
allowed_elements: ["details", "summary", "br", "a", "b", "p", "img"]
|
||||
MD024:
|
||||
siblings_only: true
|
||||
globs:
|
||||
|
||||
46
CHANGELOG.md
46
CHANGELOG.md
@@ -1,3 +1,49 @@
|
||||
## [v0.10.0](https://github.com/docling-project/docling-serve/releases/tag/v0.10.0) - 2025-04-28
|
||||
|
||||
### Feature
|
||||
|
||||
* Add support for file upload and return as file in async endpoints ([#152](https://github.com/docling-project/docling-serve/issues/152)) ([`c65f3c6`](https://github.com/docling-project/docling-serve/commit/c65f3c654c76c6b64b6aada1f0a153d74789d629))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix new default pdf_backend ([#158](https://github.com/docling-project/docling-serve/issues/158)) ([`829effe`](https://github.com/docling-project/docling-serve/commit/829effec1a1b80320ccaf2c501be8015169b6fa3))
|
||||
* Fixing small typo in docs ([#155](https://github.com/docling-project/docling-serve/issues/155)) ([`14bafb2`](https://github.com/docling-project/docling-serve/commit/14bafb26286b94f80b56846c50d6e9a6d99a9763))
|
||||
|
||||
## [v0.9.0](https://github.com/docling-project/docling-serve/releases/tag/v0.9.0) - 2025-04-25
|
||||
|
||||
### Feature
|
||||
|
||||
* Expose picture description options ([#148](https://github.com/docling-project/docling-serve/issues/148)) ([`4c9571a`](https://github.com/docling-project/docling-serve/commit/4c9571a052d5ec0044e49225bc5615e13cdb0a56))
|
||||
* Add parameters for Kubeflow pipeline engine (WIP) ([#107](https://github.com/docling-project/docling-serve/issues/107)) ([`26bef5b`](https://github.com/docling-project/docling-serve/commit/26bef5bec060f0afd8d358816b68c3f2c0dd4bc2))
|
||||
|
||||
### Fix
|
||||
|
||||
* Produce image artifacts in referenced mode ([#151](https://github.com/docling-project/docling-serve/issues/151)) ([`71c5fae`](https://github.com/docling-project/docling-serve/commit/71c5fae505366459fd481d2ecdabc5ebed94d49c))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Vlm and picture description options ([#149](https://github.com/docling-project/docling-serve/issues/149)) ([`91956cb`](https://github.com/docling-project/docling-serve/commit/91956cbf4e91cf82bb4d54ace397cdbbfaf594ba))
|
||||
|
||||
## [v0.8.0](https://github.com/docling-project/docling-serve/releases/tag/v0.8.0) - 2025-04-22
|
||||
|
||||
### Feature
|
||||
|
||||
* Add option for vlm pipeline ([#143](https://github.com/docling-project/docling-serve/issues/143)) ([`ee89ee4`](https://github.com/docling-project/docling-serve/commit/ee89ee4daee5e916bd6a3bdb452f78934cd03f60))
|
||||
* Expose more conversion options ([#142](https://github.com/docling-project/docling-serve/issues/142)) ([`6b3d281`](https://github.com/docling-project/docling-serve/commit/6b3d281f02905c195ab75f25bb39f5c4d4e7b680))
|
||||
* **UI:** Change UI to use async endpoints ([#131](https://github.com/docling-project/docling-serve/issues/131)) ([`b598872`](https://github.com/docling-project/docling-serve/commit/b598872e5c48928ac44417a11bb7acc0e5c3f0c6))
|
||||
|
||||
### Fix
|
||||
|
||||
* **UI:** Use https when calling the api ([#139](https://github.com/docling-project/docling-serve/issues/139)) ([`57f9073`](https://github.com/docling-project/docling-serve/commit/57f9073bc0daf72428b068ea28e2bec7cd76c37b))
|
||||
* Fix permissions in docker image ([#136](https://github.com/docling-project/docling-serve/issues/136)) ([`c1ce471`](https://github.com/docling-project/docling-serve/commit/c1ce4719c933179ba3c59d73d0584853bbd6fa6a))
|
||||
* Picture caption visuals ([#129](https://github.com/docling-project/docling-serve/issues/129)) ([`5dfb75d`](https://github.com/docling-project/docling-serve/commit/5dfb75d3b9a7022d1daad12edbb8ec7bbf9aa264))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix required permissions for oauth2-proxy requests ([#141](https://github.com/docling-project/docling-serve/issues/141)) ([`087417e`](https://github.com/docling-project/docling-serve/commit/087417e5c2387d4ed95500222058f34d8a8702aa))
|
||||
* Update deployment examples ([#135](https://github.com/docling-project/docling-serve/issues/135)) ([`525a43f`](https://github.com/docling-project/docling-serve/commit/525a43ff6f04b7cc80f9dd6a0e653a8d8c4ab317))
|
||||
* Fix image tag ([#124](https://github.com/docling-project/docling-serve/issues/124)) ([`420162e`](https://github.com/docling-project/docling-serve/commit/420162e674cc38b4c3c13673ffbee4c20a1b15f1))
|
||||
|
||||
## [v0.7.0](https://github.com/docling-project/docling-serve/releases/tag/v0.7.0) - 2025-03-31
|
||||
|
||||
### Feature
|
||||
|
||||
@@ -17,6 +17,8 @@ RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
|
||||
dnf -y clean all && \
|
||||
rm -rf /var/cache/dnf
|
||||
|
||||
RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
|
||||
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
|
||||
|
||||
###################################################################################################
|
||||
@@ -44,7 +46,7 @@ RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
|
||||
umask 002 && uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
|
||||
|
||||
ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
|
||||
|
||||
@@ -52,15 +54,15 @@ RUN echo "Downloading models..." && \
|
||||
HF_HUB_DOWNLOAD_TIMEOUT="90" \
|
||||
HF_HUB_ETAG_TIMEOUT="90" \
|
||||
docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
|
||||
chown -R 1001:0 /opt/app-root/src/.cache && \
|
||||
chmod -R g=u /opt/app-root/src/.cache
|
||||
chown -R 1001:0 ${DOCLING_SERVE_ARTIFACTS_PATH} && \
|
||||
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
|
||||
|
||||
COPY --chown=1001:0 ./docling_serve ./docling_serve
|
||||
RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
|
||||
umask 002 && uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
|
||||
|
||||
EXPOSE 5001
|
||||
|
||||
|
||||
13
Makefile
13
Makefile
@@ -17,6 +17,7 @@ else
|
||||
endif
|
||||
|
||||
TAG=$(shell git rev-parse HEAD)
|
||||
BRANCH_TAG=$(shell git rev-parse --abbrev-ref HEAD)
|
||||
|
||||
action-lint-file:
|
||||
$(CMD_PREFIX) touch .action-lint
|
||||
@@ -28,22 +29,22 @@ md-lint-file:
|
||||
docling-serve-image: Containerfile
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu" -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-cpu-image
|
||||
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-cu124-image
|
||||
docling-serve-cu124-image: Containerfile ## Build docling-serve container image with GPU support
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:main
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: action-lint
|
||||
action-lint: .action-lint ## Lint GitHub Action workflows
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import asyncio
|
||||
import importlib.metadata
|
||||
import logging
|
||||
import tempfile
|
||||
import shutil
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any, Optional, Union
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import (
|
||||
BackgroundTasks,
|
||||
@@ -28,9 +28,14 @@ from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from docling.datamodel.base_models import DocumentStream
|
||||
|
||||
from docling_serve.datamodel.callback import (
|
||||
ProgressCallbackRequest,
|
||||
ProgressCallbackResponse,
|
||||
)
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.requests import (
|
||||
ConvertDocumentFileSourcesRequest,
|
||||
ConvertDocumentHttpSourcesRequest,
|
||||
ConvertDocumentsRequest,
|
||||
)
|
||||
from docling_serve.datamodel.responses import (
|
||||
@@ -40,19 +45,16 @@ from docling_serve.datamodel.responses import (
|
||||
TaskStatusResponse,
|
||||
WebsocketMessage,
|
||||
)
|
||||
from docling_serve.docling_conversion import (
|
||||
convert_documents,
|
||||
get_converter,
|
||||
get_pdf_pipeline_opts,
|
||||
)
|
||||
from docling_serve.engines import get_orchestrator
|
||||
from docling_serve.engines.async_local.orchestrator import (
|
||||
AsyncLocalOrchestrator,
|
||||
TaskNotFoundError,
|
||||
from docling_serve.datamodel.task import Task, TaskSource
|
||||
from docling_serve.engines.async_orchestrator import (
|
||||
BaseAsyncOrchestrator,
|
||||
ProgressInvalid,
|
||||
)
|
||||
from docling_serve.engines.async_orchestrator_factory import get_async_orchestrator
|
||||
from docling_serve.engines.base_orchestrator import TaskNotFoundError
|
||||
from docling_serve.helper_functions import FormDepends
|
||||
from docling_serve.response_preparation import process_results
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
from docling_serve.storage import get_scratch
|
||||
|
||||
|
||||
# Set up custom logging as we'll be intermixes with FastAPI/Uvicorn's logging
|
||||
@@ -90,11 +92,11 @@ _log = logging.getLogger(__name__)
|
||||
# Context manager to initialize and clean up the lifespan of the FastAPI app
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# Converter with default options
|
||||
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
|
||||
get_converter(pdf_format_option)
|
||||
orchestrator = get_async_orchestrator()
|
||||
scratch_dir = get_scratch()
|
||||
|
||||
orchestrator = get_orchestrator()
|
||||
# Warm up processing cache
|
||||
await orchestrator.warm_up_caches()
|
||||
|
||||
# Start the background queue processor
|
||||
queue_task = asyncio.create_task(orchestrator.process_queue())
|
||||
@@ -108,6 +110,10 @@ async def lifespan(app: FastAPI):
|
||||
except asyncio.CancelledError:
|
||||
_log.info("Queue processor cancelled.")
|
||||
|
||||
# Remove scratch directory in case it was a tempfile
|
||||
if docling_serve_settings.scratch_path is not None:
|
||||
shutil.rmtree(scratch_dir, ignore_errors=True)
|
||||
|
||||
|
||||
##################################
|
||||
# App creation and configuration #
|
||||
@@ -157,7 +163,8 @@ def create_app(): # noqa: C901
|
||||
|
||||
from docling_serve.gradio_ui import ui as gradio_ui
|
||||
|
||||
tmp_output_dir = Path(tempfile.mkdtemp())
|
||||
tmp_output_dir = get_scratch() / "gradio"
|
||||
tmp_output_dir.mkdir(exist_ok=True, parents=True)
|
||||
gradio_ui.gradio_output_dir = tmp_output_dir
|
||||
app = gr.mount_gradio_app(
|
||||
app,
|
||||
@@ -205,6 +212,56 @@ def create_app(): # noqa: C901
|
||||
redoc_js_url="/static/redoc.standalone.js",
|
||||
)
|
||||
|
||||
########################
|
||||
# Async / Sync helpers #
|
||||
########################
|
||||
|
||||
async def _enque_source(
|
||||
orchestrator: BaseAsyncOrchestrator, conversion_request: ConvertDocumentsRequest
|
||||
) -> Task:
|
||||
sources: list[TaskSource] = []
|
||||
if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
|
||||
sources.extend(conversion_request.file_sources)
|
||||
if isinstance(conversion_request, ConvertDocumentHttpSourcesRequest):
|
||||
sources.extend(conversion_request.http_sources)
|
||||
|
||||
task = await orchestrator.enqueue(
|
||||
sources=sources, options=conversion_request.options
|
||||
)
|
||||
return task
|
||||
|
||||
async def _enque_file(
|
||||
orchestrator: BaseAsyncOrchestrator,
|
||||
files: list[UploadFile],
|
||||
options: ConvertDocumentsOptions,
|
||||
) -> Task:
|
||||
_log.info(f"Received {len(files)} files for processing.")
|
||||
|
||||
# Load the uploaded files to Docling DocumentStream
|
||||
file_sources: list[TaskSource] = []
|
||||
for i, file in enumerate(files):
|
||||
buf = BytesIO(file.file.read())
|
||||
suffix = "" if len(file_sources) == 1 else f"_{i}"
|
||||
name = file.filename if file.filename else f"file{suffix}.pdf"
|
||||
file_sources.append(DocumentStream(name=name, stream=buf))
|
||||
|
||||
task = await orchestrator.enqueue(sources=file_sources, options=options)
|
||||
return task
|
||||
|
||||
async def _wait_task_complete(
|
||||
orchestrator: BaseAsyncOrchestrator, task_id: str
|
||||
) -> bool:
|
||||
MAX_WAIT = 120
|
||||
start_time = time.monotonic()
|
||||
while True:
|
||||
task = await orchestrator.task_status(task_id=task_id)
|
||||
if task.is_completed():
|
||||
return True
|
||||
await asyncio.sleep(5)
|
||||
elapsed_time = time.monotonic() - start_time
|
||||
if elapsed_time > MAX_WAIT:
|
||||
return False
|
||||
|
||||
#############################
|
||||
# API Endpoints definitions #
|
||||
#############################
|
||||
@@ -238,33 +295,33 @@ def create_app(): # noqa: C901
|
||||
}
|
||||
},
|
||||
)
|
||||
def process_url(
|
||||
background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
|
||||
async def process_url(
|
||||
background_tasks: BackgroundTasks,
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
conversion_request: ConvertDocumentsRequest,
|
||||
):
|
||||
sources: list[Union[str, DocumentStream]] = []
|
||||
headers: Optional[dict[str, Any]] = None
|
||||
if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
|
||||
for file_source in conversion_request.file_sources:
|
||||
sources.append(file_source.to_document_stream())
|
||||
else:
|
||||
for http_source in conversion_request.http_sources:
|
||||
sources.append(http_source.url)
|
||||
if headers is None and http_source.headers:
|
||||
headers = http_source.headers
|
||||
|
||||
# Note: results are only an iterator->lazy evaluation
|
||||
results = convert_documents(
|
||||
sources=sources, options=conversion_request.options, headers=headers
|
||||
task = await _enque_source(
|
||||
orchestrator=orchestrator, conversion_request=conversion_request
|
||||
)
|
||||
success = await _wait_task_complete(
|
||||
orchestrator=orchestrator, task_id=task.task_id
|
||||
)
|
||||
|
||||
# The real processing will happen here
|
||||
response = process_results(
|
||||
background_tasks=background_tasks,
|
||||
conversion_options=conversion_request.options,
|
||||
conv_results=results,
|
||||
)
|
||||
if not success:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
status_code=504, detail="Conversion is taking too long."
|
||||
)
|
||||
|
||||
return response
|
||||
result = await orchestrator.task_result(
|
||||
task_id=task.task_id, background_tasks=background_tasks
|
||||
)
|
||||
if result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
)
|
||||
return result
|
||||
|
||||
# Convert a document from file(s)
|
||||
@app.post(
|
||||
@@ -278,29 +335,34 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
async def process_file(
|
||||
background_tasks: BackgroundTasks,
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
files: list[UploadFile],
|
||||
options: Annotated[
|
||||
ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
|
||||
],
|
||||
):
|
||||
_log.info(f"Received {len(files)} files for processing.")
|
||||
|
||||
# Load the uploaded files to Docling DocumentStream
|
||||
file_sources = []
|
||||
for file in files:
|
||||
buf = BytesIO(file.file.read())
|
||||
name = file.filename if file.filename else "file.pdf"
|
||||
file_sources.append(DocumentStream(name=name, stream=buf))
|
||||
|
||||
results = convert_documents(sources=file_sources, options=options)
|
||||
|
||||
response = process_results(
|
||||
background_tasks=background_tasks,
|
||||
conversion_options=options,
|
||||
conv_results=results,
|
||||
task = await _enque_file(
|
||||
orchestrator=orchestrator, files=files, options=options
|
||||
)
|
||||
success = await _wait_task_complete(
|
||||
orchestrator=orchestrator, task_id=task.task_id
|
||||
)
|
||||
|
||||
return response
|
||||
if not success:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
status_code=504, detail="Conversion is taking too long."
|
||||
)
|
||||
|
||||
result = await orchestrator.task_result(
|
||||
task_id=task.task_id, background_tasks=background_tasks
|
||||
)
|
||||
if result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
)
|
||||
return result
|
||||
|
||||
# Convert a document from URL(s) using the async api
|
||||
@app.post(
|
||||
@@ -308,10 +370,12 @@ def create_app(): # noqa: C901
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def process_url_async(
|
||||
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
conversion_request: ConvertDocumentsRequest,
|
||||
):
|
||||
task = await orchestrator.enqueue(request=conversion_request)
|
||||
task = await _enque_source(
|
||||
orchestrator=orchestrator, conversion_request=conversion_request
|
||||
)
|
||||
task_queue_position = await orchestrator.get_queue_position(
|
||||
task_id=task.task_id
|
||||
)
|
||||
@@ -319,6 +383,33 @@ def create_app(): # noqa: C901
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
|
||||
# Convert a document from file(s) using the async api
|
||||
@app.post(
|
||||
"/v1alpha/convert/file/async",
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def process_file_async(
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
files: list[UploadFile],
|
||||
options: Annotated[
|
||||
ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
|
||||
],
|
||||
):
|
||||
task = await _enque_file(
|
||||
orchestrator=orchestrator, files=files, options=options
|
||||
)
|
||||
task_queue_position = await orchestrator.get_queue_position(
|
||||
task_id=task.task_id
|
||||
)
|
||||
return TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
|
||||
# Task status poll
|
||||
@@ -327,7 +418,7 @@ def create_app(): # noqa: C901
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def task_status_poll(
|
||||
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
task_id: str,
|
||||
wait: Annotated[
|
||||
float, Query(help="Number of seconds to wait for a completed status.")
|
||||
@@ -342,6 +433,7 @@ def create_app(): # noqa: C901
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
|
||||
# Task status websocket
|
||||
@@ -350,7 +442,7 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
async def task_status_ws(
|
||||
websocket: WebSocket,
|
||||
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
task_id: str,
|
||||
):
|
||||
await websocket.accept()
|
||||
@@ -375,6 +467,7 @@ def create_app(): # noqa: C901
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
await websocket.send_text(
|
||||
WebsocketMessage(
|
||||
@@ -389,6 +482,7 @@ def create_app(): # noqa: C901
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
await websocket.send_text(
|
||||
WebsocketMessage(
|
||||
@@ -416,10 +510,13 @@ def create_app(): # noqa: C901
|
||||
},
|
||||
)
|
||||
async def task_result(
|
||||
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
):
|
||||
result = await orchestrator.task_result(task_id=task_id)
|
||||
result = await orchestrator.task_result(
|
||||
task_id=task_id, background_tasks=background_tasks
|
||||
)
|
||||
if result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
@@ -427,4 +524,23 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
return result
|
||||
|
||||
# Update task progress
|
||||
@app.post(
|
||||
"/v1alpha/callback/task/progress",
|
||||
response_model=ProgressCallbackResponse,
|
||||
)
|
||||
async def callback_task_progress(
|
||||
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
|
||||
request: ProgressCallbackRequest,
|
||||
):
|
||||
try:
|
||||
await orchestrator.receive_task_progress(request=request)
|
||||
return ProgressCallbackResponse(status="ack")
|
||||
except TaskNotFoundError:
|
||||
raise HTTPException(status_code=404, detail="Task not found.")
|
||||
except ProgressInvalid as err:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid progress payload: {err}"
|
||||
)
|
||||
|
||||
return app
|
||||
|
||||
50
docling_serve/datamodel/callback.py
Normal file
50
docling_serve/datamodel/callback.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import enum
|
||||
from typing import Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ProgressKind(str, enum.Enum):
|
||||
SET_NUM_DOCS = "set_num_docs"
|
||||
UPDATE_PROCESSED = "update_processed"
|
||||
|
||||
|
||||
class BaseProgress(BaseModel):
|
||||
kind: ProgressKind
|
||||
|
||||
|
||||
class ProgressSetNumDocs(BaseProgress):
|
||||
kind: Literal[ProgressKind.SET_NUM_DOCS] = ProgressKind.SET_NUM_DOCS
|
||||
|
||||
num_docs: int
|
||||
|
||||
|
||||
class SucceededDocsItem(BaseModel):
|
||||
source: str
|
||||
|
||||
|
||||
class FailedDocsItem(BaseModel):
|
||||
source: str
|
||||
error: str
|
||||
|
||||
|
||||
class ProgressUpdateProcessed(BaseProgress):
|
||||
kind: Literal[ProgressKind.UPDATE_PROCESSED] = ProgressKind.UPDATE_PROCESSED
|
||||
|
||||
num_processed: int
|
||||
num_succeeded: int
|
||||
num_failed: int
|
||||
|
||||
docs_succeeded: list[SucceededDocsItem]
|
||||
docs_failed: list[FailedDocsItem]
|
||||
|
||||
|
||||
class ProgressCallbackRequest(BaseModel):
|
||||
task_id: str
|
||||
progress: Annotated[
|
||||
ProgressSetNumDocs | ProgressUpdateProcessed, Field(discriminator="kind")
|
||||
]
|
||||
|
||||
|
||||
class ProgressCallbackResponse(BaseModel):
|
||||
status: Literal["ack"] = "ack"
|
||||
@@ -1,13 +1,20 @@
|
||||
# Define the input options for the API
|
||||
from typing import Annotated, Optional
|
||||
from typing import Annotated, Any, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import AnyUrl, BaseModel, Field, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from docling.datamodel.base_models import InputFormat, OutputFormat
|
||||
from docling.datamodel.pipeline_options import (
|
||||
EasyOcrOptions,
|
||||
PdfBackend,
|
||||
PdfPipeline,
|
||||
TableFormerMode,
|
||||
TableStructureOptions,
|
||||
)
|
||||
from docling.datamodel.settings import (
|
||||
DEFAULT_PAGE_RANGE,
|
||||
PageRange,
|
||||
)
|
||||
from docling.models.factories import get_ocr_factory
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
@@ -20,6 +27,89 @@ ocr_factory = get_ocr_factory(
|
||||
ocr_engines_enum = ocr_factory.get_enum()
|
||||
|
||||
|
||||
class PictureDescriptionLocal(BaseModel):
|
||||
repo_id: Annotated[
|
||||
str,
|
||||
Field(
|
||||
description="Repository id from the Hugging Face Hub.",
|
||||
examples=[
|
||||
"HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
"ibm-granite/granite-vision-3.2-2b",
|
||||
],
|
||||
),
|
||||
]
|
||||
prompt: Annotated[
|
||||
str,
|
||||
Field(
|
||||
description="Prompt used when calling the vision-language model.",
|
||||
examples=[
|
||||
"Describe this image in a few sentences.",
|
||||
"This is a figure from a document. Provide a detailed description of it.",
|
||||
],
|
||||
),
|
||||
] = "Describe this image in a few sentences."
|
||||
generation_config: Annotated[
|
||||
dict[str, Any],
|
||||
Field(
|
||||
description="Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig",
|
||||
examples=[{"max_new_tokens": 200, "do_sample": False}],
|
||||
),
|
||||
] = {"max_new_tokens": 200, "do_sample": False}
|
||||
|
||||
|
||||
class PictureDescriptionApi(BaseModel):
|
||||
url: Annotated[
|
||||
AnyUrl,
|
||||
Field(
|
||||
description="Endpoint which accepts openai-api compatible requests.",
|
||||
examples=[
|
||||
AnyUrl(
|
||||
"http://localhost:8000/v1/chat/completions"
|
||||
), # example of a local vllm api
|
||||
AnyUrl(
|
||||
"http://localhost:11434/v1/chat/completions"
|
||||
), # example of ollama
|
||||
],
|
||||
),
|
||||
]
|
||||
headers: Annotated[
|
||||
dict[str, str],
|
||||
Field(
|
||||
description="Headers used for calling the API endpoint. For example, it could include authentication headers."
|
||||
),
|
||||
] = {}
|
||||
params: Annotated[
|
||||
dict[str, Any],
|
||||
Field(
|
||||
description="Model parameters.",
|
||||
examples=[
|
||||
{ # on vllm
|
||||
"model": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
"max_completion_tokens": 200,
|
||||
},
|
||||
{ # on vllm
|
||||
"model": "ibm-granite/granite-vision-3.2-2b",
|
||||
"max_completion_tokens": 200,
|
||||
},
|
||||
{ # on ollama
|
||||
"model": "granite3.2-vision:2b"
|
||||
},
|
||||
],
|
||||
),
|
||||
] = {}
|
||||
timeout: Annotated[float, Field(description="Timeout for the API request.")] = 20
|
||||
prompt: Annotated[
|
||||
str,
|
||||
Field(
|
||||
description="Prompt used when calling the vision-language model.",
|
||||
examples=[
|
||||
"Describe this image in a few sentences.",
|
||||
"This is a figures from a document. Provide a detailed description of it.",
|
||||
],
|
||||
),
|
||||
] = "Describe this image in a few sentences."
|
||||
|
||||
|
||||
class ConvertDocumentsOptions(BaseModel):
|
||||
from_formats: Annotated[
|
||||
list[InputFormat],
|
||||
@@ -121,16 +211,37 @@ class ConvertDocumentsOptions(BaseModel):
|
||||
table_mode: Annotated[
|
||||
TableFormerMode,
|
||||
Field(
|
||||
TableFormerMode.FAST,
|
||||
description=(
|
||||
"Mode to use for table structure, String. "
|
||||
f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
|
||||
"Optional, defaults to fast."
|
||||
),
|
||||
examples=[TableFormerMode.FAST],
|
||||
examples=[TableStructureOptions().mode],
|
||||
# pattern="fast|accurate",
|
||||
),
|
||||
] = TableFormerMode.FAST
|
||||
] = TableStructureOptions().mode
|
||||
|
||||
pipeline: Annotated[
|
||||
PdfPipeline,
|
||||
Field(description="Choose the pipeline to process PDF or image files."),
|
||||
] = PdfPipeline.STANDARD
|
||||
|
||||
page_range: Annotated[
|
||||
PageRange,
|
||||
Field(
|
||||
description="Only convert a range of pages. The page number starts at 1.",
|
||||
examples=[(1, 4)],
|
||||
),
|
||||
] = DEFAULT_PAGE_RANGE
|
||||
|
||||
document_timeout: Annotated[
|
||||
float,
|
||||
Field(
|
||||
description="The timeout for processing each document, in seconds.",
|
||||
gt=0,
|
||||
le=docling_serve_settings.max_document_timeout,
|
||||
),
|
||||
] = docling_serve_settings.max_document_timeout
|
||||
|
||||
abort_on_error: Annotated[
|
||||
bool,
|
||||
@@ -199,7 +310,7 @@ class ConvertDocumentsOptions(BaseModel):
|
||||
bool,
|
||||
Field(
|
||||
description=(
|
||||
"If enabled, perform formula OCR, return Latex code. "
|
||||
"If enabled, perform formula OCR, return LaTeX code. "
|
||||
"Boolean. Optional, defaults to false."
|
||||
),
|
||||
examples=[False],
|
||||
@@ -227,3 +338,30 @@ class ConvertDocumentsOptions(BaseModel):
|
||||
examples=[False],
|
||||
),
|
||||
] = False
|
||||
|
||||
picture_description_local: Annotated[
|
||||
Optional[PictureDescriptionLocal],
|
||||
Field(
|
||||
description="Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api."
|
||||
),
|
||||
] = None
|
||||
|
||||
picture_description_api: Annotated[
|
||||
Optional[PictureDescriptionApi],
|
||||
Field(
|
||||
description="API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local."
|
||||
),
|
||||
] = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def picture_description_exclusivity(self) -> Self:
|
||||
# Validate picture description options
|
||||
if (
|
||||
self.picture_description_local is not None
|
||||
and self.picture_description_api is not None
|
||||
):
|
||||
raise ValueError(
|
||||
"The parameters picture_description_local and picture_description_api are mutually exclusive, only one of them can be set."
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
@@ -10,3 +10,4 @@ class TaskStatus(str, enum.Enum):
|
||||
|
||||
class AsyncEngine(str, enum.Enum):
|
||||
LOCAL = "local"
|
||||
KFP = "kfp"
|
||||
|
||||
7
docling_serve/datamodel/kfp.py
Normal file
7
docling_serve/datamodel/kfp.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from pydantic import AnyUrl, BaseModel
|
||||
|
||||
|
||||
class CallbackSpec(BaseModel):
|
||||
url: AnyUrl
|
||||
headers: dict[str, str] = {}
|
||||
ca_cert: str = ""
|
||||
@@ -2,7 +2,7 @@ import base64
|
||||
from io import BytesIO
|
||||
from typing import Annotated, Any, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import AnyHttpUrl, BaseModel, Field
|
||||
|
||||
from docling.datamodel.base_models import DocumentStream
|
||||
|
||||
@@ -15,7 +15,7 @@ class DocumentsConvertBase(BaseModel):
|
||||
|
||||
class HttpSource(BaseModel):
|
||||
url: Annotated[
|
||||
str,
|
||||
AnyHttpUrl,
|
||||
Field(
|
||||
description="HTTP url to process",
|
||||
examples=["https://arxiv.org/pdf/2206.01062"],
|
||||
|
||||
@@ -7,6 +7,8 @@ from docling.datamodel.document import ConversionStatus, ErrorItem
|
||||
from docling.utils.profiling import ProfilingItem
|
||||
from docling_core.types.doc import DoclingDocument
|
||||
|
||||
from docling_serve.datamodel.task_meta import TaskProcessingMeta
|
||||
|
||||
|
||||
# Status
|
||||
class HealthCheckResponse(BaseModel):
|
||||
@@ -38,6 +40,7 @@ class TaskStatusResponse(BaseModel):
|
||||
task_id: str
|
||||
task_status: str
|
||||
task_position: Optional[int] = None
|
||||
task_meta: Optional[TaskProcessingMeta] = None
|
||||
|
||||
|
||||
class MessageKind(str, enum.Enum):
|
||||
|
||||
@@ -1,17 +1,30 @@
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.base_models import DocumentStream
|
||||
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.engines import TaskStatus
|
||||
from docling_serve.datamodel.requests import ConvertDocumentsRequest
|
||||
from docling_serve.datamodel.requests import FileSource, HttpSource
|
||||
from docling_serve.datamodel.responses import ConvertDocumentResponse
|
||||
from docling_serve.datamodel.task_meta import TaskProcessingMeta
|
||||
|
||||
TaskSource = Union[HttpSource, FileSource, DocumentStream]
|
||||
|
||||
|
||||
class Task(BaseModel):
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
task_id: str
|
||||
task_status: TaskStatus = TaskStatus.PENDING
|
||||
request: Optional[ConvertDocumentsRequest]
|
||||
result: Optional[ConvertDocumentResponse] = None
|
||||
sources: list[TaskSource] = []
|
||||
options: Optional[ConvertDocumentsOptions]
|
||||
result: Optional[Union[ConvertDocumentResponse, FileResponse]] = None
|
||||
scratch_dir: Optional[Path] = None
|
||||
processing_meta: Optional[TaskProcessingMeta] = None
|
||||
|
||||
def is_completed(self) -> bool:
|
||||
if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
|
||||
|
||||
8
docling_serve/datamodel/task_meta.py
Normal file
8
docling_serve/datamodel/task_meta.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class TaskProcessingMeta(BaseModel):
|
||||
num_docs: int
|
||||
num_processed: int = 0
|
||||
num_succeeded: int = 0
|
||||
num_failed: int = 0
|
||||
@@ -1,6 +1,7 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from collections.abc import Iterable, Iterator
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
@@ -18,10 +19,17 @@ from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import (
|
||||
OcrOptions,
|
||||
PdfBackend,
|
||||
PdfPipeline,
|
||||
PdfPipelineOptions,
|
||||
PictureDescriptionApiOptions,
|
||||
PictureDescriptionVlmOptions,
|
||||
TableFormerMode,
|
||||
VlmPipelineOptions,
|
||||
smoldocling_vlm_conversion_options,
|
||||
smoldocling_vlm_mlx_conversion_options,
|
||||
)
|
||||
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
||||
from docling.pipeline.vlm_pipeline import VlmPipeline
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions, ocr_factory
|
||||
@@ -84,10 +92,9 @@ def get_converter(pdf_format_option: PdfFormatOption) -> DocumentConverter:
|
||||
return _get_converter_from_hash(options_hash)
|
||||
|
||||
|
||||
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
||||
def get_pdf_pipeline_opts(
|
||||
request: ConvertDocumentsOptions,
|
||||
) -> PdfFormatOption:
|
||||
def _parse_standard_pdf_opts(
|
||||
request: ConvertDocumentsOptions, artifacts_path: Optional[Path]
|
||||
) -> PdfPipelineOptions:
|
||||
try:
|
||||
ocr_options: OcrOptions = ocr_factory.create_options(
|
||||
kind=request.ocr_engine.value, # type: ignore
|
||||
@@ -110,6 +117,9 @@ def get_pdf_pipeline_opts(
|
||||
ocr_options.lang = request.ocr_lang
|
||||
|
||||
pipeline_options = PdfPipelineOptions(
|
||||
artifacts_path=artifacts_path,
|
||||
enable_remote_services=docling_serve_settings.enable_remote_services,
|
||||
document_timeout=request.document_timeout,
|
||||
do_ocr=request.do_ocr,
|
||||
ocr_options=ocr_options,
|
||||
do_table_structure=request.do_table_structure,
|
||||
@@ -118,14 +128,33 @@ def get_pdf_pipeline_opts(
|
||||
do_picture_classification=request.do_picture_classification,
|
||||
do_picture_description=request.do_picture_description,
|
||||
)
|
||||
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
||||
pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
|
||||
|
||||
if request.image_export_mode != ImageRefMode.PLACEHOLDER:
|
||||
pipeline_options.generate_page_images = True
|
||||
if request.image_export_mode == ImageRefMode.REFERENCED:
|
||||
pipeline_options.generate_picture_images = True
|
||||
if request.images_scale:
|
||||
pipeline_options.images_scale = request.images_scale
|
||||
|
||||
if request.picture_description_local is not None:
|
||||
pipeline_options.picture_description_options = (
|
||||
PictureDescriptionVlmOptions.model_validate(
|
||||
request.picture_description_local.model_dump()
|
||||
)
|
||||
)
|
||||
|
||||
if request.picture_description_api is not None:
|
||||
pipeline_options.picture_description_options = (
|
||||
PictureDescriptionApiOptions.model_validate(
|
||||
request.picture_description_api.model_dump()
|
||||
)
|
||||
)
|
||||
|
||||
return pipeline_options
|
||||
|
||||
|
||||
def _parse_backend(request: ConvertDocumentsOptions) -> type[PdfDocumentBackend]:
|
||||
if request.pdf_backend == PdfBackend.DLPARSE_V1:
|
||||
backend: type[PdfDocumentBackend] = DoclingParseDocumentBackend
|
||||
elif request.pdf_backend == PdfBackend.DLPARSE_V2:
|
||||
@@ -137,35 +166,78 @@ def get_pdf_pipeline_opts(
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
||||
|
||||
return backend
|
||||
|
||||
|
||||
def _parse_vlm_pdf_opts(
|
||||
request: ConvertDocumentsOptions, artifacts_path: Optional[Path]
|
||||
) -> VlmPipelineOptions:
|
||||
pipeline_options = VlmPipelineOptions(
|
||||
artifacts_path=artifacts_path,
|
||||
document_timeout=request.document_timeout,
|
||||
)
|
||||
pipeline_options.vlm_options = smoldocling_vlm_conversion_options
|
||||
if sys.platform == "darwin":
|
||||
try:
|
||||
import mlx_vlm # noqa: F401
|
||||
|
||||
pipeline_options.vlm_options = smoldocling_vlm_mlx_conversion_options
|
||||
except ImportError:
|
||||
_log.warning(
|
||||
"To run SmolDocling faster, please install mlx-vlm:\n"
|
||||
"pip install mlx-vlm"
|
||||
)
|
||||
return pipeline_options
|
||||
|
||||
|
||||
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
||||
def get_pdf_pipeline_opts(
|
||||
request: ConvertDocumentsOptions,
|
||||
) -> PdfFormatOption:
|
||||
artifacts_path: Optional[Path] = None
|
||||
if docling_serve_settings.artifacts_path is not None:
|
||||
if str(docling_serve_settings.artifacts_path.absolute()) == "":
|
||||
_log.info(
|
||||
"artifacts_path is an empty path, model weights will be dowloaded "
|
||||
"artifacts_path is an empty path, model weights will be downloaded "
|
||||
"at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = None
|
||||
artifacts_path = None
|
||||
elif docling_serve_settings.artifacts_path.is_dir():
|
||||
_log.info(
|
||||
"artifacts_path is set to a valid directory. "
|
||||
"No model weights will be downloaded at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
|
||||
artifacts_path = docling_serve_settings.artifacts_path
|
||||
else:
|
||||
_log.warning(
|
||||
"artifacts_path is set to an invalid directory. "
|
||||
"The system will download the model weights at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = None
|
||||
artifacts_path = None
|
||||
else:
|
||||
_log.info(
|
||||
"artifacts_path is unset. "
|
||||
"The system will download the model weights at runtime."
|
||||
)
|
||||
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend,
|
||||
)
|
||||
pipeline_options: Union[PdfPipelineOptions, VlmPipelineOptions]
|
||||
if request.pipeline == PdfPipeline.STANDARD:
|
||||
pipeline_options = _parse_standard_pdf_opts(request, artifacts_path)
|
||||
backend = _parse_backend(request)
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend,
|
||||
)
|
||||
|
||||
elif request.pipeline == PdfPipeline.VLM:
|
||||
pipeline_options = _parse_vlm_pdf_opts(request, artifacts_path)
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f"The pipeline {request.pipeline} is not implemented."
|
||||
)
|
||||
|
||||
return pdf_format_option
|
||||
|
||||
@@ -180,6 +252,9 @@ def convert_documents(
|
||||
results: Iterator[ConversionResult] = converter.convert_all(
|
||||
sources,
|
||||
headers=headers,
|
||||
page_range=options.page_range,
|
||||
max_file_size=docling_serve_settings.max_file_size,
|
||||
max_num_pages=docling_serve_settings.max_num_pages,
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_orchestrator() -> AsyncLocalOrchestrator:
|
||||
return AsyncLocalOrchestrator()
|
||||
|
||||
0
docling_serve/engines/async_kfp/__init__.py
Normal file
0
docling_serve/engines/async_kfp/__init__.py
Normal file
137
docling_serve/engines/async_kfp/kfp_pipeline.py
Normal file
137
docling_serve/engines/async_kfp/kfp_pipeline.py
Normal file
@@ -0,0 +1,137 @@
|
||||
# ruff: noqa: E402, UP006, UP035
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from kfp import dsl
|
||||
|
||||
PYTHON_BASE_IMAGE = "python:3.12"
|
||||
|
||||
|
||||
@dsl.component(
|
||||
base_image=PYTHON_BASE_IMAGE,
|
||||
packages_to_install=[
|
||||
"pydantic",
|
||||
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
|
||||
],
|
||||
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
|
||||
)
|
||||
def generate_chunks(
|
||||
run_name: str,
|
||||
request: Dict[str, Any],
|
||||
batch_size: int,
|
||||
callbacks: List[Dict[str, Any]],
|
||||
) -> List[List[Dict[str, Any]]]:
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from docling_serve.datamodel.callback import (
|
||||
ProgressCallbackRequest,
|
||||
ProgressSetNumDocs,
|
||||
)
|
||||
from docling_serve.datamodel.kfp import CallbackSpec
|
||||
from docling_serve.engines.async_kfp.notify import notify_callbacks
|
||||
|
||||
CallbacksListType = TypeAdapter(list[CallbackSpec])
|
||||
|
||||
sources = request["http_sources"]
|
||||
splits = [sources[i : i + batch_size] for i in range(0, len(sources), batch_size)]
|
||||
|
||||
total = sum(len(chunk) for chunk in splits)
|
||||
payload = ProgressCallbackRequest(
|
||||
task_id=run_name, progress=ProgressSetNumDocs(num_docs=total)
|
||||
)
|
||||
notify_callbacks(
|
||||
payload=payload,
|
||||
callbacks=CallbacksListType.validate_python(callbacks),
|
||||
)
|
||||
|
||||
return splits
|
||||
|
||||
|
||||
@dsl.component(
|
||||
base_image=PYTHON_BASE_IMAGE,
|
||||
packages_to_install=[
|
||||
"pydantic",
|
||||
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
|
||||
],
|
||||
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
|
||||
)
|
||||
def convert_batch(
|
||||
run_name: str,
|
||||
data_splits: List[Dict[str, Any]],
|
||||
request: Dict[str, Any],
|
||||
callbacks: List[Dict[str, Any]],
|
||||
output_path: dsl.OutputPath("Directory"), # type: ignore
|
||||
):
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import AnyUrl, TypeAdapter
|
||||
|
||||
from docling_serve.datamodel.callback import (
|
||||
FailedDocsItem,
|
||||
ProgressCallbackRequest,
|
||||
ProgressUpdateProcessed,
|
||||
SucceededDocsItem,
|
||||
)
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.kfp import CallbackSpec
|
||||
from docling_serve.datamodel.requests import HttpSource
|
||||
from docling_serve.engines.async_kfp.notify import notify_callbacks
|
||||
|
||||
CallbacksListType = TypeAdapter(list[CallbackSpec])
|
||||
|
||||
convert_options = ConvertDocumentsOptions.model_validate(request["options"])
|
||||
print(convert_options)
|
||||
|
||||
output_dir = Path(output_path)
|
||||
output_dir.mkdir(exist_ok=True, parents=True)
|
||||
docs_succeeded: list[SucceededDocsItem] = []
|
||||
docs_failed: list[FailedDocsItem] = []
|
||||
for source_dict in data_splits:
|
||||
source = HttpSource.model_validate(source_dict)
|
||||
filename = Path(str(AnyUrl(source.url).path)).name
|
||||
output_filename = output_dir / filename
|
||||
print(f"Writing {output_filename}")
|
||||
with output_filename.open("w") as f:
|
||||
f.write(source.model_dump_json())
|
||||
docs_succeeded.append(SucceededDocsItem(source=source.url))
|
||||
|
||||
payload = ProgressCallbackRequest(
|
||||
task_id=run_name,
|
||||
progress=ProgressUpdateProcessed(
|
||||
num_failed=len(docs_failed),
|
||||
num_processed=len(docs_succeeded) + len(docs_failed),
|
||||
num_succeeded=len(docs_succeeded),
|
||||
docs_succeeded=docs_succeeded,
|
||||
docs_failed=docs_failed,
|
||||
),
|
||||
)
|
||||
|
||||
print(payload)
|
||||
notify_callbacks(
|
||||
payload=payload,
|
||||
callbacks=CallbacksListType.validate_python(callbacks),
|
||||
)
|
||||
|
||||
|
||||
@dsl.pipeline()
|
||||
def process(
|
||||
batch_size: int,
|
||||
request: Dict[str, Any],
|
||||
callbacks: List[Dict[str, Any]] = [],
|
||||
run_name: str = "",
|
||||
):
|
||||
chunks_task = generate_chunks(
|
||||
run_name=run_name,
|
||||
request=request,
|
||||
batch_size=batch_size,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
chunks_task.set_caching_options(False)
|
||||
|
||||
with dsl.ParallelFor(chunks_task.output, parallelism=4) as data_splits:
|
||||
convert_batch(
|
||||
run_name=run_name,
|
||||
data_splits=data_splits,
|
||||
request=request,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
32
docling_serve/engines/async_kfp/notify.py
Normal file
32
docling_serve/engines/async_kfp/notify.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import ssl
|
||||
|
||||
import certifi
|
||||
import httpx
|
||||
|
||||
from docling_serve.datamodel.callback import ProgressCallbackRequest
|
||||
from docling_serve.datamodel.kfp import CallbackSpec
|
||||
|
||||
|
||||
def notify_callbacks(
|
||||
payload: ProgressCallbackRequest,
|
||||
callbacks: list[CallbackSpec],
|
||||
):
|
||||
if len(callbacks) == 0:
|
||||
return
|
||||
|
||||
for callback in callbacks:
|
||||
# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances
|
||||
if callback.ca_cert:
|
||||
ctx = ssl.create_default_context(cadata=callback.ca_cert)
|
||||
else:
|
||||
ctx = ssl.create_default_context(cafile=certifi.where())
|
||||
|
||||
try:
|
||||
httpx.post(
|
||||
str(callback.url),
|
||||
headers=callback.headers,
|
||||
json=payload.model_dump(mode="json"),
|
||||
verify=ctx,
|
||||
)
|
||||
except httpx.HTTPError as err:
|
||||
print(f"Error notifying callback {callback.url}: {err}")
|
||||
235
docling_serve/engines/async_kfp/orchestrator.py
Normal file
235
docling_serve/engines/async_kfp/orchestrator.py
Normal file
@@ -0,0 +1,235 @@
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from kfp_server_api.models import V2beta1RuntimeState
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from pydantic_settings import SettingsConfigDict
|
||||
|
||||
from docling_serve.datamodel.callback import (
|
||||
ProgressCallbackRequest,
|
||||
ProgressSetNumDocs,
|
||||
ProgressUpdateProcessed,
|
||||
)
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.engines import TaskStatus
|
||||
from docling_serve.datamodel.kfp import CallbackSpec
|
||||
from docling_serve.datamodel.requests import HttpSource
|
||||
from docling_serve.datamodel.task import Task, TaskSource
|
||||
from docling_serve.datamodel.task_meta import TaskProcessingMeta
|
||||
from docling_serve.engines.async_kfp.kfp_pipeline import process
|
||||
from docling_serve.engines.async_orchestrator import (
|
||||
BaseAsyncOrchestrator,
|
||||
ProgressInvalid,
|
||||
)
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _RunItem(BaseModel):
|
||||
model_config = SettingsConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
run_id: str
|
||||
state: str
|
||||
created_at: datetime.datetime
|
||||
scheduled_at: datetime.datetime
|
||||
finished_at: datetime.datetime
|
||||
|
||||
|
||||
class AsyncKfpOrchestrator(BaseAsyncOrchestrator):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
import kfp
|
||||
|
||||
kfp_endpoint = docling_serve_settings.eng_kfp_endpoint
|
||||
if kfp_endpoint is None:
|
||||
raise ValueError("KFP endpoint is required when using the KFP engine.")
|
||||
|
||||
kube_sa_token_path = Path("/run/secrets/kubernetes.io/serviceaccount/token")
|
||||
kube_sa_ca_cert_path = Path(
|
||||
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
|
||||
)
|
||||
|
||||
ssl_ca_cert = docling_serve_settings.eng_kfp_ca_cert_path
|
||||
token = docling_serve_settings.eng_kfp_token
|
||||
if (
|
||||
ssl_ca_cert is None
|
||||
and ".svc" in kfp_endpoint.host
|
||||
and kube_sa_ca_cert_path.exists()
|
||||
):
|
||||
ssl_ca_cert = str(kube_sa_ca_cert_path)
|
||||
if token is None and kube_sa_token_path.exists():
|
||||
token = kube_sa_token_path.read_text()
|
||||
|
||||
self._client = kfp.Client(
|
||||
host=str(kfp_endpoint),
|
||||
existing_token=token,
|
||||
ssl_ca_cert=ssl_ca_cert,
|
||||
# verify_ssl=False,
|
||||
)
|
||||
|
||||
async def enqueue(
|
||||
self, sources: list[TaskSource], options: ConvertDocumentsOptions
|
||||
) -> Task:
|
||||
callbacks = []
|
||||
if docling_serve_settings.eng_kfp_self_callback_endpoint is not None:
|
||||
headers = {}
|
||||
if docling_serve_settings.eng_kfp_self_callback_token_path is not None:
|
||||
token = (
|
||||
docling_serve_settings.eng_kfp_self_callback_token_path.read_text()
|
||||
)
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
ca_cert = ""
|
||||
if docling_serve_settings.eng_kfp_self_callback_ca_cert_path is not None:
|
||||
ca_cert = docling_serve_settings.eng_kfp_self_callback_ca_cert_path.read_text()
|
||||
callbacks.append(
|
||||
CallbackSpec(
|
||||
url=docling_serve_settings.eng_kfp_self_callback_endpoint,
|
||||
headers=headers,
|
||||
ca_cert=ca_cert,
|
||||
)
|
||||
)
|
||||
|
||||
CallbacksType = TypeAdapter(list[CallbackSpec])
|
||||
SourcesListType = TypeAdapter(list[HttpSource])
|
||||
http_sources = [s for s in sources if isinstance(s, HttpSource)]
|
||||
# hack: since the current kfp backend is not resolving the job_id placeholder,
|
||||
# we set the run_name and pass it as argument to the job itself.
|
||||
run_name = f"docling-job-{uuid.uuid4()}"
|
||||
kfp_run = self._client.create_run_from_pipeline_func(
|
||||
process,
|
||||
arguments={
|
||||
"batch_size": 10,
|
||||
"sources": SourcesListType.dump_python(http_sources, mode="json"),
|
||||
"options": options.model_dump(mode="json"),
|
||||
"callbacks": CallbacksType.dump_python(callbacks, mode="json"),
|
||||
"run_name": run_name,
|
||||
},
|
||||
run_name=run_name,
|
||||
)
|
||||
task_id = kfp_run.run_id
|
||||
|
||||
task = Task(task_id=task_id, sources=sources, options=options)
|
||||
await self.init_task_tracking(task)
|
||||
return task
|
||||
|
||||
async def _update_task_from_run(self, task_id: str, wait: float = 0.0):
|
||||
run_info = self._client.get_run(run_id=task_id)
|
||||
task = await self.get_raw_task(task_id=task_id)
|
||||
# RUNTIME_STATE_UNSPECIFIED = "RUNTIME_STATE_UNSPECIFIED"
|
||||
# PENDING = "PENDING"
|
||||
# RUNNING = "RUNNING"
|
||||
# SUCCEEDED = "SUCCEEDED"
|
||||
# SKIPPED = "SKIPPED"
|
||||
# FAILED = "FAILED"
|
||||
# CANCELING = "CANCELING"
|
||||
# CANCELED = "CANCELED"
|
||||
# PAUSED = "PAUSED"
|
||||
if run_info.state == V2beta1RuntimeState.SUCCEEDED:
|
||||
task.task_status = TaskStatus.SUCCESS
|
||||
elif run_info.state == V2beta1RuntimeState.PENDING:
|
||||
task.task_status = TaskStatus.PENDING
|
||||
elif run_info.state == V2beta1RuntimeState.RUNNING:
|
||||
task.task_status = TaskStatus.STARTED
|
||||
else:
|
||||
task.task_status = TaskStatus.FAILURE
|
||||
|
||||
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
|
||||
await self._update_task_from_run(task_id=task_id, wait=wait)
|
||||
return await self.get_raw_task(task_id=task_id)
|
||||
|
||||
async def _get_pending(self) -> list[_RunItem]:
|
||||
runs: list[_RunItem] = []
|
||||
next_page: Optional[str] = None
|
||||
while True:
|
||||
res = self._client.list_runs(
|
||||
page_token=next_page,
|
||||
page_size=20,
|
||||
filter=json.dumps(
|
||||
{
|
||||
"predicates": [
|
||||
{
|
||||
"operation": "EQUALS",
|
||||
"key": "state",
|
||||
"stringValue": "PENDING",
|
||||
}
|
||||
]
|
||||
}
|
||||
),
|
||||
)
|
||||
if res.runs is not None:
|
||||
for run in res.runs:
|
||||
runs.append(
|
||||
_RunItem(
|
||||
run_id=run.run_id,
|
||||
state=run.state,
|
||||
created_at=run.created_at,
|
||||
scheduled_at=run.scheduled_at,
|
||||
finished_at=run.finished_at,
|
||||
)
|
||||
)
|
||||
if res.next_page_token is None:
|
||||
break
|
||||
next_page = res.next_page_token
|
||||
return runs
|
||||
|
||||
async def queue_size(self) -> int:
|
||||
runs = await self._get_pending()
|
||||
return len(runs)
|
||||
|
||||
async def get_queue_position(self, task_id: str) -> Optional[int]:
|
||||
runs = await self._get_pending()
|
||||
for pos, run in enumerate(runs, start=1):
|
||||
if run.run_id == task_id:
|
||||
return pos
|
||||
return None
|
||||
|
||||
async def process_queue(self):
|
||||
return
|
||||
|
||||
async def warm_up_caches(self):
|
||||
return
|
||||
|
||||
async def _get_run_id(self, run_name: str) -> str:
|
||||
res = self._client.list_runs(
|
||||
filter=json.dumps(
|
||||
{
|
||||
"predicates": [
|
||||
{
|
||||
"operation": "EQUALS",
|
||||
"key": "name",
|
||||
"stringValue": run_name,
|
||||
}
|
||||
]
|
||||
}
|
||||
),
|
||||
)
|
||||
if res.runs is not None and len(res.runs) > 0:
|
||||
return res.runs[0].run_id
|
||||
raise RuntimeError(f"Run with {run_name=} not found.")
|
||||
|
||||
async def receive_task_progress(self, request: ProgressCallbackRequest):
|
||||
task_id = await self._get_run_id(run_name=request.task_id)
|
||||
progress = request.progress
|
||||
task = await self.get_raw_task(task_id=task_id)
|
||||
|
||||
if isinstance(progress, ProgressSetNumDocs):
|
||||
task.processing_meta = TaskProcessingMeta(num_docs=progress.num_docs)
|
||||
task.task_status = TaskStatus.STARTED
|
||||
|
||||
elif isinstance(progress, ProgressUpdateProcessed):
|
||||
if task.processing_meta is None:
|
||||
raise ProgressInvalid(
|
||||
"UpdateProcessed was called before setting the expected number of documents."
|
||||
)
|
||||
task.processing_meta.num_processed += progress.num_processed
|
||||
task.processing_meta.num_succeeded += progress.num_succeeded
|
||||
task.processing_meta.num_failed += progress.num_failed
|
||||
task.task_status = TaskStatus.STARTED
|
||||
|
||||
# TODO: could be moved to BackgroundTask
|
||||
await self.notify_task_subscribers(task_id=task_id)
|
||||
@@ -3,44 +3,30 @@ import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import WebSocket
|
||||
|
||||
from docling_serve.datamodel.engines import TaskStatus
|
||||
from docling_serve.datamodel.requests import ConvertDocumentsRequest
|
||||
from docling_serve.datamodel.responses import (
|
||||
MessageKind,
|
||||
TaskStatusResponse,
|
||||
WebsocketMessage,
|
||||
)
|
||||
from docling_serve.datamodel.task import Task
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.task import Task, TaskSource
|
||||
from docling_serve.docling_conversion import get_converter, get_pdf_pipeline_opts
|
||||
from docling_serve.engines.async_local.worker import AsyncLocalWorker
|
||||
from docling_serve.engines.base_orchestrator import BaseOrchestrator
|
||||
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OrchestratorError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TaskNotFoundError(OrchestratorError):
|
||||
pass
|
||||
|
||||
|
||||
class AsyncLocalOrchestrator(BaseOrchestrator):
|
||||
class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.task_queue = asyncio.Queue()
|
||||
self.tasks: dict[str, Task] = {}
|
||||
self.queue_list: list[str] = []
|
||||
self.task_subscribers: dict[str, set[WebSocket]] = {}
|
||||
|
||||
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
|
||||
async def enqueue(
|
||||
self, sources: list[TaskSource], options: ConvertDocumentsOptions
|
||||
) -> Task:
|
||||
task_id = str(uuid.uuid4())
|
||||
task = Task(task_id=task_id, request=request)
|
||||
self.tasks[task_id] = task
|
||||
task = Task(task_id=task_id, sources=sources, options=options)
|
||||
await self.init_task_tracking(task)
|
||||
|
||||
self.queue_list.append(task_id)
|
||||
self.task_subscribers[task_id] = set()
|
||||
await self.task_queue.put(task_id)
|
||||
return task
|
||||
|
||||
@@ -52,16 +38,6 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
|
||||
self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
|
||||
)
|
||||
|
||||
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
|
||||
if task_id not in self.tasks:
|
||||
raise TaskNotFoundError()
|
||||
return self.tasks[task_id]
|
||||
|
||||
async def task_result(self, task_id: str):
|
||||
if task_id not in self.tasks:
|
||||
raise TaskNotFoundError()
|
||||
return self.tasks[task_id].result
|
||||
|
||||
async def process_queue(self):
|
||||
# Create a pool of workers
|
||||
workers = []
|
||||
@@ -75,28 +51,7 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
|
||||
await asyncio.gather(*workers)
|
||||
_log.debug("All workers completed.")
|
||||
|
||||
async def notify_task_subscribers(self, task_id: str):
|
||||
if task_id not in self.task_subscribers:
|
||||
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
|
||||
|
||||
task = self.tasks[task_id]
|
||||
task_queue_position = await self.get_queue_position(task_id)
|
||||
msg = TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
)
|
||||
for websocket in self.task_subscribers[task_id]:
|
||||
await websocket.send_text(
|
||||
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
|
||||
)
|
||||
if task.is_completed():
|
||||
await websocket.close()
|
||||
|
||||
async def notify_queue_positions(self):
|
||||
for task_id in self.task_subscribers.keys():
|
||||
# notify only pending tasks
|
||||
if self.tasks[task_id].task_status != TaskStatus.PENDING:
|
||||
continue
|
||||
|
||||
await self.notify_task_subscribers(task_id)
|
||||
async def warm_up_caches(self):
|
||||
# Converter with default options
|
||||
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
|
||||
get_converter(pdf_format_option)
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import shutil
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
from fastapi import BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from docling.datamodel.base_models import DocumentStream
|
||||
|
||||
from docling_serve.datamodel.engines import TaskStatus
|
||||
from docling_serve.datamodel.requests import ConvertDocumentFileSourcesRequest
|
||||
from docling_serve.datamodel.responses import ConvertDocumentResponse
|
||||
from docling_serve.datamodel.requests import FileSource, HttpSource
|
||||
from docling_serve.docling_conversion import convert_documents
|
||||
from docling_serve.response_preparation import process_results
|
||||
from docling_serve.storage import get_scratch
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
|
||||
@@ -44,59 +45,66 @@ class AsyncLocalWorker:
|
||||
# Notify clients about queue updates
|
||||
await self.orchestrator.notify_queue_positions()
|
||||
|
||||
# Get the current event loop
|
||||
asyncio.get_event_loop()
|
||||
|
||||
# Define a callback function to send progress updates to the client.
|
||||
# TODO: send partial updates, e.g. when a document in the batch is done
|
||||
def run_conversion():
|
||||
sources: list[Union[str, DocumentStream]] = []
|
||||
convert_sources: list[Union[str, DocumentStream]] = []
|
||||
headers: Optional[dict[str, Any]] = None
|
||||
if isinstance(task.request, ConvertDocumentFileSourcesRequest):
|
||||
for file_source in task.request.file_sources:
|
||||
sources.append(file_source.to_document_stream())
|
||||
else:
|
||||
for http_source in task.request.http_sources:
|
||||
sources.append(http_source.url)
|
||||
if headers is None and http_source.headers:
|
||||
headers = http_source.headers
|
||||
for source in task.sources:
|
||||
if isinstance(source, DocumentStream):
|
||||
convert_sources.append(source)
|
||||
elif isinstance(source, FileSource):
|
||||
convert_sources.append(source.to_document_stream())
|
||||
elif isinstance(source, HttpSource):
|
||||
convert_sources.append(str(source.url))
|
||||
if headers is None and source.headers:
|
||||
headers = source.headers
|
||||
|
||||
# Note: results are only an iterator->lazy evaluation
|
||||
results = convert_documents(
|
||||
sources=sources,
|
||||
options=task.request.options,
|
||||
sources=convert_sources,
|
||||
options=task.options,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
# The real processing will happen here
|
||||
work_dir = get_scratch() / task_id
|
||||
response = process_results(
|
||||
background_tasks=BackgroundTasks(),
|
||||
conversion_options=task.request.options,
|
||||
conversion_options=task.options,
|
||||
conv_results=results,
|
||||
work_dir=work_dir,
|
||||
)
|
||||
|
||||
if work_dir.exists():
|
||||
task.scratch_dir = work_dir
|
||||
if not isinstance(response, FileResponse):
|
||||
_log.warning(
|
||||
f"Task {task_id=} produced content in {work_dir=} but the response is not a file."
|
||||
)
|
||||
shutil.rmtree(work_dir, ignore_errors=True)
|
||||
|
||||
return response
|
||||
|
||||
# Run the prediction in a thread to avoid blocking the event loop.
|
||||
start_time = time.monotonic()
|
||||
|
||||
# Run the prediction in a thread to avoid blocking the event loop.
|
||||
# Get the current event loop
|
||||
# loop = asyncio.get_event_loop()
|
||||
# future = asyncio.run_coroutine_threadsafe(
|
||||
# run_conversion(),
|
||||
# loop=loop
|
||||
# )
|
||||
# response = future.result()
|
||||
|
||||
# Run in a thread
|
||||
response = await asyncio.to_thread(
|
||||
run_conversion,
|
||||
)
|
||||
processing_time = time.monotonic() - start_time
|
||||
|
||||
if not isinstance(response, ConvertDocumentResponse):
|
||||
_log.error(
|
||||
f"Worker {self.worker_id} got un-processable "
|
||||
"result for {task_id}: {type(response)}"
|
||||
)
|
||||
task.result = response
|
||||
task.request = None
|
||||
task.sources = []
|
||||
task.options = None
|
||||
|
||||
task.task_status = TaskStatus.SUCCESS
|
||||
_log.info(
|
||||
|
||||
85
docling_serve/engines/async_orchestrator.py
Normal file
85
docling_serve/engines/async_orchestrator.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import shutil
|
||||
from typing import Union
|
||||
|
||||
from fastapi import BackgroundTasks, WebSocket
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from docling_serve.datamodel.callback import ProgressCallbackRequest
|
||||
from docling_serve.datamodel.engines import TaskStatus
|
||||
from docling_serve.datamodel.responses import (
|
||||
ConvertDocumentResponse,
|
||||
MessageKind,
|
||||
TaskStatusResponse,
|
||||
WebsocketMessage,
|
||||
)
|
||||
from docling_serve.datamodel.task import Task
|
||||
from docling_serve.engines.base_orchestrator import (
|
||||
BaseOrchestrator,
|
||||
OrchestratorError,
|
||||
TaskNotFoundError,
|
||||
)
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
class ProgressInvalid(OrchestratorError):
|
||||
pass
|
||||
|
||||
|
||||
class BaseAsyncOrchestrator(BaseOrchestrator):
|
||||
def __init__(self):
|
||||
self.tasks: dict[str, Task] = {}
|
||||
self.task_subscribers: dict[str, set[WebSocket]] = {}
|
||||
|
||||
async def init_task_tracking(self, task: Task):
|
||||
task_id = task.task_id
|
||||
self.tasks[task.task_id] = task
|
||||
self.task_subscribers[task_id] = set()
|
||||
|
||||
async def get_raw_task(self, task_id: str) -> Task:
|
||||
if task_id not in self.tasks:
|
||||
raise TaskNotFoundError()
|
||||
return self.tasks[task_id]
|
||||
|
||||
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
|
||||
return await self.get_raw_task(task_id=task_id)
|
||||
|
||||
async def task_result(
|
||||
self, task_id: str, background_tasks: BackgroundTasks
|
||||
) -> Union[ConvertDocumentResponse, FileResponse, None]:
|
||||
task = await self.get_raw_task(task_id=task_id)
|
||||
if task.is_completed() and task.scratch_dir is not None:
|
||||
if docling_serve_settings.single_use_results:
|
||||
background_tasks.add_task(
|
||||
shutil.rmtree, task.scratch_dir, ignore_errors=True
|
||||
)
|
||||
return task.result
|
||||
|
||||
async def notify_task_subscribers(self, task_id: str):
|
||||
if task_id not in self.task_subscribers:
|
||||
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
|
||||
|
||||
task = await self.get_raw_task(task_id=task_id)
|
||||
task_queue_position = await self.get_queue_position(task_id)
|
||||
msg = TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
for websocket in self.task_subscribers[task_id]:
|
||||
await websocket.send_text(
|
||||
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
|
||||
)
|
||||
if task.is_completed():
|
||||
await websocket.close()
|
||||
|
||||
async def notify_queue_positions(self):
|
||||
for task_id in self.task_subscribers.keys():
|
||||
# notify only pending tasks
|
||||
if self.tasks[task_id].task_status != TaskStatus.PENDING:
|
||||
continue
|
||||
|
||||
await self.notify_task_subscribers(task_id)
|
||||
|
||||
async def receive_task_progress(self, request: ProgressCallbackRequest):
|
||||
raise NotImplementedError()
|
||||
21
docling_serve/engines/async_orchestrator_factory.py
Normal file
21
docling_serve/engines/async_orchestrator_factory.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from docling_serve.datamodel.engines import AsyncEngine
|
||||
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_async_orchestrator() -> BaseAsyncOrchestrator:
|
||||
if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
|
||||
from docling_serve.engines.async_local.orchestrator import (
|
||||
AsyncLocalOrchestrator,
|
||||
)
|
||||
|
||||
return AsyncLocalOrchestrator()
|
||||
elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
|
||||
from docling_serve.engines.async_kfp.orchestrator import AsyncKfpOrchestrator
|
||||
|
||||
return AsyncKfpOrchestrator()
|
||||
|
||||
raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
|
||||
@@ -1,11 +1,27 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Union
|
||||
|
||||
from docling_serve.datamodel.task import Task
|
||||
from fastapi import BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.responses import ConvertDocumentResponse
|
||||
from docling_serve.datamodel.task import Task, TaskSource
|
||||
|
||||
|
||||
class OrchestratorError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TaskNotFoundError(OrchestratorError):
|
||||
pass
|
||||
|
||||
|
||||
class BaseOrchestrator(ABC):
|
||||
@abstractmethod
|
||||
async def enqueue(self, task) -> Task:
|
||||
async def enqueue(
|
||||
self, sources: list[TaskSource], options: ConvertDocumentsOptions
|
||||
) -> Task:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -13,9 +29,23 @@ class BaseOrchestrator(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def task_status(self, task_id: str) -> Task:
|
||||
async def get_queue_position(self, task_id: str) -> Optional[int]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def task_result(self, task_id: str):
|
||||
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def task_result(
|
||||
self, task_id: str, background_tasks: BackgroundTasks
|
||||
) -> Union[ConvertDocumentResponse, FileResponse, None]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def process_queue(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def warm_up_caches(self):
|
||||
pass
|
||||
|
||||
@@ -1,11 +1,23 @@
|
||||
import base64
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import ssl
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import certifi
|
||||
import gradio as gr
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from docling.datamodel.pipeline_options import (
|
||||
PdfBackend,
|
||||
PdfPipeline,
|
||||
TableFormerMode,
|
||||
TableStructureOptions,
|
||||
)
|
||||
|
||||
from docling_serve.helper_functions import _to_list_of_strings
|
||||
from docling_serve.settings import docling_serve_settings, uvicorn_settings
|
||||
@@ -17,7 +29,7 @@ logger = logging.getLogger(__name__)
|
||||
############################
|
||||
|
||||
logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
|
||||
js_components_url = "https://unpkg.com/@docling/docling-components@0.0.3"
|
||||
js_components_url = "https://unpkg.com/@docling/docling-components@0.0.6"
|
||||
if (
|
||||
docling_serve_settings.static_path is not None
|
||||
and docling_serve_settings.static_path.is_dir()
|
||||
@@ -109,8 +121,29 @@ file_output_path = None # Will be set when a new file is generated
|
||||
#############
|
||||
|
||||
|
||||
def get_api_endpoint() -> str:
|
||||
protocol = "http"
|
||||
if uvicorn_settings.ssl_keyfile is not None:
|
||||
protocol = "https"
|
||||
return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}"
|
||||
|
||||
|
||||
def get_ssl_context() -> ssl.SSLContext:
|
||||
ctx = ssl.create_default_context(cafile=certifi.where())
|
||||
kube_sa_ca_cert_path = Path(
|
||||
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
|
||||
)
|
||||
if (
|
||||
uvicorn_settings.ssl_keyfile is not None
|
||||
and ".svc." in docling_serve_settings.api_host
|
||||
and kube_sa_ca_cert_path.exists()
|
||||
):
|
||||
ctx.load_verify_locations(cafile=kube_sa_ca_cert_path)
|
||||
return ctx
|
||||
|
||||
|
||||
def health_check():
|
||||
response = requests.get(f"http://localhost:{uvicorn_settings.port}/health")
|
||||
response = httpx.get(f"{get_api_endpoint()}/health")
|
||||
if response.status_code == 200:
|
||||
return "Healthy"
|
||||
return "Unhealthy"
|
||||
@@ -126,6 +159,11 @@ def set_outputs_visibility_direct(x, y):
|
||||
return content, file
|
||||
|
||||
|
||||
def set_task_id_visibility(x):
|
||||
task_id_row = gr.Row(visible=x)
|
||||
return task_id_row
|
||||
|
||||
|
||||
def set_outputs_visibility_process(x):
|
||||
content = gr.Row(visible=not x)
|
||||
file = gr.Row(visible=x)
|
||||
@@ -137,6 +175,7 @@ def set_download_button_label(label_text: gr.State):
|
||||
|
||||
|
||||
def clear_outputs():
|
||||
task_id_rendered = ""
|
||||
markdown_content = ""
|
||||
json_content = ""
|
||||
json_rendered_content = ""
|
||||
@@ -145,6 +184,7 @@ def clear_outputs():
|
||||
doctags_content = ""
|
||||
|
||||
return (
|
||||
task_id_rendered,
|
||||
markdown_content,
|
||||
markdown_content,
|
||||
json_content,
|
||||
@@ -164,12 +204,16 @@ def clear_file_input():
|
||||
return None
|
||||
|
||||
|
||||
def auto_set_return_as_file(url_input, file_input, image_export_mode):
|
||||
def auto_set_return_as_file(
|
||||
url_input_value: str,
|
||||
file_input_value: Optional[list[str]],
|
||||
image_export_mode_value: str,
|
||||
):
|
||||
# If more than one input source is provided, return as file
|
||||
if (
|
||||
(len(url_input.split(",")) > 1)
|
||||
or (file_input and len(file_input) > 1)
|
||||
or (image_export_mode == "referenced")
|
||||
(len(url_input_value.split(",")) > 1)
|
||||
or (file_input_value and len(file_input_value) > 1)
|
||||
or (image_export_mode_value == "referenced")
|
||||
):
|
||||
return True
|
||||
else:
|
||||
@@ -187,10 +231,56 @@ def change_ocr_lang(ocr_engine):
|
||||
return "english,chinese"
|
||||
|
||||
|
||||
def wait_task_finish(task_id: str, return_as_file: bool):
|
||||
conversion_sucess = False
|
||||
task_finished = False
|
||||
task_status = ""
|
||||
ssl_ctx = get_ssl_context()
|
||||
while not task_finished:
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1alpha/status/poll/{task_id}?wait=5",
|
||||
verify=ssl_ctx,
|
||||
timeout=15,
|
||||
)
|
||||
task_status = response.json()["task_status"]
|
||||
if task_status == "success":
|
||||
conversion_sucess = True
|
||||
task_finished = True
|
||||
|
||||
if task_status in ("failure", "revoked"):
|
||||
conversion_sucess = False
|
||||
task_finished = True
|
||||
raise RuntimeError(f"Task failed with status {task_status!r}")
|
||||
time.sleep(5)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file(s): {e}")
|
||||
conversion_sucess = False
|
||||
task_finished = True
|
||||
raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
|
||||
|
||||
if conversion_sucess:
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1alpha/result/{task_id}",
|
||||
timeout=15,
|
||||
verify=ssl_ctx,
|
||||
)
|
||||
output = response_to_output(response, return_as_file)
|
||||
return output
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting task result: {e}")
|
||||
|
||||
raise gr.Error(
|
||||
f"Error getting task result, conversion finished with status: {task_status}"
|
||||
)
|
||||
|
||||
|
||||
def process_url(
|
||||
input_sources,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
@@ -209,6 +299,7 @@ def process_url(
|
||||
"options": {
|
||||
"to_formats": to_formats,
|
||||
"image_export_mode": image_export_mode,
|
||||
"pipeline": pipeline,
|
||||
"ocr": ocr,
|
||||
"force_ocr": force_ocr,
|
||||
"ocr_engine": ocr_engine,
|
||||
@@ -231,9 +322,12 @@ def process_url(
|
||||
logger.error("No input sources provided.")
|
||||
raise gr.Error("No input sources provided.", print_exception=False)
|
||||
try:
|
||||
response = requests.post(
|
||||
f"http://localhost:{uvicorn_settings.port}/v1alpha/convert/source",
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1alpha/convert/source/async",
|
||||
json=parameters,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing URL: {e}")
|
||||
@@ -243,14 +337,22 @@ def process_url(
|
||||
error_message = data.get("detail", "An unknown error occurred.")
|
||||
logger.error(f"Error processing file: {error_message}")
|
||||
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
|
||||
output = response_to_output(response, return_as_file)
|
||||
return output
|
||||
|
||||
task_id_rendered = response.json()["task_id"]
|
||||
return task_id_rendered
|
||||
|
||||
|
||||
def file_to_base64(file):
|
||||
with open(file.name, "rb") as f:
|
||||
encoded_string = base64.b64encode(f.read()).decode("utf-8")
|
||||
return encoded_string
|
||||
|
||||
|
||||
def process_file(
|
||||
files,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
@@ -264,33 +366,41 @@ def process_file(
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
):
|
||||
if not files or len(files) == 0 or files[0] == "":
|
||||
if not files or len(files) == 0:
|
||||
logger.error("No files provided.")
|
||||
raise gr.Error("No files provided.", print_exception=False)
|
||||
files_data = [("files", (file.name, open(file.name, "rb"))) for file in files]
|
||||
files_data = [
|
||||
{"base64_string": file_to_base64(file), "filename": file.name} for file in files
|
||||
]
|
||||
|
||||
parameters = {
|
||||
"to_formats": to_formats,
|
||||
"image_export_mode": image_export_mode,
|
||||
"ocr": str(ocr).lower(),
|
||||
"force_ocr": str(force_ocr).lower(),
|
||||
"ocr_engine": ocr_engine,
|
||||
"ocr_lang": _to_list_of_strings(ocr_lang),
|
||||
"pdf_backend": pdf_backend,
|
||||
"table_mode": table_mode,
|
||||
"abort_on_error": str(abort_on_error).lower(),
|
||||
"return_as_file": str(return_as_file).lower(),
|
||||
"do_code_enrichment": str(do_code_enrichment).lower(),
|
||||
"do_formula_enrichment": str(do_formula_enrichment).lower(),
|
||||
"do_picture_classification": str(do_picture_classification).lower(),
|
||||
"do_picture_description": str(do_picture_description).lower(),
|
||||
"file_sources": files_data,
|
||||
"options": {
|
||||
"to_formats": to_formats,
|
||||
"image_export_mode": image_export_mode,
|
||||
"pipeline": pipeline,
|
||||
"ocr": ocr,
|
||||
"force_ocr": force_ocr,
|
||||
"ocr_engine": ocr_engine,
|
||||
"ocr_lang": _to_list_of_strings(ocr_lang),
|
||||
"pdf_backend": pdf_backend,
|
||||
"table_mode": table_mode,
|
||||
"abort_on_error": abort_on_error,
|
||||
"return_as_file": return_as_file,
|
||||
"do_code_enrichment": do_code_enrichment,
|
||||
"do_formula_enrichment": do_formula_enrichment,
|
||||
"do_picture_classification": do_picture_classification,
|
||||
"do_picture_description": do_picture_description,
|
||||
},
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"http://localhost:{uvicorn_settings.port}/v1alpha/convert/file",
|
||||
files=files_data,
|
||||
data=parameters,
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1alpha/convert/source/async",
|
||||
json=parameters,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file(s): {e}")
|
||||
@@ -300,8 +410,9 @@ def process_file(
|
||||
error_message = data.get("detail", "An unknown error occurred.")
|
||||
logger.error(f"Error processing file: {error_message}")
|
||||
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
|
||||
output = response_to_output(response, return_as_file)
|
||||
return output
|
||||
|
||||
task_id_rendered = response.json()["task_id"]
|
||||
return task_id_rendered
|
||||
|
||||
|
||||
def response_to_output(response, return_as_file):
|
||||
@@ -415,30 +526,31 @@ with gr.Blocks(
|
||||
)
|
||||
|
||||
# URL Processing Tab
|
||||
with gr.Tab("Convert URL(s)"):
|
||||
with gr.Tab("Convert URL"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
url_input = gr.Textbox(
|
||||
label="Input Sources (comma-separated URLs)",
|
||||
placeholder="https://arxiv.org/pdf/2206.01062",
|
||||
label="URL Input Source",
|
||||
placeholder="https://arxiv.org/pdf/2501.17887",
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
url_process_btn = gr.Button("Process URL(s)", scale=1)
|
||||
url_process_btn = gr.Button("Process URL", scale=1)
|
||||
url_reset_btn = gr.Button("Reset", scale=1)
|
||||
|
||||
# File Processing Tab
|
||||
with gr.Tab("Convert File(s)"):
|
||||
with gr.Tab("Convert File"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
file_input = gr.File(
|
||||
elem_id="file_input_zone",
|
||||
label="Upload Files",
|
||||
label="Upload File",
|
||||
file_types=[
|
||||
".pdf",
|
||||
".docx",
|
||||
".pptx",
|
||||
".html",
|
||||
".xlsx",
|
||||
".json",
|
||||
".asciidoc",
|
||||
".txt",
|
||||
".md",
|
||||
@@ -451,7 +563,7 @@ with gr.Blocks(
|
||||
scale=4,
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
file_process_btn = gr.Button("Process File(s)", scale=1)
|
||||
file_process_btn = gr.Button("Process File", scale=1)
|
||||
file_reset_btn = gr.Button("Reset", scale=1)
|
||||
|
||||
# Options
|
||||
@@ -460,14 +572,14 @@ with gr.Blocks(
|
||||
with gr.Column(scale=1):
|
||||
to_formats = gr.CheckboxGroup(
|
||||
[
|
||||
("Markdown", "md"),
|
||||
("Docling (JSON)", "json"),
|
||||
("Markdown", "md"),
|
||||
("HTML", "html"),
|
||||
("Plain Text", "text"),
|
||||
("Doc Tags", "doctags"),
|
||||
],
|
||||
label="To Formats",
|
||||
value=["md"],
|
||||
value=["json", "md"],
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
image_export_mode = gr.Radio(
|
||||
@@ -479,6 +591,13 @@ with gr.Blocks(
|
||||
label="Image Export Mode",
|
||||
value="embedded",
|
||||
)
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
pipeline = gr.Radio(
|
||||
[(v.value.capitalize(), v.value) for v in PdfPipeline],
|
||||
label="Pipeline type",
|
||||
value=PdfPipeline.STANDARD.value,
|
||||
)
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
ocr = gr.Checkbox(label="Enable OCR", value=True)
|
||||
@@ -499,15 +618,17 @@ with gr.Blocks(
|
||||
)
|
||||
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
|
||||
with gr.Row():
|
||||
with gr.Column(scale=2):
|
||||
with gr.Column(scale=4):
|
||||
pdf_backend = gr.Radio(
|
||||
["pypdfium2", "dlparse_v1", "dlparse_v2"],
|
||||
[v.value for v in PdfBackend],
|
||||
label="PDF Backend",
|
||||
value="dlparse_v2",
|
||||
value=PdfBackend.DLPARSE_V4.value,
|
||||
)
|
||||
with gr.Column(scale=2):
|
||||
table_mode = gr.Radio(
|
||||
["fast", "accurate"], label="Table Mode", value="fast"
|
||||
[(v.value.capitalize(), v.value) for v in TableFormerMode],
|
||||
label="Table Mode",
|
||||
value=TableStructureOptions().mode.value,
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
||||
@@ -528,18 +649,22 @@ with gr.Blocks(
|
||||
label="Enable picture description", value=False
|
||||
)
|
||||
|
||||
# Task id output
|
||||
with gr.Row(visible=False) as task_id_output:
|
||||
task_id_rendered = gr.Textbox(label="Task id", interactive=False)
|
||||
|
||||
# Document output
|
||||
with gr.Row(visible=False) as content_output:
|
||||
with gr.Tab("Docling (JSON)"):
|
||||
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
|
||||
with gr.Tab("Docling-Rendered"):
|
||||
output_json_rendered = gr.HTML(label="Response")
|
||||
with gr.Tab("Markdown"):
|
||||
output_markdown = gr.Code(
|
||||
language="markdown", wrap_lines=True, show_label=False
|
||||
)
|
||||
with gr.Tab("Markdown-Rendered"):
|
||||
output_markdown_rendered = gr.Markdown(label="Response")
|
||||
with gr.Tab("Docling (JSON)"):
|
||||
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
|
||||
with gr.Tab("Docling-Rendered"):
|
||||
output_json_rendered = gr.HTML()
|
||||
with gr.Tab("HTML"):
|
||||
output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
|
||||
with gr.Tab("HTML-Rendered"):
|
||||
@@ -579,14 +704,11 @@ with gr.Blocks(
|
||||
set_options_visibility, inputs=[false_bool], outputs=[options]
|
||||
).then(
|
||||
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
@@ -596,12 +718,17 @@ with gr.Blocks(
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(
|
||||
set_task_id_visibility,
|
||||
inputs=[true_bool],
|
||||
outputs=[task_id_output],
|
||||
).then(
|
||||
process_url,
|
||||
inputs=[
|
||||
url_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
@@ -615,6 +742,16 @@ with gr.Blocks(
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
],
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
],
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
@@ -645,21 +782,20 @@ with gr.Blocks(
|
||||
set_outputs_visibility_direct,
|
||||
inputs=[false_bool, false_bool],
|
||||
outputs=[content_output, file_output],
|
||||
).then(clear_url_input, inputs=None, outputs=[url_input])
|
||||
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
|
||||
clear_url_input, inputs=None, outputs=[url_input]
|
||||
)
|
||||
|
||||
# File processing
|
||||
file_process_btn.click(
|
||||
set_options_visibility, inputs=[false_bool], outputs=[options]
|
||||
).then(
|
||||
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
@@ -669,12 +805,17 @@ with gr.Blocks(
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(
|
||||
set_task_id_visibility,
|
||||
inputs=[true_bool],
|
||||
outputs=[task_id_output],
|
||||
).then(
|
||||
process_file,
|
||||
inputs=[
|
||||
file_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
@@ -688,6 +829,16 @@ with gr.Blocks(
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
],
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
],
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
@@ -718,4 +869,6 @@ with gr.Blocks(
|
||||
set_outputs_visibility_direct,
|
||||
inputs=[false_bool, false_bool],
|
||||
outputs=[content_output, file_output],
|
||||
).then(clear_file_input, inputs=None, outputs=[file_input])
|
||||
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
|
||||
clear_file_input, inputs=None, outputs=[file_input]
|
||||
)
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from fastapi import BackgroundTasks, HTTPException
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from docling.datamodel.base_models import OutputFormat
|
||||
@@ -46,7 +45,7 @@ def _export_document_as_content(
|
||||
if export_md:
|
||||
document.md_content = new_doc.export_to_markdown(image_mode=image_mode)
|
||||
if export_doctags:
|
||||
document.doctags_content = new_doc.export_to_document_tokens()
|
||||
document.doctags_content = new_doc.export_to_doctags()
|
||||
elif conv_res.status == ConversionStatus.SKIPPED:
|
||||
raise HTTPException(status_code=400, detail=conv_res.errors)
|
||||
else:
|
||||
@@ -124,9 +123,9 @@ def _export_documents_as_files(
|
||||
|
||||
|
||||
def process_results(
|
||||
background_tasks: BackgroundTasks,
|
||||
conversion_options: ConvertDocumentsOptions,
|
||||
conv_results: Iterable[ConversionResult],
|
||||
work_dir: Path,
|
||||
) -> Union[ConvertDocumentResponse, FileResponse]:
|
||||
# Let's start by processing the documents
|
||||
try:
|
||||
@@ -183,7 +182,6 @@ def process_results(
|
||||
# Multiple documents were processed, or we are forced returning as a file
|
||||
else:
|
||||
# Temporary directory to store the outputs
|
||||
work_dir = Path(tempfile.mkdtemp(prefix="docling_"))
|
||||
output_dir = work_dir / "output"
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -203,7 +201,6 @@ def process_results(
|
||||
)
|
||||
|
||||
files = os.listdir(output_dir)
|
||||
|
||||
if len(files) == 0:
|
||||
raise HTTPException(status_code=500, detail="No documents were exported.")
|
||||
|
||||
@@ -216,7 +213,7 @@ def process_results(
|
||||
|
||||
# Other cleanups after the response is sent
|
||||
# Output directory
|
||||
background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)
|
||||
# background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)
|
||||
|
||||
response = FileResponse(
|
||||
file_path, filename=file_path.name, media_type="application/zip"
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic import AnyUrl, model_validator
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from typing_extensions import Self
|
||||
|
||||
from docling_serve.datamodel.engines import AsyncEngine
|
||||
|
||||
@@ -32,17 +35,50 @@ class DoclingServeSettings(BaseSettings):
|
||||
)
|
||||
|
||||
enable_ui: bool = False
|
||||
api_host: str = "localhost"
|
||||
artifacts_path: Optional[Path] = None
|
||||
static_path: Optional[Path] = None
|
||||
scratch_path: Optional[Path] = None
|
||||
single_use_results: bool = True
|
||||
options_cache_size: int = 2
|
||||
enable_remote_services: bool = False
|
||||
allow_external_plugins: bool = False
|
||||
|
||||
max_document_timeout: float = 3_600 * 24 * 7 # 7 days
|
||||
max_num_pages: int = sys.maxsize
|
||||
max_file_size: int = sys.maxsize
|
||||
|
||||
cors_origins: list[str] = ["*"]
|
||||
cors_methods: list[str] = ["*"]
|
||||
cors_headers: list[str] = ["*"]
|
||||
|
||||
eng_kind: AsyncEngine = AsyncEngine.LOCAL
|
||||
# Local engine
|
||||
eng_loc_num_workers: int = 2
|
||||
# KFP engine
|
||||
eng_kfp_endpoint: Optional[AnyUrl] = None
|
||||
eng_kfp_token: Optional[str] = None
|
||||
eng_kfp_ca_cert_path: Optional[str] = None
|
||||
eng_kfp_self_callback_endpoint: Optional[str] = None
|
||||
eng_kfp_self_callback_token_path: Optional[Path] = None
|
||||
eng_kfp_self_callback_ca_cert_path: Optional[Path] = None
|
||||
|
||||
eng_kfp_experimental: bool = False
|
||||
|
||||
@model_validator(mode="after")
|
||||
def engine_settings(self) -> Self:
|
||||
# Validate KFP engine settings
|
||||
if self.eng_kind == AsyncEngine.KFP:
|
||||
if self.eng_kfp_endpoint is None:
|
||||
raise ValueError("KFP endpoint is required when using the KFP engine.")
|
||||
|
||||
if self.eng_kind == AsyncEngine.KFP:
|
||||
if not self.eng_kfp_experimental:
|
||||
raise ValueError(
|
||||
"KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
uvicorn_settings = UvicornSettings()
|
||||
|
||||
16
docling_serve/storage.py
Normal file
16
docling_serve/storage.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import tempfile
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_scratch() -> Path:
|
||||
scratch_dir = (
|
||||
docling_serve_settings.scratch_path
|
||||
if docling_serve_settings.scratch_path is not None
|
||||
else Path(tempfile.mkdtemp(prefix="docling_"))
|
||||
)
|
||||
scratch_dir.mkdir(exist_ok=True, parents=True)
|
||||
return scratch_dir
|
||||
@@ -37,8 +37,42 @@ THe following table describes the options to configure the Docling Serve app.
|
||||
| -----------|-----|---------|-------------|
|
||||
| `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
|
||||
| | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and ui will be loaded from this path |
|
||||
| | `DOCLING_SERVE_SCRATCH_PATH` | | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary created is created for this purpose. |
|
||||
| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
|
||||
| | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
|
||||
| | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
|
||||
| | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
|
||||
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
|
||||
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
|
||||
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
|
||||
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
|
||||
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
|
||||
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
|
||||
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
|
||||
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
|
||||
|
||||
### Compute engine
|
||||
|
||||
Docling Serve can be deployed with several possible of compute engine.
|
||||
The selected compute engine will be running all the async jobs.
|
||||
|
||||
#### Local engine
|
||||
|
||||
The following table describes the options to configure the Docling Serve KFP engine.
|
||||
|
||||
| ENV | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
|
||||
|
||||
#### KFP engine
|
||||
|
||||
The following table describes the options to configure the Docling Serve KFP engine.
|
||||
|
||||
| ENV | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `DOCLING_SERVE_ENG_KFP_ENDPOINT` | | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888` |
|
||||
| `DOCLING_SERVE_ENG_KFP_TOKEN` | | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
|
||||
| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` | | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` | | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1alpha/callback/task/progress`. |
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` | | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` | | The CA certificate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
|
||||
|
||||
15
docs/deploy-examples/compose-gpu.yaml
Normal file
15
docs/deploy-examples/compose-gpu.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
services:
|
||||
docling:
|
||||
image: ghcr.io/docling-project/docling-serve-cu124
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- 5001:5001
|
||||
environment:
|
||||
- DOCLING_SERVE_ENABLE_UI=true
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all # nvidia-smi
|
||||
capabilities: [gpu]
|
||||
@@ -9,41 +9,18 @@ metadata:
|
||||
annotations:
|
||||
serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"docling-serve"}}'
|
||||
---
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: docling-serve-oauth
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
rules:
|
||||
- verbs:
|
||||
- create
|
||||
apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
- verbs:
|
||||
- create
|
||||
apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
---
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: docling-serve-oauth
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: docling-serve
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: docling-serve-oauth
|
||||
kind: ClusterRole
|
||||
name: system:auth-delegator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: docling-serve
|
||||
namespace: docling
|
||||
---
|
||||
apiVersion: route.openshift.io/v1
|
||||
kind: Route
|
||||
@@ -107,10 +84,10 @@ spec:
|
||||
- name: api
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
cpu: 2000m
|
||||
memory: 2Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
cpu: 800m
|
||||
memory: 1Gi
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
@@ -128,13 +105,19 @@ spec:
|
||||
port: http
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 3
|
||||
timeoutSeconds: 2
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 4
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
failureThreshold: 5
|
||||
env:
|
||||
- name: NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: DOCLING_SERVE_ENABLE_UI
|
||||
value: 'true'
|
||||
- name: DOCLING_SERVE_API_HOST
|
||||
value: 'docling-serve.$(NAMESPACE).svc.cluster.local'
|
||||
- name: UVICORN_SSL_CERTFILE
|
||||
value: '/etc/tls/private/tls.crt'
|
||||
- name: UVICORN_SSL_KEYFILE
|
||||
@@ -147,7 +130,7 @@ spec:
|
||||
- name: proxy-tls
|
||||
mountPath: /etc/tls/private
|
||||
imagePullPolicy: Always
|
||||
image: 'ghcr.io/docling-project/docling-serve:dev-ssl'
|
||||
image: 'ghcr.io/docling-project/docling-serve-cpu:fix-ui-with-https'
|
||||
- name: oauth-proxy
|
||||
resources:
|
||||
limits:
|
||||
|
||||
58
docs/deploy-examples/docling-serve-simple.yaml
Normal file
58
docs/deploy-examples/docling-serve-simple.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
# This example deployment configures Docling Serve with a Service and cuda image
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docling-serve
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 5001
|
||||
targetPort: http
|
||||
selector:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
---
|
||||
kind: Deployment
|
||||
apiVersion: apps/v1
|
||||
metadata:
|
||||
name: docling-serve
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
restartPolicy: Always
|
||||
containers:
|
||||
- name: api
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 2Gi
|
||||
nvidia.com/gpu: 1 # Limit to one GPU
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
nvidia.com/gpu: 1 # Limit to one GPU
|
||||
env:
|
||||
- name: DOCLING_SERVE_ENABLE_UI
|
||||
value: 'true'
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 5001
|
||||
protocol: TCP
|
||||
imagePullPolicy: Always
|
||||
image: 'ghcr.io/docling-project/docling-serve-cu124'
|
||||
@@ -1,7 +1,161 @@
|
||||
# Deployment
|
||||
# Deployment Examples
|
||||
|
||||
This document provides deployment examples for running the application in different environments.
|
||||
|
||||
Choose the deployment option that best fits your setup.
|
||||
|
||||
- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
|
||||
- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.
|
||||
|
||||
---
|
||||
|
||||
## Local GPU
|
||||
|
||||
### Docker compose
|
||||
|
||||
Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
|
||||
|
||||
This deployment has the following features:
|
||||
|
||||
- NVIDIA cuda enabled
|
||||
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
```
|
||||
|
||||
For using the API:
|
||||
|
||||
```sh
|
||||
# Make a test query
|
||||
curl -X 'POST' \
|
||||
"localhost:5001/v1alpha/convert/source/async" \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><b>Requirements</b></summary>
|
||||
|
||||
- debian/ubuntu/rhel/fedora/opensuse
|
||||
- docker
|
||||
- nvidia drivers >=550.54.14
|
||||
- nvidia-container-toolkit
|
||||
|
||||
Docs:
|
||||
|
||||
- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/supported-platforms.html)
|
||||
- [CUDA Toolkit Release Notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Steps</b></summary>
|
||||
|
||||
1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
|
||||
|
||||
```sh
|
||||
nvidia-smi
|
||||
```
|
||||
|
||||
2. Check if the NVIDIA Container Toolkit is installed/updated
|
||||
|
||||
```sh
|
||||
# debian
|
||||
dpkg -l | grep nvidia-container-toolkit
|
||||
```
|
||||
|
||||
```sh
|
||||
# rhel
|
||||
rpm -q nvidia-container-toolkit
|
||||
```
|
||||
|
||||
NVIDIA Container Toolkit install steps can be found here:
|
||||
|
||||
<https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
|
||||
|
||||
3. Check which runtime is being used by Docker
|
||||
|
||||
```sh
|
||||
# docker
|
||||
docker info | grep -i runtime
|
||||
```
|
||||
|
||||
4. If the default Docker runtime changes back from 'nvidia' to 'default' after restarting the Docker service (optional):
|
||||
|
||||
Backup the daemon.json file:
|
||||
|
||||
```sh
|
||||
sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak
|
||||
```
|
||||
|
||||
Update the daemon.json file:
|
||||
|
||||
```sh
|
||||
echo '{
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime"
|
||||
}
|
||||
},
|
||||
"default-runtime": "nvidia"
|
||||
}' | sudo tee /etc/docker/daemon.json > /dev/null
|
||||
```
|
||||
|
||||
Restart the Docker service:
|
||||
|
||||
```sh
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
Confirm 'nvidia' is the default runtime used by Docker by repeating step 3.
|
||||
|
||||
5. Run the container:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## OpenShift
|
||||
|
||||
### Simple deployment
|
||||
|
||||
Manifest example: [docling-serve-simple.yaml](./deploy-examples/docling-serve-simple.yaml)
|
||||
|
||||
This deployment example has the following features:
|
||||
|
||||
- Deployment configuration
|
||||
- Service configuration
|
||||
- NVIDIA cuda enabled
|
||||
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
oc apply -f docs/deploy-examples/docling-serve-simple.yaml
|
||||
```
|
||||
|
||||
For using the API:
|
||||
|
||||
```sh
|
||||
# Port-forward the service
|
||||
oc port-forward svc/docling-serve 5001:5001
|
||||
|
||||
# Make a test query
|
||||
curl -X 'POST' \
|
||||
"localhost:5001/v1alpha/convert/source/async" \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
### Secure deployment with `oauth-proxy`
|
||||
|
||||
Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)
|
||||
@@ -15,7 +169,7 @@ This deployment has the following features:
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
kubectl apply -f docs/deploy-examples/docling-serve-oauth.yaml
|
||||
oc apply -f docs/deploy-examples/docling-serve-oauth.yaml
|
||||
```
|
||||
|
||||
For using the API:
|
||||
|
||||
@@ -6,19 +6,26 @@ The API provides two endpoints: one for urls, one for files. This is necessary t
|
||||
|
||||
On top of the source of file (see below), both endpoints support the same parameters, which are almost the same as the Docling CLI.
|
||||
|
||||
- `from_format` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
|
||||
- `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
|
||||
- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
|
||||
- `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
|
||||
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
|
||||
- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
|
||||
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
|
||||
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
|
||||
- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
|
||||
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`. Defaults to `dlparse_v2`.
|
||||
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
|
||||
- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
|
||||
- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
|
||||
- `return_as_file` (boo): If enabled, return the output as a file. Defaults to false.
|
||||
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
|
||||
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to true.
|
||||
- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
|
||||
- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
|
||||
- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
|
||||
- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
|
||||
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
|
||||
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
|
||||
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
|
||||
- `images_scale` (float): Scale factor for images. Defaults to 2.0.
|
||||
|
||||
## Convert endpoints
|
||||
@@ -244,6 +251,70 @@ data = response.json()
|
||||
|
||||
</details>
|
||||
|
||||
### Picture description options
|
||||
|
||||
When the picture description enrichment is activated, users may specify which model and which execution mode to use for this task. There are two choices for the execution mode: _local_ will run the vision-language model directly, _api_ will invoke an external API endpoint.
|
||||
|
||||
The local option is specified with:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"picture_description_local": {
|
||||
"repo_id": "", // Repository id from the Hugging Face Hub.
|
||||
"generation_config": {"max_new_tokens": 200, "do_sample": false}, // HF generation config.
|
||||
"prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The possible values for `generation_config` are documented in the [Hugging Face text generation docs](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig).
|
||||
|
||||
The api option is specified with:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"picture_description_api": {
|
||||
"url": "", // Endpoint which accepts openai-api compatible requests.
|
||||
"headers": {}, // Headers used for calling the API endpoint. For example, it could include authentication headers.
|
||||
"params": {}, // Model parameters.
|
||||
"timeout": 20, // Timeout for the API request.
|
||||
"prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Example URLs are:
|
||||
|
||||
- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `params`:
|
||||
- the `HuggingFaceTB/SmolVLM-256M-Instruct` model
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
"max_completion_tokens": 200,
|
||||
}
|
||||
```
|
||||
|
||||
- the `ibm-granite/granite-vision-3.2-2b` model
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ibm-granite/granite-vision-3.2-2b",
|
||||
"max_completion_tokens": 200,
|
||||
}
|
||||
```
|
||||
|
||||
- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `params`:
|
||||
- the `granite3.2-vision:2b` model
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "granite3.2-vision:2b"
|
||||
}
|
||||
```
|
||||
|
||||
Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.
|
||||
|
||||
## Response format
|
||||
|
||||
The response can be a JSON Document or a File.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling-serve"
|
||||
version = "0.7.0" # DO NOT EDIT, updated automatically
|
||||
version = "0.10.0" # DO NOT EDIT, updated automatically
|
||||
description = "Running Docling as a service"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
@@ -30,9 +30,11 @@ classifiers = [
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"docling~=2.28",
|
||||
"docling[vlm]~=2.28",
|
||||
"mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
||||
"fastapi[standard]~=0.115",
|
||||
"httpx~=0.28",
|
||||
"kfp[kubernetes]>=2.10.0",
|
||||
"pydantic~=2.10",
|
||||
"pydantic-settings~=2.4",
|
||||
"python-multipart>=0.0.14,<0.1.0",
|
||||
@@ -43,7 +45,8 @@ dependencies = [
|
||||
|
||||
[project.optional-dependencies]
|
||||
ui = [
|
||||
"gradio~=5.9"
|
||||
"gradio~=5.9",
|
||||
"pydantic<2.11.0", # fix compatibility between gradio and new pydantic 2.11
|
||||
]
|
||||
tesserocr = [
|
||||
"tesserocr~=2.7"
|
||||
@@ -63,6 +66,7 @@ cu124 = [
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"asgi-lifespan~=2.0",
|
||||
"mypy~=1.11",
|
||||
"pre-commit-uv~=4.1",
|
||||
"pytest~=8.3",
|
||||
@@ -80,6 +84,10 @@ conflicts = [
|
||||
{ extra = "cu124" },
|
||||
],
|
||||
]
|
||||
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
|
||||
override-dependencies = [
|
||||
"urllib3~=2.0"
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
torch = [
|
||||
@@ -195,6 +203,9 @@ module = [
|
||||
"tesserocr.*",
|
||||
"rapidocr_onnxruntime.*",
|
||||
"requests.*",
|
||||
"kfp.*",
|
||||
"kfp_server_api.*",
|
||||
"mlx_vlm.*",
|
||||
]
|
||||
ignore_missing_imports = true
|
||||
|
||||
|
||||
@@ -47,9 +47,7 @@ async def test_convert_file(async_client):
|
||||
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
response = await async_client.post(
|
||||
url, files=files, data={"options": json.dumps(options)}
|
||||
)
|
||||
response = await async_client.post(url, files=files, data=options)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
data = response.json()
|
||||
@@ -92,16 +90,11 @@ async def test_convert_file(async_client):
|
||||
msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
|
||||
)
|
||||
# HTML check
|
||||
check.is_in(
|
||||
"html_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("html_content") is not None:
|
||||
check.is_in(
|
||||
'<!DOCTYPE html>\n<html lang="en">\n<head>',
|
||||
"<!DOCTYPE html>\n<html>\n<head>",
|
||||
data["document"]["html_content"],
|
||||
msg=f"HTML document should contain '<!DOCTYPE html>\\n<html lang=\"en'>. Received: {safe_slice(data['document']['html_content'])}",
|
||||
msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
|
||||
)
|
||||
# Text check
|
||||
check.is_in(
|
||||
@@ -123,7 +116,7 @@ async def test_convert_file(async_client):
|
||||
)
|
||||
if data.get("document", {}).get("doctags_content") is not None:
|
||||
check.is_in(
|
||||
"<document>\n<section_header_level_1><location>",
|
||||
"<doctag><page_header><loc",
|
||||
data["document"]["doctags_content"],
|
||||
msg=f"DocTags document should contain '<document>\\n<section_header_level_1><location>'. Received: {safe_slice(data['document']['doctags_content'])}",
|
||||
msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
|
||||
)
|
||||
|
||||
69
tests/test_1-file-async.py
Normal file
69
tests/test_1-file-async.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_url(async_client):
|
||||
"""Test convert URL to all outputs"""
|
||||
|
||||
base_url = "http://localhost:5001/v1alpha"
|
||||
payload = {
|
||||
"to_formats": ["md", "json", "html"],
|
||||
"image_export_mode": "placeholder",
|
||||
"ocr": False,
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
}
|
||||
|
||||
file_path = Path(__file__).parent / "2206.01062v1.pdf"
|
||||
files = {
|
||||
"files": (file_path.name, file_path.open("rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
for n in range(1):
|
||||
response = await async_client.post(
|
||||
f"{base_url}/convert/file/async", files=files, data=payload
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
|
||||
print(json.dumps(task, indent=2))
|
||||
|
||||
while task["task_status"] not in ("success", "failure"):
|
||||
response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
task = response.json()
|
||||
print(f"{task['task_status']=}")
|
||||
print(f"{task['task_position']=}")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
|
||||
result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
|
||||
assert result_resp.status_code == 200, "Response should be 200 OK"
|
||||
result = result_resp.json()
|
||||
|
||||
assert "md_content" in result["document"]
|
||||
assert result["document"]["md_content"] is not None
|
||||
assert len(result["document"]["md_content"]) > 10
|
||||
|
||||
assert "html_content" in result["document"]
|
||||
assert result["document"]["html_content"] is not None
|
||||
assert len(result["document"]["html_content"]) > 10
|
||||
|
||||
assert "json_content" in result["document"]
|
||||
assert result["document"]["json_content"] is not None
|
||||
assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
|
||||
@@ -93,9 +93,9 @@ async def test_convert_url(async_client):
|
||||
)
|
||||
if data.get("document", {}).get("html_content") is not None:
|
||||
check.is_in(
|
||||
'<!DOCTYPE html>\n<html lang="en">\n<head>',
|
||||
"<!DOCTYPE html>\n<html>\n<head>",
|
||||
data["document"]["html_content"],
|
||||
msg=f"HTML document should contain '<!DOCTYPE html>\\n<html lang=\"en'>. Received: {safe_slice(data['document']['html_content'])}",
|
||||
msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
|
||||
)
|
||||
# Text check
|
||||
check.is_in(
|
||||
@@ -117,7 +117,7 @@ async def test_convert_url(async_client):
|
||||
)
|
||||
if data.get("document", {}).get("doctags_content") is not None:
|
||||
check.is_in(
|
||||
"<document>\n<section_header_level_1><location>",
|
||||
"<doctag><page_header><loc",
|
||||
data["document"]["doctags_content"],
|
||||
msg=f"DocTags document should contain '<document>\\n<section_header_level_1><location>'. Received: {safe_slice(data['document']['doctags_content'])}",
|
||||
msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
|
||||
)
|
||||
|
||||
@@ -28,6 +28,16 @@ async def test_convert_url(async_client: httpx.AsyncClient):
|
||||
"ocr": True,
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
# "do_picture_description": True,
|
||||
# "picture_description_api": {
|
||||
# "url": "http://localhost:11434/v1/chat/completions",
|
||||
# "params": {
|
||||
# "model": "granite3.2-vision:2b",
|
||||
# }
|
||||
# },
|
||||
# "picture_description_local": {
|
||||
# "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
# },
|
||||
},
|
||||
# "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
|
||||
"file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
|
||||
|
||||
@@ -38,7 +38,7 @@ async def test_convert_url(async_client):
|
||||
}
|
||||
print(json.dumps(payload, indent=2))
|
||||
|
||||
for n in range(5):
|
||||
for n in range(3):
|
||||
response = await async_client.post(
|
||||
f"{base_url}/convert/source/async", json=payload
|
||||
)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import httpx
|
||||
@@ -48,9 +47,7 @@ async def test_convert_file(async_client):
|
||||
("files", ("2408.09869v5.pdf", open(file_path, "rb"), "application/pdf")),
|
||||
]
|
||||
|
||||
response = await async_client.post(
|
||||
url, files=files, data={"options": json.dumps(options)}
|
||||
)
|
||||
response = await async_client.post(url, files=files, data=options)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
# Check for zip file attachment
|
||||
|
||||
88
tests/test_2-urls-async-all-outputs.py
Normal file
88
tests/test_2-urls-async-all-outputs.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_url(async_client):
|
||||
"""Test convert URL to all outputs"""
|
||||
base_url = "http://localhost:5001/v1alpha"
|
||||
payload = {
|
||||
"options": {
|
||||
"from_formats": [
|
||||
"docx",
|
||||
"pptx",
|
||||
"html",
|
||||
"image",
|
||||
"pdf",
|
||||
"asciidoc",
|
||||
"md",
|
||||
"xlsx",
|
||||
],
|
||||
"to_formats": ["md", "json", "html", "text", "doctags"],
|
||||
"image_export_mode": "placeholder",
|
||||
"ocr": True,
|
||||
"force_ocr": False,
|
||||
"ocr_engine": "easyocr",
|
||||
"ocr_lang": ["en"],
|
||||
"pdf_backend": "dlparse_v2",
|
||||
"table_mode": "fast",
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
},
|
||||
"http_sources": [
|
||||
{"url": "https://arxiv.org/pdf/2206.01062"},
|
||||
{"url": "https://arxiv.org/pdf/2408.09869"},
|
||||
],
|
||||
}
|
||||
|
||||
response = await async_client.post(f"{base_url}/convert/source/async", json=payload)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
|
||||
print(json.dumps(task, indent=2))
|
||||
|
||||
while task["task_status"] not in ("success", "failure"):
|
||||
response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
task = response.json()
|
||||
print(f"{task['task_status']=}")
|
||||
print(f"{task['task_position']=}")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
|
||||
result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
|
||||
assert result_resp.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
# Check for zip file attachment
|
||||
content_disposition = result_resp.headers.get("content-disposition")
|
||||
|
||||
with check:
|
||||
assert content_disposition is not None, (
|
||||
"Content-Disposition header should be present"
|
||||
)
|
||||
with check:
|
||||
assert "attachment" in content_disposition, "Response should be an attachment"
|
||||
with check:
|
||||
assert 'filename="converted_docs.zip"' in content_disposition, (
|
||||
"Attachment filename should be 'converted_docs.zip'"
|
||||
)
|
||||
|
||||
content_type = result_resp.headers.get("content-type")
|
||||
with check:
|
||||
assert content_type == "application/zip", (
|
||||
"Content-Type should be 'application/zip'"
|
||||
)
|
||||
156
tests/test_fastapi_endpoints.py
Normal file
156
tests/test_fastapi_endpoints.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from asgi_lifespan import LifespanManager
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.app import create_app
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def event_loop():
|
||||
return asyncio.get_event_loop()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def app():
|
||||
app = create_app()
|
||||
|
||||
async with LifespanManager(app) as manager:
|
||||
print("Launching lifespan of app.")
|
||||
yield manager.app
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def client(app):
|
||||
async with AsyncClient(
|
||||
transport=ASGITransport(app=app), base_url="http://app.io"
|
||||
) as client:
|
||||
print("Client is ready")
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health(client: AsyncClient):
|
||||
response = await client.get("/health")
|
||||
assert response.status_code == 200
|
||||
assert response.json() == {"status": "ok"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_file(client: AsyncClient):
|
||||
"""Test convert single file to all outputs"""
|
||||
|
||||
endpoint = "/v1alpha/convert/file"
|
||||
options = {
|
||||
"from_formats": [
|
||||
"docx",
|
||||
"pptx",
|
||||
"html",
|
||||
"image",
|
||||
"pdf",
|
||||
"asciidoc",
|
||||
"md",
|
||||
"xlsx",
|
||||
],
|
||||
"to_formats": ["md", "json", "html", "text", "doctags"],
|
||||
"image_export_mode": "placeholder",
|
||||
"ocr": True,
|
||||
"force_ocr": False,
|
||||
"ocr_engine": "easyocr",
|
||||
"ocr_lang": ["en"],
|
||||
"pdf_backend": "dlparse_v2",
|
||||
"table_mode": "fast",
|
||||
"abort_on_error": False,
|
||||
"return_as_file": False,
|
||||
}
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
file_path = os.path.join(current_dir, "2206.01062v1.pdf")
|
||||
|
||||
files = {
|
||||
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
response = await client.post(endpoint, files=files, data=options)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
data = response.json()
|
||||
|
||||
# Response content checks
|
||||
# Helper function to safely slice strings
|
||||
def safe_slice(value, length=100):
|
||||
if isinstance(value, str):
|
||||
return value[:length]
|
||||
return str(value) # Convert non-string values to string for debug purposes
|
||||
|
||||
# Document check
|
||||
check.is_in(
|
||||
"document",
|
||||
data,
|
||||
msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
|
||||
)
|
||||
# MD check
|
||||
check.is_in(
|
||||
"md_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("md_content") is not None:
|
||||
check.is_in(
|
||||
"## DocLayNet: ",
|
||||
data["document"]["md_content"],
|
||||
msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
|
||||
)
|
||||
# JSON check
|
||||
check.is_in(
|
||||
"json_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("json_content") is not None:
|
||||
check.is_in(
|
||||
'{"schema_name": "DoclingDocument"',
|
||||
json.dumps(data["document"]["json_content"]),
|
||||
msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
|
||||
)
|
||||
# HTML check
|
||||
check.is_in(
|
||||
"html_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("html_content") is not None:
|
||||
check.is_in(
|
||||
"<!DOCTYPE html>\n<html>\n<head>",
|
||||
data["document"]["html_content"],
|
||||
msg=f"HTML document should contain '<!DOCTYPE html>\n<html>\n<head>'. Received: {safe_slice(data['document']['html_content'])}",
|
||||
)
|
||||
# Text check
|
||||
check.is_in(
|
||||
"text_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("text_content") is not None:
|
||||
check.is_in(
|
||||
"DocLayNet: A Large Human-Annotated Dataset",
|
||||
data["document"]["text_content"],
|
||||
msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
|
||||
)
|
||||
# DocTags check
|
||||
check.is_in(
|
||||
"doctags_content",
|
||||
data.get("document", {}),
|
||||
msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
|
||||
)
|
||||
if data.get("document", {}).get("doctags_content") is not None:
|
||||
check.is_in(
|
||||
"<doctag><page_header>",
|
||||
data["document"]["doctags_content"],
|
||||
msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
|
||||
)
|
||||
Reference in New Issue
Block a user