mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b6eece7ef0 | ||
|
|
f5af71e8f6 | ||
|
|
d95ea94087 | ||
|
|
5344505718 | ||
|
|
5edc624fbf | ||
|
|
45f0f3c8f9 | ||
|
|
0595d31d5b | ||
|
|
f6b5f0e063 | ||
|
|
8b22a39141 | ||
|
|
d4eac053f9 | ||
|
|
fa1c5f04f3 | ||
|
|
ba61af2359 | ||
|
|
6b6dd8a0d0 | ||
|
|
513ae0c119 | ||
|
|
bde040661f |
4
.github/workflows/job-image.yml
vendored
4
.github/workflows/job-image.yml
vendored
@@ -108,6 +108,7 @@ jobs:
|
||||
cache-to: type=gha,mode=max
|
||||
file: Containerfile
|
||||
build-args: ${{ inputs.build_args }}
|
||||
pull: true
|
||||
##
|
||||
## This stage runs after the build, so it leverages all build cache
|
||||
##
|
||||
@@ -226,7 +227,8 @@ jobs:
|
||||
cache-to: type=gha,mode=max
|
||||
file: Containerfile
|
||||
build-args: ${{ inputs.build_args }}
|
||||
pull: true
|
||||
|
||||
- name: Remove Local Docker Images
|
||||
- name: Remove local Docker images
|
||||
run: |
|
||||
docker image prune -af
|
||||
|
||||
@@ -34,6 +34,6 @@ repos:
|
||||
files: \.md$
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
# uv version, https://github.com/astral-sh/uv-pre-commit/releases
|
||||
rev: 0.8.3
|
||||
rev: 0.8.19
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
||||
62
CHANGELOG.md
62
CHANGELOG.md
@@ -1,3 +1,65 @@
|
||||
## [v1.7.0](https://github.com/docling-project/docling-serve/releases/tag/v1.7.0) - 2025-10-17
|
||||
|
||||
### Feature
|
||||
|
||||
* **UI:** Add auto and orcmac options in demo UI ([#408](https://github.com/docling-project/docling-serve/issues/408)) ([`f5af71e`](https://github.com/docling-project/docling-serve/commit/f5af71e8f6de00d7dd702471a3eea2e94d882410))
|
||||
* Docling with auto-ocr ([#403](https://github.com/docling-project/docling-serve/issues/403)) ([`d95ea94`](https://github.com/docling-project/docling-serve/commit/d95ea940870af0d8df689061baa50f6026efce28))
|
||||
|
||||
### Fix
|
||||
|
||||
* Run docling ui behind a reverse proxy using a context path ([#396](https://github.com/docling-project/docling-serve/issues/396)) ([`5344505`](https://github.com/docling-project/docling-serve/commit/53445057184aa731ee7456b33b70bc0ecf82f2a6))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.57.0
|
||||
- docling-core 2.48.4
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.6.0
|
||||
- docling-mcp 1.3.2
|
||||
- docling-parse 4.5.0
|
||||
- docling-serve 1.7.0
|
||||
|
||||
## [v1.6.0](https://github.com/docling-project/docling-serve/releases/tag/v1.6.0) - 2025-10-03
|
||||
|
||||
### Feature
|
||||
|
||||
* Pin new version of jobkit with granite-docling and connectors ([#391](https://github.com/docling-project/docling-serve/issues/391)) ([`0595d31`](https://github.com/docling-project/docling-serve/commit/0595d31d5b357553426215ca6771796a47e41324))
|
||||
|
||||
### Fix
|
||||
|
||||
* Update locked dependencies ([#392](https://github.com/docling-project/docling-serve/issues/392)) ([`45f0f3c`](https://github.com/docling-project/docling-serve/commit/45f0f3c8f95d418ac30e3744d27d02a63f9e4490))
|
||||
* **UI:** Allow both lowercase and uppercase extensions ([#386](https://github.com/docling-project/docling-serve/issues/386)) ([`8b22a39`](https://github.com/docling-project/docling-serve/commit/8b22a391418d22c1a4d706f880341f28702057b5))
|
||||
* Correctly raise HTTPException for Gateway Timeout ([#382](https://github.com/docling-project/docling-serve/issues/382)) ([`d4eac05`](https://github.com/docling-project/docling-serve/commit/d4eac053f9ce0a60f9070127335bdd56e193d7fa))
|
||||
* Pinning of higher version of dependencies to fix potential security issues ([#363](https://github.com/docling-project/docling-serve/issues/363)) ([`ba61af2`](https://github.com/docling-project/docling-serve/commit/ba61af23591eff200481aa2e532cf7d0701f0ea4))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix docs for websocket breaking condition ([#390](https://github.com/docling-project/docling-serve/issues/390)) ([`f6b5f0e`](https://github.com/docling-project/docling-serve/commit/f6b5f0e06354d2db7d03d274b114499e3407dccf))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.55.1
|
||||
- docling-core 2.48.4
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.6.0
|
||||
- docling-mcp 1.3.2
|
||||
- docling-parse 4.5.0
|
||||
- docling-serve 1.6.0
|
||||
|
||||
## [v1.5.1](https://github.com/docling-project/docling-serve/releases/tag/v1.5.1) - 2025-09-17
|
||||
|
||||
### Fix
|
||||
|
||||
* Remove old dependencies, fixes in docling-parse and more minor dependencies upgrade ([#362](https://github.com/docling-project/docling-serve/issues/362)) ([`513ae0c`](https://github.com/docling-project/docling-serve/commit/513ae0c119b66d3b17cf9a5d371a0f7971f43be7))
|
||||
* Updates rapidocr deps ([#361](https://github.com/docling-project/docling-serve/issues/361)) ([`bde0406`](https://github.com/docling-project/docling-serve/commit/bde040661fb65c67699326cd6281c0e6232e26f2))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.52.0
|
||||
- docling-core 2.48.1
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.5.0
|
||||
- docling-mcp 1.2.0
|
||||
- docling-parse 4.5.0
|
||||
- docling-serve 1.5.1
|
||||
|
||||
## [v1.5.0](https://github.com/docling-project/docling-serve/releases/tag/v1.5.0) - 2025-09-09
|
||||
|
||||
### Feature
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
|
||||
|
||||
ARG UV_VERSION=0.8.3
|
||||
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.8.19
|
||||
|
||||
ARG UV_SYNC_EXTRA_ARGS=""
|
||||
|
||||
@@ -25,7 +25,7 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
|
||||
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
|
||||
|
||||
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
|
||||
FROM ${UV_IMAGE} AS uv_stage
|
||||
|
||||
###################################################################################################
|
||||
# Docling layer #
|
||||
@@ -58,7 +58,7 @@ RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
|
||||
uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
|
||||
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn
|
||||
|
||||
ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
|
||||
ARG MODELS_LIST="layout tableformer picture_classifier rapidocr easyocr"
|
||||
|
||||
RUN echo "Downloading models..." && \
|
||||
HF_HUB_DOWNLOAD_TIMEOUT="90" \
|
||||
|
||||
@@ -46,7 +46,6 @@ from docling_jobkit.datamodel.s3_coords import S3Coordinates
|
||||
from docling_jobkit.datamodel.task import Task, TaskSource, TaskType
|
||||
from docling_jobkit.datamodel.task_targets import (
|
||||
InBodyTarget,
|
||||
TaskTarget,
|
||||
ZipTarget,
|
||||
)
|
||||
from docling_jobkit.orchestrators.base_orchestrator import (
|
||||
@@ -64,6 +63,7 @@ from docling_serve.datamodel.requests import (
|
||||
HttpSourceRequest,
|
||||
S3SourceRequest,
|
||||
TargetName,
|
||||
TargetRequest,
|
||||
make_request_model,
|
||||
)
|
||||
from docling_serve.datamodel.responses import (
|
||||
@@ -194,16 +194,25 @@ def create_app(): # noqa: C901
|
||||
import gradio as gr
|
||||
|
||||
from docling_serve.gradio_ui import ui as gradio_ui
|
||||
from docling_serve.settings import uvicorn_settings
|
||||
|
||||
tmp_output_dir = get_scratch() / "gradio"
|
||||
tmp_output_dir.mkdir(exist_ok=True, parents=True)
|
||||
gradio_ui.gradio_output_dir = tmp_output_dir
|
||||
|
||||
# Build the root_path for Gradio, accounting for UVICORN_ROOT_PATH
|
||||
gradio_root_path = (
|
||||
f"{uvicorn_settings.root_path}/ui"
|
||||
if uvicorn_settings.root_path
|
||||
else "/ui"
|
||||
)
|
||||
|
||||
app = gr.mount_gradio_app(
|
||||
app,
|
||||
gradio_ui,
|
||||
path="/ui",
|
||||
allowed_paths=["./logo.png", tmp_output_dir],
|
||||
root_path="/ui",
|
||||
root_path=gradio_root_path,
|
||||
)
|
||||
except ImportError:
|
||||
_log.warning(
|
||||
@@ -304,7 +313,7 @@ def create_app(): # noqa: C901
|
||||
convert_options: ConvertDocumentsRequestOptions,
|
||||
chunking_options: BaseChunkerOptions | None,
|
||||
chunking_export_options: ChunkingExportOptions | None,
|
||||
target: TaskTarget,
|
||||
target: TargetRequest,
|
||||
) -> Task:
|
||||
_log.info(f"Received {len(files)} files for processing.")
|
||||
|
||||
@@ -455,7 +464,7 @@ def create_app(): # noqa: C901
|
||||
|
||||
if not completed:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
@@ -511,7 +520,7 @@ def create_app(): # noqa: C901
|
||||
|
||||
if not completed:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
@@ -711,7 +720,7 @@ def create_app(): # noqa: C901
|
||||
|
||||
if not completed:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
@@ -793,7 +802,7 @@ def create_app(): # noqa: C901
|
||||
|
||||
if not completed:
|
||||
# TODO: abort task!
|
||||
return HTTPException(
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
|
||||
@@ -13,8 +13,8 @@ from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
|
||||
from docling_jobkit.datamodel.s3_coords import S3Coordinates
|
||||
from docling_jobkit.datamodel.task_targets import (
|
||||
InBodyTarget,
|
||||
PutTarget,
|
||||
S3Target,
|
||||
TaskTarget,
|
||||
ZipTarget,
|
||||
)
|
||||
|
||||
@@ -47,12 +47,17 @@ SourceRequestItem = Annotated[
|
||||
FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
|
||||
]
|
||||
|
||||
TargetRequest = Annotated[
|
||||
InBodyTarget | ZipTarget | S3Target | PutTarget,
|
||||
Field(discriminator="kind"),
|
||||
]
|
||||
|
||||
|
||||
## Complete Source request
|
||||
class ConvertDocumentsRequest(BaseModel):
|
||||
options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
|
||||
sources: list[SourceRequestItem]
|
||||
target: TaskTarget = InBodyTarget()
|
||||
target: TargetRequest = InBodyTarget()
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_s3_source_and_target(self) -> Self:
|
||||
@@ -94,7 +99,7 @@ class BaseChunkDocumentsRequest(BaseModel):
|
||||
),
|
||||
] = False
|
||||
target: Annotated[
|
||||
TaskTarget, Field(description="Specification for the type of output target.")
|
||||
TargetRequest, Field(description="Specification for the type of output target.")
|
||||
] = InBodyTarget()
|
||||
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import itertools
|
||||
import json
|
||||
import logging
|
||||
import ssl
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -224,13 +225,17 @@ def auto_set_return_as_file(
|
||||
|
||||
def change_ocr_lang(ocr_engine):
|
||||
if ocr_engine == "easyocr":
|
||||
return "en,fr,de,es"
|
||||
return gr.update(visible=True, value="en,fr,de,es")
|
||||
elif ocr_engine == "tesseract_cli":
|
||||
return "eng,fra,deu,spa"
|
||||
return gr.update(visible=True, value="eng,fra,deu,spa")
|
||||
elif ocr_engine == "tesseract":
|
||||
return "eng,fra,deu,spa"
|
||||
return gr.update(visible=True, value="eng,fra,deu,spa")
|
||||
elif ocr_engine == "rapidocr":
|
||||
return "english,chinese"
|
||||
return gr.update(visible=True, value="english,chinese")
|
||||
elif ocr_engine == "ocrmac":
|
||||
return gr.update(visible=True, value="fr-FR,de-DE,es-ES,en-US")
|
||||
|
||||
return gr.update(visible=False, value="")
|
||||
|
||||
|
||||
def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
|
||||
@@ -570,14 +575,17 @@ with gr.Blocks(
|
||||
with gr.Tab("Convert File"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
raw_exts = itertools.chain.from_iterable(FormatToExtensions.values())
|
||||
file_input = gr.File(
|
||||
elem_id="file_input_zone",
|
||||
label="Upload File",
|
||||
file_types=[
|
||||
f".{v}"
|
||||
for v in itertools.chain.from_iterable(
|
||||
FormatToExtensions.values()
|
||||
)
|
||||
f".{v.lower()}"
|
||||
for v in raw_exts # lowercase
|
||||
]
|
||||
+ [
|
||||
f".{v.upper()}"
|
||||
for v in raw_exts # uppercase
|
||||
],
|
||||
file_count="multiple",
|
||||
scale=4,
|
||||
@@ -633,18 +641,25 @@ with gr.Blocks(
|
||||
ocr = gr.Checkbox(label="Enable OCR", value=True)
|
||||
force_ocr = gr.Checkbox(label="Force OCR", value=False)
|
||||
with gr.Column(scale=1):
|
||||
engines_list = [
|
||||
("Auto", "auto"),
|
||||
("EasyOCR", "easyocr"),
|
||||
("Tesseract", "tesseract"),
|
||||
("RapidOCR", "rapidocr"),
|
||||
]
|
||||
if sys.platform == "darwin":
|
||||
engines_list.append(("OCRMac", "ocrmac"))
|
||||
|
||||
ocr_engine = gr.Radio(
|
||||
[
|
||||
("EasyOCR", "easyocr"),
|
||||
("Tesseract", "tesseract"),
|
||||
("RapidOCR", "rapidocr"),
|
||||
],
|
||||
engines_list,
|
||||
label="OCR Engine",
|
||||
value="easyocr",
|
||||
value="auto",
|
||||
)
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
ocr_lang = gr.Textbox(
|
||||
label="OCR Language (beware of the format)", value="en,fr,de,es"
|
||||
label="OCR Language (beware of the format)",
|
||||
value="en,fr,de,es",
|
||||
visible=False,
|
||||
)
|
||||
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
|
||||
with gr.Row():
|
||||
|
||||
@@ -433,7 +433,7 @@ with connect(uri) as websocket:
|
||||
payload = json.loads(message)
|
||||
if payload["message"] == "error":
|
||||
break
|
||||
if payload["message"] == "error" and payload["task"]["task_status"] in ("success", "failure"):
|
||||
if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
|
||||
break
|
||||
except:
|
||||
break
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling-serve"
|
||||
version = "1.5.0" # DO NOT EDIT, updated automatically
|
||||
version = "1.7.0" # DO NOT EDIT, updated automatically
|
||||
description = "Running Docling as a service"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"docling~=2.38",
|
||||
"docling-core>=2.45.0",
|
||||
"docling-jobkit[kfp,rq,vlm]>=1.5.0,<2.0.0",
|
||||
"docling-jobkit[kfp,rq,vlm]>=1.6.0,<2.0.0",
|
||||
"fastapi[standard]~=0.115",
|
||||
"httpx~=0.28",
|
||||
"pydantic~=2.10",
|
||||
@@ -50,15 +50,17 @@ dependencies = [
|
||||
|
||||
[project.optional-dependencies]
|
||||
ui = [
|
||||
"gradio~=5.9",
|
||||
"pydantic<2.11.0", # fix compatibility between gradio and new pydantic 2.11
|
||||
"gradio>=5.23.2,<6.0.0",
|
||||
]
|
||||
tesserocr = [
|
||||
"tesserocr~=2.7"
|
||||
]
|
||||
easyocr = [
|
||||
"easyocr>=1.7",
|
||||
]
|
||||
rapidocr = [
|
||||
"rapidocr-onnxruntime~=1.4; python_version<'3.13'",
|
||||
"onnxruntime~=1.7",
|
||||
"rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
|
||||
"onnxruntime (>=1.7.0,<2.0.0)",
|
||||
]
|
||||
flash-attn = [
|
||||
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
@@ -87,10 +89,10 @@ cpu = [
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cu124 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
]
|
||||
# cu124 = [
|
||||
# "torch>=2.6.0",
|
||||
# "torchvision>=0.21.0",
|
||||
# ]
|
||||
|
||||
cu126 = [
|
||||
"torch>=2.7.1",
|
||||
@@ -115,7 +117,7 @@ conflicts = [
|
||||
[
|
||||
{ group = "pypi" },
|
||||
{ group = "cpu" },
|
||||
{ group = "cu124" },
|
||||
# { group = "cu124" },
|
||||
{ group = "cu126" },
|
||||
{ group = "cu128" },
|
||||
{ group = "rocm" },
|
||||
@@ -123,14 +125,15 @@ conflicts = [
|
||||
]
|
||||
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
|
||||
override-dependencies = [
|
||||
"urllib3~=2.0"
|
||||
"urllib3~=2.0",
|
||||
"xgrammar>=0.1.24"
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
torch = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
@@ -139,7 +142,7 @@ torch = [
|
||||
torchvision = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
@@ -162,10 +165,10 @@ name = "pytorch-cpu"
|
||||
url = "https://download.pytorch.org/whl/cpu"
|
||||
explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-cu124"
|
||||
url = "https://download.pytorch.org/whl/cu124"
|
||||
explicit = true
|
||||
# [[tool.uv.index]]
|
||||
# name = "pytorch-cu124"
|
||||
# url = "https://download.pytorch.org/whl/cu124"
|
||||
# explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-cu126"
|
||||
|
||||
Reference in New Issue
Block a user