5 Commits

Author SHA1 Message Date
github-actions[bot]
a2dcb0a20f chore: bump version to 0.10.1 [skip ci] 2025-04-30 16:04:30 +00:00
Michele Dolfi
36787bc061 fix: avoid missing specialized keys in the options hash (#166)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-30 13:14:34 +02:00
Michele Dolfi
509f4889f8 fix: allow users to set the area threshold for picture descriptions (#165)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-04-30 12:37:24 +02:00
Michele Dolfi
919cf5c041 fix: expose max wait time in sync endpoints (#164)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-30 12:30:11 +02:00
Michele Dolfi
35c2630c61 fix: add flash-attn for cuda images (#161)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-29 16:58:33 +02:00
16 changed files with 3059 additions and 2952 deletions

View File

@@ -19,7 +19,7 @@ jobs:
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |

View File

@@ -23,7 +23,7 @@ jobs:
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |

View File

@@ -17,7 +17,7 @@ jobs:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install dependencies
run: uv sync --all-extras --no-extra cu124
run: uv sync --all-extras --no-extra cu124 --no-extra flash-attn
- name: Build package
run: uv build
- name: Check content of wheel

View File

@@ -25,7 +25,7 @@ jobs:
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Install dependencies
run: uv sync --frozen --all-extras --no-extra cu124
run: uv sync --frozen --all-extras --no-extra cu124 --no-extra flash-attn
- name: Run styling check
run: pre-commit run --all-files

View File

@@ -1,3 +1,12 @@
## [v0.10.1](https://github.com/docling-project/docling-serve/releases/tag/v0.10.1) - 2025-04-30
### Fix
* Avoid missing specialized keys in the options hash ([#166](https://github.com/docling-project/docling-serve/issues/166)) ([`36787bc`](https://github.com/docling-project/docling-serve/commit/36787bc0616356a6199da618d8646de51636b34e))
* Allow users to set the area threshold for picture descriptions ([#165](https://github.com/docling-project/docling-serve/issues/165)) ([`509f488`](https://github.com/docling-project/docling-serve/commit/509f4889f8ed4c0f0ce25bec4126ef1f1199797c))
* Expose max wait time in sync endpoints ([#164](https://github.com/docling-project/docling-serve/issues/164)) ([`919cf5c`](https://github.com/docling-project/docling-serve/commit/919cf5c0414f2f11eb8012f451fed7a8f582b7ad))
* Add flash-attn for cuda images ([#161](https://github.com/docling-project/docling-serve/issues/161)) ([`35c2630`](https://github.com/docling-project/docling-serve/commit/35c2630c613cf229393fc67b6938152b063ff498))
## [v0.10.0](https://github.com/docling-project/docling-serve/releases/tag/v0.10.0) - 2025-04-28
### Feature

View File

@@ -46,7 +46,10 @@ RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
umask 002 && uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
umask 002 && \
UV_SYNC_ARGS="--frozen --no-install-project --no-dev --all-extras" && \
uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn
ARG MODELS_LIST="layout tableformer picture_classifier easyocr"

View File

@@ -35,7 +35,7 @@ docling-serve-image: Containerfile
.PHONY: docling-serve-cpu-image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)

View File

@@ -251,7 +251,6 @@ def create_app(): # noqa: C901
async def _wait_task_complete(
orchestrator: BaseAsyncOrchestrator, task_id: str
) -> bool:
MAX_WAIT = 120
start_time = time.monotonic()
while True:
task = await orchestrator.task_status(task_id=task_id)
@@ -259,7 +258,7 @@ def create_app(): # noqa: C901
return True
await asyncio.sleep(5)
elapsed_time = time.monotonic() - start_time
if elapsed_time > MAX_WAIT:
if elapsed_time > docling_serve_settings.max_sync_wait:
return False
#############################
@@ -310,7 +309,8 @@ def create_app(): # noqa: C901
if not success:
# TODO: abort task!
return HTTPException(
status_code=504, detail="Conversion is taking too long."
status_code=504,
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
)
result = await orchestrator.task_result(
@@ -351,7 +351,8 @@ def create_app(): # noqa: C901
if not success:
# TODO: abort task!
return HTTPException(
status_code=504, detail="Conversion is taking too long."
status_code=504,
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
)
result = await orchestrator.task_result(

View File

@@ -9,6 +9,7 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions,
PdfBackend,
PdfPipeline,
PictureDescriptionBaseOptions,
TableFormerMode,
TableStructureOptions,
)
@@ -339,6 +340,14 @@ class ConvertDocumentsOptions(BaseModel):
),
] = False
picture_description_area_threshold: Annotated[
float,
Field(
description="Minimum percentage of the area for a picture to be processed with the models.",
examples=[PictureDescriptionBaseOptions().picture_area_threshold],
),
] = PictureDescriptionBaseOptions().picture_area_threshold
picture_description_local: Annotated[
Optional[PictureDescriptionLocal],
Field(

View File

@@ -42,15 +42,12 @@ _log = logging.getLogger(__name__)
# Custom serializer for PdfFormatOption
# (model_dump_json does not work with some classes)
def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
data = pdf_format_option.model_dump()
data = pdf_format_option.model_dump(serialize_as_any=True)
# pipeline_options are not fully serialized by model_dump, dedicated pass
if pdf_format_option.pipeline_options:
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
# Replace `artifacts_path` with a string representation
data["pipeline_options"]["artifacts_path"] = repr(
data["pipeline_options"]["artifacts_path"]
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump(
serialize_as_any=True, mode="json"
)
# Replace `pipeline_cls` with a string representation
@@ -59,12 +56,6 @@ def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
# Replace `backend` with a string representation
data["backend"] = repr(data["backend"])
# Handle `device` in `accelerator_options`
if "accelerator_options" in data and "device" in data["accelerator_options"]:
data["accelerator_options"]["device"] = repr(
data["accelerator_options"]["device"]
)
# Serialize the dictionary to JSON with sorted keys to have consistent hashes
serialized_data = json.dumps(data, sort_keys=True)
options_hash = hashlib.sha1(serialized_data.encode()).digest()
@@ -150,6 +141,9 @@ def _parse_standard_pdf_opts(
request.picture_description_api.model_dump()
)
)
pipeline_options.picture_description_options.picture_area_threshold = (
request.picture_description_area_threshold
)
return pipeline_options

View File

@@ -48,6 +48,8 @@ class DoclingServeSettings(BaseSettings):
max_num_pages: int = sys.maxsize
max_file_size: int = sys.maxsize
max_sync_wait: int = 120 # 2 minutes
cors_origins: list[str] = ["*"]
cors_methods: list[str] = ["*"]
cors_headers: list[str] = ["*"]

View File

@@ -45,6 +45,7 @@ THe following table describes the options to configure the Docling Serve app.
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |

View File

@@ -23,6 +23,7 @@ On top of the source of file (see below), both endpoints support the same parame
- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
- `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.

View File

@@ -1,6 +1,6 @@
[project]
name = "docling-serve"
version = "0.10.0" # DO NOT EDIT, updated automatically
version = "0.10.1" # DO NOT EDIT, updated automatically
description = "Running Docling as a service"
license = {text = "MIT"}
authors = [
@@ -63,6 +63,9 @@ cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
flash-attn = [
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
]
[dependency-groups]
dev = [
@@ -83,7 +86,10 @@ conflicts = [
{ extra = "cpu" },
{ extra = "cu124" },
],
]
[
{ extra = "cpu" },
{ extra = "flash-attn" },
],]
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
override-dependencies = [
"urllib3~=2.0"

View File

@@ -0,0 +1,54 @@
from docling_serve.datamodel.convert import (
ConvertDocumentsOptions,
PictureDescriptionApi,
)
from docling_serve.docling_conversion import (
_hash_pdf_format_option,
get_pdf_pipeline_opts,
)
def test_options_cache_key():
hashes = set()
opts = ConvertDocumentsOptions()
pipeline_opts = get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
assert hash not in hashes
hashes.add(hash)
opts.do_picture_description = True
pipeline_opts = get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)
opts.picture_description_api = PictureDescriptionApi(
url="http://localhost",
params={"model": "mymodel"},
prompt="Hello 1",
)
pipeline_opts = get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)
opts.picture_description_api = PictureDescriptionApi(
url="http://localhost",
params={"model": "your-model"},
prompt="Hello 1",
)
pipeline_opts = get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)
opts.picture_description_api.prompt = "World"
pipeline_opts = get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)

5883
uv.lock generated

File diff suppressed because it is too large Load Diff