mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3bd7828570 | ||
|
|
8b470cba8e | ||
|
|
8048f4589a | ||
|
|
b3058e91e0 | ||
|
|
63da9eedeb | ||
|
|
b15dc2529f | ||
|
|
4c7207be00 | ||
|
|
db3fdb5bc1 | ||
|
|
fd1b987e8d |
@@ -19,6 +19,7 @@ Kubeflow
|
||||
(?i)PyTorch
|
||||
(?i)CUDA
|
||||
(?i)NVIDIA
|
||||
(?i)ROCm
|
||||
(?i)env
|
||||
Gradio
|
||||
bool
|
||||
|
||||
4
.github/workflows/cd.yml
vendored
4
.github/workflows/cd.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
|
||||
12
.github/workflows/ci-images-dryrun.yml
vendored
12
.github/workflows/ci-images-dryrun.yml
vendored
@@ -21,10 +21,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
@@ -33,6 +33,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
# platforms: linux/amd64
|
||||
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
13
.github/workflows/images.yml
vendored
13
.github/workflows/images.yml
vendored
@@ -25,10 +25,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
@@ -37,7 +37,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
# platforms: linux/amd64
|
||||
permissions:
|
||||
packages: write
|
||||
contents: read
|
||||
|
||||
2
.github/workflows/job-build.yml
vendored
2
.github/workflows/job-build.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
|
||||
11
.github/workflows/job-checks.yml
vendored
11
.github/workflows/job-checks.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
run: uv sync --frozen --all-extras --no-extra flash-attn
|
||||
|
||||
- name: Run styling check
|
||||
run: pre-commit run --all-files
|
||||
run: uv run pre-commit run --all-files
|
||||
|
||||
build-package:
|
||||
uses: ./.github/workflows/job-build.yml
|
||||
@@ -47,14 +47,16 @@ jobs:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Create virtual environment
|
||||
run: uv venv
|
||||
- name: Install package
|
||||
run: uv pip install dist/*.whl
|
||||
- name: Create the server
|
||||
run: python -c 'from docling_serve.app import create_app; create_app()'
|
||||
run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'
|
||||
|
||||
markdown-lint:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -64,4 +66,3 @@ jobs:
|
||||
uses: DavidAnson/markdownlint-cli2-action@v16
|
||||
with:
|
||||
globs: "**/*.md"
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ repos:
|
||||
args: ["--config=.github/vale.ini"]
|
||||
files: \.md$
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
# uv version.
|
||||
rev: 0.7.13
|
||||
# uv version, https://github.com/astral-sh/uv-pre-commit/releases
|
||||
rev: 0.8.3
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
||||
20
CHANGELOG.md
20
CHANGELOG.md
@@ -1,3 +1,23 @@
|
||||
## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
|
||||
|
||||
### Fix
|
||||
|
||||
* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
|
||||
* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
|
||||
* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
|
||||
* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
|
||||
|
||||
## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
|
||||
|
||||
### Feature
|
||||
|
||||
* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
|
||||
* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
|
||||
|
||||
## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30
|
||||
|
||||
### Feature
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
|
||||
|
||||
FROM ${BASE_IMAGE}
|
||||
ARG UV_VERSION=0.8.3
|
||||
|
||||
USER 0
|
||||
ARG UV_SYNC_EXTRA_ARGS=""
|
||||
|
||||
FROM ${BASE_IMAGE} AS docling-base
|
||||
|
||||
###################################################################################################
|
||||
# OS Layer #
|
||||
###################################################################################################
|
||||
|
||||
USER 0
|
||||
|
||||
RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
|
||||
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
|
||||
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
|
||||
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
|
||||
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
|
||||
|
||||
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
|
||||
|
||||
###################################################################################################
|
||||
# Docling layer #
|
||||
###################################################################################################
|
||||
|
||||
FROM docling-base
|
||||
|
||||
USER 1001
|
||||
|
||||
WORKDIR /opt/app-root/src
|
||||
|
||||
ENV \
|
||||
# On container environments, always set a thread budget to avoid undesired thread congestion.
|
||||
OMP_NUM_THREADS=4 \
|
||||
LANG=en_US.UTF-8 \
|
||||
LC_ALL=en_US.UTF-8 \
|
||||
@@ -40,9 +47,9 @@ ENV \
|
||||
UV_PROJECT_ENVIRONMENT=/opt/app-root \
|
||||
DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
|
||||
|
||||
ARG UV_SYNC_EXTRA_ARGS=""
|
||||
ARG UV_SYNC_EXTRA_ARGS
|
||||
|
||||
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
|
||||
RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
@@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
|
||||
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
|
||||
|
||||
COPY --chown=1001:0 ./docling_serve ./docling_serve
|
||||
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
|
||||
|
||||
RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
|
||||
28
Makefile
28
Makefile
@@ -60,6 +60,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-rocm-image
|
||||
docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: action-lint
|
||||
action-lint: .action-lint ## Lint GitHub Action workflows
|
||||
.action-lint: $(shell find .github -type f) | action-lint-file
|
||||
@@ -107,3 +114,24 @@ run-docling-cu124: ## Run the docling-serve container with GPU support and assig
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
|
||||
|
||||
.PHONY: run-docling-cu126
|
||||
run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
|
||||
|
||||
.PHONY: run-docling-cu128
|
||||
run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
|
||||
|
||||
.PHONY: run-docling-rocm
|
||||
run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
|
||||
|
||||
38
README.md
38
README.md
@@ -36,7 +36,8 @@ The server is available at
|
||||
- API <http://127.0.0.1:5001>
|
||||
- API documentation <http://127.0.0.1:5001/docs>
|
||||
- UI playground <http://127.0.0.1:5001/ui>
|
||||

|
||||
|
||||

|
||||
|
||||
Try it out with a simple conversion:
|
||||
|
||||
@@ -46,21 +47,36 @@ curl -X 'POST' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
### Container images
|
||||
### Container Images
|
||||
|
||||
Available container images:
|
||||
The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:
|
||||
|
||||
| Name | Description | Arch | Size |
|
||||
| -----|-------------|------|------|
|
||||
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
|
||||
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
|
||||
#### 📦 Distributed Images
|
||||
|
||||
| Image | Description | Architectures | Size |
|
||||
|-------|-------------|----------------|------|
|
||||
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
|
||||
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
|
||||
|
||||
#### 🚫 Not Distributed
|
||||
|
||||
An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
|
||||
|
||||
To build it locally:
|
||||
|
||||
```bash
|
||||
git clone --branch main git@github.com:docling-project/docling-serve.git
|
||||
cd docling-serve/
|
||||
make docling-serve-rocm-image
|
||||
```
|
||||
|
||||
For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).
|
||||
|
||||
Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.
|
||||
|
||||
|
||||
@@ -390,7 +390,7 @@ def create_app(): # noqa: C901
|
||||
# Convert a document from URL(s)
|
||||
@app.post(
|
||||
"/v1/convert/source",
|
||||
response_model=ConvertDocumentResponse,
|
||||
response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
|
||||
responses={
|
||||
200: {
|
||||
"content": {"application/zip": {}},
|
||||
@@ -426,7 +426,7 @@ def create_app(): # noqa: C901
|
||||
# Convert a document from file(s)
|
||||
@app.post(
|
||||
"/v1/convert/file",
|
||||
response_model=ConvertDocumentResponse,
|
||||
response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
|
||||
responses={
|
||||
200: {
|
||||
"content": {"application/zip": {}},
|
||||
|
||||
@@ -19,6 +19,7 @@ def get_async_orchestrator() -> BaseOrchestrator:
|
||||
|
||||
local_config = LocalOrchestratorConfig(
|
||||
num_workers=docling_serve_settings.eng_loc_num_workers,
|
||||
shared_models=docling_serve_settings.eng_loc_share_models,
|
||||
)
|
||||
|
||||
cm_config = DoclingConverterManagerConfig(
|
||||
|
||||
@@ -63,6 +63,7 @@ class DoclingServeSettings(BaseSettings):
|
||||
eng_kind: AsyncEngine = AsyncEngine.LOCAL
|
||||
# Local engine
|
||||
eng_loc_num_workers: int = 2
|
||||
eng_loc_share_models: bool = False
|
||||
# KFP engine
|
||||
eng_kfp_endpoint: Optional[AnyUrl] = None
|
||||
eng_kfp_token: Optional[str] = None
|
||||
|
||||
@@ -66,6 +66,7 @@ The following table describes the options to configure the Docling Serve local e
|
||||
| ENV | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
|
||||
| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |
|
||||
|
||||
#### KFP engine
|
||||
|
||||
|
||||
21
docs/deploy-examples/compose-amd.yaml
Normal file
21
docs/deploy-examples/compose-amd.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# AMD ROCm deployment
|
||||
|
||||
services:
|
||||
docling-serve:
|
||||
image: ghcr.io/docling-project/docling-serve-rocm:main
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- "5001:5001"
|
||||
environment:
|
||||
DOCLING_SERVE_ENABLE_UI: "true"
|
||||
ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
|
||||
## This section is for compatibility with older cards
|
||||
# HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
||||
# HSA_ENABLE_SDMA: "0"
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
group_add:
|
||||
- 44 # video group GID from host
|
||||
- 992 # render group GID from host
|
||||
restart: always
|
||||
@@ -1,15 +0,0 @@
|
||||
services:
|
||||
docling:
|
||||
image: ghcr.io/docling-project/docling-serve-cu124
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- 5001:5001
|
||||
environment:
|
||||
- DOCLING_SERVE_ENABLE_UI=true
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all # nvidia-smi
|
||||
capabilities: [gpu]
|
||||
20
docs/deploy-examples/compose-nvidia.yaml
Normal file
20
docs/deploy-examples/compose-nvidia.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# NVIDIA CUDA deployment
|
||||
|
||||
services:
|
||||
docling-serve:
|
||||
image: ghcr.io/docling-project/docling-serve-cu126:main
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- "5001:5001"
|
||||
environment:
|
||||
DOCLING_SERVE_ENABLE_UI: "true"
|
||||
NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
|
||||
# deploy: # This section is for compatibility with Swarm
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: all
|
||||
# capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
restart: always
|
||||
@@ -4,16 +4,17 @@ This document provides deployment examples for running the application in differ
|
||||
|
||||
Choose the deployment option that best fits your setup.
|
||||
|
||||
- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
|
||||
- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
|
||||
- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
|
||||
- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.
|
||||
|
||||
---
|
||||
|
||||
## Local GPU
|
||||
## Local GPU NVIDIA
|
||||
|
||||
### Docker compose
|
||||
|
||||
Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
|
||||
Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)
|
||||
|
||||
This deployment has the following features:
|
||||
|
||||
@@ -22,7 +23,7 @@ This deployment has the following features:
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
|
||||
```
|
||||
|
||||
For using the API:
|
||||
@@ -34,7 +35,7 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -56,7 +57,7 @@ Docs:
|
||||
<details>
|
||||
<summary><b>Steps</b></summary>
|
||||
|
||||
1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
|
||||
1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)
|
||||
|
||||
```sh
|
||||
nvidia-smi
|
||||
@@ -117,7 +118,75 @@ Docs:
|
||||
5. Run the container:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Local GPU AMD
|
||||
|
||||
### Docker compose
|
||||
|
||||
Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
|
||||
|
||||
This deployment has the following features:
|
||||
|
||||
- AMD rocm enabled
|
||||
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-amd.yaml up -d
|
||||
```
|
||||
|
||||
For using the API:
|
||||
|
||||
```sh
|
||||
# Make a test query
|
||||
curl -X 'POST' \
|
||||
"localhost:5001/v1/convert/source/async" \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><b>Requirements</b></summary>
|
||||
|
||||
- debian/ubuntu/rhel/fedora/opensuse
|
||||
- docker
|
||||
- AMDGPU driver >=6.3
|
||||
- AMD ROCm >=6.3
|
||||
|
||||
Docs:
|
||||
|
||||
- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Steps</b></summary>
|
||||
|
||||
1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
rocm-smi --showdriverversion
|
||||
rocminfo | grep -i "ROCm version"
|
||||
```
|
||||
|
||||
2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
getent group video
|
||||
getent group render
|
||||
```
|
||||
|
||||
3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
make docling-serve-rocm-image
|
||||
```
|
||||
|
||||
</details>
|
||||
@@ -152,7 +221,7 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -189,7 +258,7 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -222,7 +291,7 @@ task_id=$(curl -s -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}' \
|
||||
-c cookies.txt | grep -oP '"task_id":"\K[^"]+')
|
||||
```
|
||||
|
||||
@@ -37,7 +37,7 @@ New version:
|
||||
"options": {}, // conversion options
|
||||
"sources": [
|
||||
// input document provided as base64-encoded string
|
||||
{"kind": "kind", "base64_string": "abc123...", "filename": "file.pdf"},
|
||||
{"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
|
||||
// input document provided as http urls
|
||||
{"kind": "http", "url": "https://..."},
|
||||
]
|
||||
|
||||
BIN
img/fastapi-ui.png
Normal file
BIN
img/fastapi-ui.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 226 KiB |
BIN
img/swagger.png
BIN
img/swagger.png
Binary file not shown.
|
Before Width: | Height: | Size: 24 KiB |
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling-serve"
|
||||
version = "1.1.0" # DO NOT EDIT, updated automatically
|
||||
version = "1.2.1" # DO NOT EDIT, updated automatically
|
||||
description = "Running Docling as a service"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
@@ -8,7 +8,6 @@ authors = [
|
||||
{name="Guillaume Moutier", email="gmoutier@redhat.com"},
|
||||
{name="Anil Vishnoi", email="avishnoi@redhat.com"},
|
||||
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||
{name="Christoph Auer", email="cau@zurich.ibm.com"},
|
||||
{name="Peter Staar", email="taa@zurich.ibm.com"},
|
||||
]
|
||||
@@ -36,7 +35,7 @@ requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"docling~=2.38",
|
||||
"docling-core>=2.44.1",
|
||||
"docling-jobkit[kfp,vlm]~=1.2",
|
||||
"docling-jobkit[kfp,vlm]>=1.3.1,<2.0.0",
|
||||
"fastapi[standard]~=0.115",
|
||||
"httpx~=0.28",
|
||||
"pydantic~=2.10",
|
||||
@@ -62,7 +61,7 @@ rapidocr = [
|
||||
"onnxruntime~=1.7",
|
||||
]
|
||||
flash-attn = [
|
||||
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
@@ -76,25 +75,36 @@ dev = [
|
||||
"python-semantic-release~=7.32",
|
||||
"ruff>=0.9.6",
|
||||
]
|
||||
|
||||
pypi = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cpu = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cu124 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.6.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
"torchvision>=0.21.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
]
|
||||
|
||||
cu126 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
"torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
]
|
||||
|
||||
cu128 = [
|
||||
"torch>=2.7.0",
|
||||
"torchvision>=0.22.0",
|
||||
"torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
"torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
]
|
||||
|
||||
rocm = [
|
||||
"torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
"torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
"pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
@@ -107,6 +117,7 @@ conflicts = [
|
||||
{ group = "cu124" },
|
||||
{ group = "cu126" },
|
||||
{ group = "cu128" },
|
||||
{ group = "rocm" },
|
||||
],
|
||||
]
|
||||
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
|
||||
@@ -118,17 +129,25 @@ override-dependencies = [
|
||||
torch = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124" },
|
||||
{ index = "pytorch-cu126", group = "cu126" },
|
||||
{ index = "pytorch-cu128", group = "cu128" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
torchvision = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124" },
|
||||
{ index = "pytorch-cu126", group = "cu126" },
|
||||
{ index = "pytorch-cu128", group = "cu128" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
pytorch-triton-rocm = [
|
||||
{ index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
|
||||
# docling-jobkit = { path = "../docling-jobkit", editable = true }
|
||||
|
||||
@@ -157,6 +176,11 @@ name = "pytorch-cu128"
|
||||
url = "https://download.pytorch.org/whl/cu128"
|
||||
explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-rocm"
|
||||
url = "https://download.pytorch.org/whl/rocm6.3"
|
||||
explicit = true
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["docling_serve*"]
|
||||
namespaces = true
|
||||
|
||||
Reference in New Issue
Block a user