19 Commits

Author SHA1 Message Date
github-actions[bot]
767ce0982b chore: bump version to 0.16.1 [skip ci] 2025-07-07 16:17:50 +00:00
Michele Dolfi
bfde1a0991 fix: upgrade deps including, docling v2.40.0 with locks in models init (#264)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-07-07 17:13:45 +02:00
VIktor Kuropiantnyk
eb3892ee14 fix: missing tesseract osd (#263)
Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
2025-07-07 16:36:43 +02:00
tassadarliu
93b84712b2 docs: fix typo (#259)
Signed-off-by: tassadarliu <rhapsodyn@gmail.com>
2025-07-07 08:47:34 +02:00
Yishen Miao
c45b937064 docs: change the doc example (#258)
Signed-off-by: Yishen Miao <mys721tx@gmail.com>
2025-07-07 08:47:21 +02:00
Francisco Arceo
50e431f30f docs: Update typo (#247)
Signed-off-by: Francisco Arceo <arceofrancisco@gmail.com>
2025-06-27 16:58:37 +02:00
Michele Dolfi
149a8cb1c0 fix: properly load models at boot (#244)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-27 12:20:38 +02:00
github-actions[bot]
5f9c20a985 chore: bump version to 0.16.0 [skip ci] 2025-06-25 09:52:08 +00:00
Michele Dolfi
80755a7d59 docs: Update example resources and improve README (#231)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-25 07:56:14 +02:00
Michele Dolfi
30aca92298 feat: package updates and more cuda images (#229)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-24 16:59:05 +02:00
github-actions[bot]
717fb3a8d8 chore: bump version to 0.15.0 [skip ci] 2025-06-17 15:00:38 +00:00
Michele Dolfi
873d05aefe feat: use redocs and scalar as api docs (#228)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-17 16:54:00 +02:00
Ryan Fernandes
196c5ce42a fix: "tesserocr" instead of "tesseract_cli" in usage docs (#223)
Signed-off-by: Ryan Fernandes <ryan@fernandes.us>
2025-06-17 16:53:51 +02:00
github-actions[bot]
b5c5f47892 chore: bump version to 0.14.0 [skip ci] 2025-06-17 13:10:27 +00:00
23Ro
d5455b7f66 fix: Typo in Headline (#220)
Signed-off-by: 23Ro <m.n@23ro.de>
2025-06-17 14:55:27 +02:00
Michele Dolfi
7a682494d6 chore: dco advisor (#224)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-17 09:38:56 +02:00
Eugene
524f6a8997 feat: Read supported file extensions from docling (#214)
Signed-off-by: Eugene <fogaprod@gmail.com>
2025-06-05 09:38:28 +02:00
github-actions[bot]
9ccf8e3b5e chore: bump version to 0.13.0 [skip ci] 2025-06-04 12:24:40 +00:00
Michele Dolfi
ffea34732b feat: upgrade docling to 2.36 (#212)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-06-04 14:20:34 +02:00
29 changed files with 2896 additions and 1732 deletions

2
.github/dco.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
allowRemediationCommits:
individual: true

View File

@@ -15,15 +15,23 @@ jobs:
spec:
- name: docling-project/docling-serve
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
permissions:

192
.github/workflows/dco-advisor.yml vendored Normal file
View File

@@ -0,0 +1,192 @@
name: DCO Advisor Bot
on:
pull_request_target:
types: [opened, reopened, synchronize]
permissions:
pull-requests: write
issues: write
jobs:
dco_advisor:
runs-on: ubuntu-latest
steps:
- name: Handle DCO check result
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const pr = context.payload.pull_request || context.payload.check_run?.pull_requests?.[0];
if (!pr) return;
const prNumber = pr.number;
const baseRef = pr.base.ref;
const headSha =
context.payload.check_run?.head_sha ||
pr.head?.sha;
const username = pr.user.login;
console.log("HEAD SHA:", headSha);
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
// Poll until DCO check has a conclusion (max 6 attempts, 30s)
let dcoCheck = null;
for (let attempt = 0; attempt < 6; attempt++) {
const { data: checks } = await github.rest.checks.listForRef({
owner: context.repo.owner,
repo: context.repo.repo,
ref: headSha
});
console.log("All check runs:");
checks.check_runs.forEach(run => {
console.log(`- ${run.name} (${run.status}/${run.conclusion}) @ ${run.head_sha}`);
});
dcoCheck = checks.check_runs.find(run =>
run.name.toLowerCase().includes("dco") &&
!run.name.toLowerCase().includes("dco_advisor") &&
run.head_sha === headSha
);
if (dcoCheck?.conclusion) break;
console.log(`Waiting for DCO check... (${attempt + 1})`);
await sleep(5000); // wait 5 seconds
}
if (!dcoCheck || !dcoCheck.conclusion) {
console.log("DCO check did not complete in time.");
return;
}
const isFailure = ["failure", "action_required"].includes(dcoCheck.conclusion);
console.log(`DCO check conclusion for ${headSha}: ${dcoCheck.conclusion} (treated as ${isFailure ? "failure" : "success"})`);
// Parse DCO output for commit SHAs and author
let badCommits = [];
let authorName = "";
let authorEmail = "";
let moreInfo = `More info: [DCO check report](${dcoCheck?.html_url})`;
if (isFailure) {
const { data: commits } = await github.rest.pulls.listCommits({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
});
for (const commit of commits) {
const commitMessage = commit.commit.message;
const signoffMatch = commitMessage.match(/^Signed-off-by:\s+.+<.+>$/m);
if (!signoffMatch) {
console.log(`Bad commit found ${commit.sha}`)
badCommits.push({
sha: commit.sha,
authorName: commit.commit.author.name,
authorEmail: commit.commit.author.email,
});
}
}
}
// If multiple authors are present, you could adapt the message accordingly
// For now, we'll just use the first one
if (badCommits.length > 0) {
authorName = badCommits[0].authorName;
authorEmail = badCommits[0].authorEmail;
}
// Generate remediation commit message if needed
let remediationSnippet = "";
if (badCommits.length && authorEmail) {
remediationSnippet = `git commit --allow-empty -s -m "DCO Remediation Commit for ${authorName} <${authorEmail}>\n\n` +
badCommits.map(c => `I, ${c.authorName} <${c.authorEmail}>, hereby add my Signed-off-by to this commit: ${c.sha}`).join('\n') +
`"`;
} else {
remediationSnippet = "# Unable to auto-generate remediation message. Please check the DCO check details.";
}
// Build comment
const commentHeader = '<!-- dco-advice-bot -->';
let body = "";
if (isFailure) {
body = [
commentHeader,
'❌ **DCO Check Failed**',
'',
`Hi @${username}, your pull request has failed the Developer Certificate of Origin (DCO) check.`,
'',
'This repository supports **remediation commits**, so you can fix this without rewriting history — but you must follow the required message format.',
'',
'---',
'',
'### 🛠 Quick Fix: Add a remediation commit',
'Run this command:',
'',
'```bash',
remediationSnippet,
'git push',
'```',
'',
'---',
'',
'<details>',
'<summary>🔧 Advanced: Sign off each commit directly</summary>',
'',
'**For the latest commit:**',
'```bash',
'git commit --amend --signoff',
'git push --force-with-lease',
'```',
'',
'**For multiple commits:**',
'```bash',
`git rebase --signoff origin/${baseRef}`,
'git push --force-with-lease',
'```',
'',
'</details>',
'',
moreInfo
].join('\n');
} else {
body = [
commentHeader,
'✅ **DCO Check Passed**',
'',
`Thanks @${username}, all your commits are properly signed off. 🎉`
].join('\n');
}
// Get existing comments on the PR
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber
});
// Look for a previous bot comment
const existingComment = comments.find(c =>
c.body.includes("<!-- dco-advice-bot -->")
);
if (existingComment) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existingComment.id,
body: body
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: body
});
}

View File

@@ -19,15 +19,23 @@ jobs:
spec:
- name: docling-project/docling-serve
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
permissions:

View File

@@ -17,7 +17,7 @@ jobs:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install dependencies
run: uv sync --all-extras --no-extra cu124 --no-extra flash-attn
run: uv sync --all-extras --no-extra flash-attn
- name: Build package
run: uv build
- name: Check content of wheel

View File

@@ -25,7 +25,7 @@ jobs:
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Install dependencies
run: uv sync --frozen --all-extras --no-extra cu124 --no-extra flash-attn
run: uv sync --frozen --all-extras --no-extra flash-attn
- name: Run styling check
run: pre-commit run --all-files

View File

@@ -23,6 +23,6 @@ repos:
files: '\.py$'
- repo: https://github.com/astral-sh/uv-pre-commit
# uv version.
rev: 0.6.1
rev: 0.7.13
hooks:
- id: uv-lock

View File

@@ -1,3 +1,53 @@
## [v0.16.1](https://github.com/docling-project/docling-serve/releases/tag/v0.16.1) - 2025-07-07
### Fix
* Upgrade deps including, docling v2.40.0 with locks in models init ([#264](https://github.com/docling-project/docling-serve/issues/264)) ([`bfde1a0`](https://github.com/docling-project/docling-serve/commit/bfde1a0991c2da53b72c4f131ff74fa10f6340de))
* Missing tesseract osd ([#263](https://github.com/docling-project/docling-serve/issues/263)) ([`eb3892e`](https://github.com/docling-project/docling-serve/commit/eb3892ee141eb2c941d580b095d8a266f2d2610c))
* Properly load models at boot ([#244](https://github.com/docling-project/docling-serve/issues/244)) ([`149a8cb`](https://github.com/docling-project/docling-serve/commit/149a8cb1c0a16c1e0b7d17f40b88b4d6e8f0109d))
### Documentation
* Fix typo ([#259](https://github.com/docling-project/docling-serve/issues/259)) ([`93b8471`](https://github.com/docling-project/docling-serve/commit/93b84712b2c6d180908a197847b52b217a7ff05f))
* Change the doc example ([#258](https://github.com/docling-project/docling-serve/issues/258)) ([`c45b937`](https://github.com/docling-project/docling-serve/commit/c45b93706466a073ab4a5c75aa8a267110873e26))
* Update typo ([#247](https://github.com/docling-project/docling-serve/issues/247)) ([`50e431f`](https://github.com/docling-project/docling-serve/commit/50e431f30fbffa33f43727417fe746d20cbb9d6b))
## [v0.16.0](https://github.com/docling-project/docling-serve/releases/tag/v0.16.0) - 2025-06-25
### Feature
* Package updates and more cuda images ([#229](https://github.com/docling-project/docling-serve/issues/229)) ([`30aca92`](https://github.com/docling-project/docling-serve/commit/30aca92298ab0d86bb4debcfcacb2dd8b9040a27))
### Documentation
* Update example resources and improve README ([#231](https://github.com/docling-project/docling-serve/issues/231)) ([`80755a7`](https://github.com/docling-project/docling-serve/commit/80755a7d5955f7d0c53df8e558fdd852dd1f5b75))
## [v0.15.0](https://github.com/docling-project/docling-serve/releases/tag/v0.15.0) - 2025-06-17
### Feature
* Use redocs and scalar as api docs ([#228](https://github.com/docling-project/docling-serve/issues/228)) ([`873d05a`](https://github.com/docling-project/docling-serve/commit/873d05aefe141c63b9c1cf53b23b4fa8c96de05d))
### Fix
* "tesserocr" instead of "tesseract_cli" in usage docs ([#223](https://github.com/docling-project/docling-serve/issues/223)) ([`196c5ce`](https://github.com/docling-project/docling-serve/commit/196c5ce42a04d77234a4212c3d9b9772d2c2073e))
## [v0.14.0](https://github.com/docling-project/docling-serve/releases/tag/v0.14.0) - 2025-06-17
### Feature
* Read supported file extensions from docling ([#214](https://github.com/docling-project/docling-serve/issues/214)) ([`524f6a8`](https://github.com/docling-project/docling-serve/commit/524f6a8997b86d2f869ca491ec8fb40585b42ca4))
### Fix
* Typo in Headline ([#220](https://github.com/docling-project/docling-serve/issues/220)) ([`d5455b7`](https://github.com/docling-project/docling-serve/commit/d5455b7f66de39ea1f8b8927b5968d2baa23ca88))
## [v0.13.0](https://github.com/docling-project/docling-serve/releases/tag/v0.13.0) - 2025-06-04
### Feature
* Upgrade docling to 2.36 ([#212](https://github.com/docling-project/docling-serve/issues/212)) ([`ffea347`](https://github.com/docling-project/docling-serve/commit/ffea34732b24fdd438fabd6df02d3d9ce66b4534))
## [v0.12.0](https://github.com/docling-project/docling-serve/releases/tag/v0.12.0) - 2025-06-03
### Feature

View File

@@ -42,7 +42,7 @@ ENV \
ARG UV_SYNC_EXTRA_ARGS=""
RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.13,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
@@ -61,7 +61,7 @@ RUN echo "Downloading models..." && \
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
COPY --chown=1001:0 ./docling_serve ./docling_serve
RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.13,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \

View File

@@ -26,26 +26,40 @@ md-lint-file:
$(CMD_PREFIX) touch .markdown-lint
.PHONY: docling-serve-image
docling-serve-image: Containerfile
docling-serve-image: Containerfile ## Build docling-serve container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu" -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
.PHONY: docling-serve-cpu-image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
.PHONY: docling-serve-cu124-image
docling-serve-cu124-image: Containerfile ## Build docling-serve container image with GPU support
docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
.PHONY: docling-serve-cu126-image
docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
.PHONY: docling-serve-cu128-image
docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
.PHONY: action-lint
action-lint: .action-lint ## Lint GitHub Action workflows
.action-lint: $(shell find .github -type f) | action-lint-file
@@ -87,9 +101,9 @@ run-docling-cpu: ## Run the docling-serve container with CPU support and assign
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main
.PHONY: run-docling-gpu
run-docling-gpu: ## Run the docling-serve container with GPU support and assign a container name
.PHONY: run-docling-cu124
run-docling-cu124: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) docker rm -f docling-serve-gpu 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN GPU]"
$(CMD_PREFIX) docker run -it --name docling-serve-gpu -p 5001:5001 ghcr.io/docling-project/docling-serve:main
$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main

View File

@@ -8,23 +8,31 @@
Running [Docling](https://github.com/docling-project/docling) as an API service.
📚 [Docling Serve documentation](./docs/README.md)
- Learn how to [configure the webserver](./docs/configuration.md)
- Get to know all [runtime options](./docs/usage.md) of the API
- Explore useful [deployment examples](./docs/deployment.md)
- And more
## Getting started
Install the `docling-serve` package and run the server.
```bash
# Using the python package
pip install "docling-serve"
docling-serve run
pip install "docling-serve[ui]"
docling-serve run --enable-ui
# Using container images, e.g. with Podman
podman run -p 5001:5001 quay.io/docling-project/docling-serve
podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve
```
The server is available at
- API <http://127.0.0.1:5001>
- API documentation <http://127.0.0.1:5001/docs>
- UI playground <http://127.0.0.1:5001/ui>
![swagger.png](img/swagger.png)
Try it out with a simple conversion:
@@ -45,33 +53,22 @@ Available container images:
| Name | Description | Arch | Size |
| -----|-------------|------|------|
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.
### Demonstration UI
```bash
# Install the Python package with the extra dependencies
pip install "docling-serve[ui]"
docling-serve run --enable-ui
# Run the container image with the extra env parameters
podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true quay.io/docling-project/docling-serve
```
An easy to use UI is available at the `/ui` endpoint.
![ui-input.png](img/ui-input.png)
![ui-output.png](img/ui-output.png)
## Documentation and advance usages
Visit the [Docling Serve documentation](./docs/README.md) for learning how to [configure the webserver](./docs/configuration.md), use all the [runtime options](./docs/usage.md) of the API and [deployment examples](./docs/deployment.md), pre-load model weights into a persistent volume [model weights on persistent volume](./docs/pre-loading-models.md)
## Get help and support
Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).

View File

@@ -113,11 +113,13 @@ def _run(
protocol = "https" if run_ssl else "http"
url = f"{protocol}://{uvicorn_settings.host}:{uvicorn_settings.port}"
url_docs = f"{url}/docs"
url_scalar = f"{url}/scalar"
url_ui = f"{url}/ui"
console.print("")
console.print(f"Server started at [link={url}]{url}[/]")
console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
console.print(f"Scalar docs at [link={url_docs}]{url_scalar}[/]")
if docling_serve_settings.enable_ui:
console.print(f"UI at [link={url_ui}]{url_ui}[/]")

View File

@@ -25,6 +25,7 @@ from fastapi.openapi.docs import (
)
from fastapi.responses import RedirectResponse
from fastapi.staticfiles import StaticFiles
from scalar_fastapi import get_scalar_api_reference
from docling.datamodel.base_models import DocumentStream
@@ -98,7 +99,8 @@ async def lifespan(app: FastAPI):
scratch_dir = get_scratch()
# Warm up processing cache
await orchestrator.warm_up_caches()
if docling_serve_settings.load_models_at_boot:
await orchestrator.warm_up_caches()
# Start the background queue processor
queue_task = asyncio.create_task(orchestrator.process_queue())
@@ -140,8 +142,8 @@ def create_app(): # noqa: C901
app = FastAPI(
title="Docling Serve",
docs_url=None if offline_docs_assets else "/docs",
redoc_url=None if offline_docs_assets else "/redocs",
docs_url=None if offline_docs_assets else "/swagger",
redoc_url=None if offline_docs_assets else "/docs",
lifespan=lifespan,
version=version,
)
@@ -192,7 +194,7 @@ def create_app(): # noqa: C901
name="static",
)
@app.get("/docs", include_in_schema=False)
@app.get("/swagger", include_in_schema=False)
async def custom_swagger_ui_html():
return get_swagger_ui_html(
openapi_url=app.openapi_url,
@@ -206,7 +208,7 @@ def create_app(): # noqa: C901
async def swagger_ui_redirect():
return get_swagger_ui_oauth2_redirect_html()
@app.get("/redoc", include_in_schema=False)
@app.get("/docs", include_in_schema=False)
async def redoc_html():
return get_redoc_html(
openapi_url=app.openapi_url,
@@ -214,6 +216,15 @@ def create_app(): # noqa: C901
redoc_js_url="/static/redoc.standalone.js",
)
@app.get("/scalar", include_in_schema=False)
async def scalar_html():
return get_scalar_api_reference(
openapi_url=app.openapi_url,
title=app.title,
scalar_favicon_url="https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg",
# hide_client_button=True, # not yet released but in main
)
########################
# Async / Sync helpers #
########################

View File

@@ -8,8 +8,8 @@ from docling.datamodel.base_models import InputFormat, OutputFormat
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
PdfBackend,
PdfPipeline,
PictureDescriptionBaseOptions,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
)
@@ -132,7 +132,11 @@ class ConvertDocumentsOptions(BaseModel):
f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
"Optional, defaults to Markdown."
),
examples=[[OutputFormat.MARKDOWN]],
examples=[
[OutputFormat.MARKDOWN],
[OutputFormat.MARKDOWN, OutputFormat.JSON],
[v.value for v in OutputFormat],
],
),
] = [OutputFormat.MARKDOWN]
@@ -223,15 +227,15 @@ class ConvertDocumentsOptions(BaseModel):
] = TableStructureOptions().mode
pipeline: Annotated[
PdfPipeline,
ProcessingPipeline,
Field(description="Choose the pipeline to process PDF or image files."),
] = PdfPipeline.STANDARD
] = ProcessingPipeline.STANDARD
page_range: Annotated[
PageRange,
Field(
description="Only convert a range of pages. The page number starts at 1.",
examples=[(1, 4)],
examples=[DEFAULT_PAGE_RANGE, (1, 4)],
),
] = DEFAULT_PAGE_RANGE

View File

@@ -19,10 +19,10 @@ from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
OcrOptions,
PdfBackend,
PdfPipeline,
PdfPipelineOptions,
PictureDescriptionApiOptions,
PictureDescriptionVlmOptions,
ProcessingPipeline,
TableFormerMode,
VlmPipelineOptions,
smoldocling_vlm_conversion_options,
@@ -217,7 +217,7 @@ def get_pdf_pipeline_opts(
)
pipeline_options: Union[PdfPipelineOptions, VlmPipelineOptions]
if request.pipeline == PdfPipeline.STANDARD:
if request.pipeline == ProcessingPipeline.STANDARD:
pipeline_options = _parse_standard_pdf_opts(request, artifacts_path)
backend = _parse_backend(request)
pdf_format_option = PdfFormatOption(
@@ -225,7 +225,7 @@ def get_pdf_pipeline_opts(
backend=backend,
)
elif request.pipeline == PdfPipeline.VLM:
elif request.pipeline == ProcessingPipeline.VLM:
pipeline_options = _parse_vlm_pdf_opts(request, artifacts_path)
pdf_format_option = PdfFormatOption(
pipeline_cls=VlmPipeline, pipeline_options=pipeline_options

View File

@@ -3,6 +3,8 @@ import logging
import uuid
from typing import Optional
from docling.datamodel.base_models import InputFormat
from docling_serve.datamodel.convert import ConvertDocumentsOptions
from docling_serve.datamodel.task import Task, TaskSource
from docling_serve.docling_conversion import get_converter, get_pdf_pipeline_opts
@@ -54,4 +56,5 @@ class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
async def warm_up_caches(self):
# Converter with default options
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
get_converter(pdf_format_option)
converter = get_converter(pdf_format_option)
converter.initialize_pipeline(InputFormat.PDF)

View File

@@ -1,5 +1,6 @@
import base64
import importlib
import itertools
import json
import logging
import ssl
@@ -12,9 +13,10 @@ import certifi
import gradio as gr
import httpx
from docling.datamodel.base_models import FormatToExtensions
from docling.datamodel.pipeline_options import (
PdfBackend,
PdfPipeline,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
)
@@ -545,19 +547,10 @@ with gr.Blocks(
elem_id="file_input_zone",
label="Upload File",
file_types=[
".pdf",
".docx",
".pptx",
".html",
".xlsx",
".json",
".asciidoc",
".txt",
".md",
".jpg",
".jpeg",
".png",
".gif",
f".{v}"
for v in itertools.chain.from_iterable(
FormatToExtensions.values()
)
],
file_count="multiple",
scale=4,
@@ -594,9 +587,9 @@ with gr.Blocks(
with gr.Row():
with gr.Column(scale=1, min_width=200):
pipeline = gr.Radio(
[(v.value.capitalize(), v.value) for v in PdfPipeline],
[(v.value.capitalize(), v.value) for v in ProcessingPipeline],
label="Pipeline type",
value=PdfPipeline.STANDARD.value,
value=ProcessingPipeline.STANDARD.value,
)
with gr.Row():
with gr.Column(scale=1, min_width=200):

View File

@@ -36,7 +36,7 @@ def FormDepends(cls: type[BaseModel]):
annotation = model_field.annotation
description = model_field.description
default = (
Form(..., description=description)
Form(..., description=description, examples=model_field.examples)
if model_field.is_required()
else Form(
model_field.default,

View File

@@ -41,6 +41,7 @@ class DoclingServeSettings(BaseSettings):
scratch_path: Optional[Path] = None
single_use_results: bool = True
result_removal_delay: float = 300 # 5 minutes
load_models_at_boot: bool = True
options_cache_size: int = 2
enable_remote_services: bool = False
allow_external_plugins: bool = False

View File

@@ -1,4 +1,4 @@
# Dolcing Serve documentation
# Docling Serve documentation
These documentation pages explore the webserver configurations, runtime options, deployment examples as well as development best practices.

View File

@@ -7,7 +7,7 @@ server and the actual app-specific configurations.
> [!WARNING]
> When the server is running with `reload` or with multiple `workers`, uvicorn
> will spawn multiple subprocessed. This invalides all the values configured
> will spawn multiple subprocesses. This invalidates all the values configured
> via the CLI command line options. Please use environment variables in this
> type of deployments.
@@ -47,6 +47,7 @@ THe following table describes the options to configure the Docling Serve app.
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
@@ -60,7 +61,7 @@ The selected compute engine will be running all the async jobs.
#### Local engine
The following table describes the options to configure the Docling Serve KFP engine.
The following table describes the options to configure the Docling Serve local engine.
| ENV | Default | Description |
|-----|---------|-------------|

View File

@@ -22,8 +22,8 @@ spec:
- name: api
resources:
limits:
cpu: 500m
memory: 2Gi
cpu: 2
memory: 4Gi
requests:
cpu: 250m
memory: 1Gi

View File

@@ -85,7 +85,7 @@ spec:
resources:
limits:
cpu: 2000m
memory: 2Gi
memory: 4Gi
requests:
cpu: 800m
memory: 1Gi

View File

@@ -60,8 +60,8 @@ spec:
- name: api
resources:
limits:
cpu: 500m
memory: 2Gi
cpu: 1
memory: 4Gi
requests:
cpu: 250m
memory: 1Gi

View File

@@ -40,8 +40,8 @@ spec:
- name: api
resources:
limits:
cpu: 500m
memory: 2Gi
cpu: 1
memory: 4Gi
nvidia.com/gpu: 1 # Limit to one GPU
requests:
cpu: 250m

View File

@@ -13,13 +13,13 @@ On top of the source of file (see below), both endpoints support the same parame
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesserocr`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`. To use the `tesserocr` engine, `tesserocr` must be installed where docling-serve is running: `pip install tesserocr`
- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
- `return_as_file` (bool): If enabled, return the output as a file. Defaults to false.
- `md_page_break_placeholder` (str): Add this placeholder betweek pages in the markdown output.
- `md_page_break_placeholder` (str): Add this placeholder between pages in the markdown output.
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
@@ -288,33 +288,42 @@ The api option is specified with:
Example URLs are:
- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `params`:
- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `picture_description_api`:
- the `HuggingFaceTB/SmolVLM-256M-Instruct` model
```json
{
"url": "http://localhost:8000/v1/chat/completions",
"params": {
"model": "HuggingFaceTB/SmolVLM-256M-Instruct",
"max_completion_tokens": 200
}
}
```
- the `ibm-granite/granite-vision-3.2-2b` model
```json
{
"url": "http://localhost:8000/v1/chat/completions",
"params": {
"model": "ibm-granite/granite-vision-3.2-2b",
"max_completion_tokens": 200
}
}
```
- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `params`:
- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `picture_description_api`:
- the `granite3.2-vision:2b` model
```json
{
"url": "http://localhost:11434/v1/chat/completions",
"params": {
"model": "granite3.2-vision:2b"
}
}
```
```
Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.

View File

@@ -1,6 +1,7 @@
tesseract
tesseract-devel
tesseract-langpack-eng
tesseract-osd
leptonica-devel
libglvnd-glx
glib2

View File

@@ -1,6 +1,6 @@
[project]
name = "docling-serve"
version = "0.12.0" # DO NOT EDIT, updated automatically
version = "0.16.1" # DO NOT EDIT, updated automatically
description = "Running Docling as a service"
license = {text = "MIT"}
authors = [
@@ -26,11 +26,15 @@ classifiers = [
# "Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Typing :: Typed",
"Programming Language :: Python :: 3"
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
requires-python = ">=3.10"
dependencies = [
"docling[vlm]~=2.28",
"docling[vlm]~=2.38",
"docling-core>=2.32.0",
"mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
"fastapi[standard]~=0.115",
@@ -42,6 +46,7 @@ dependencies = [
"typer~=0.12",
"uvicorn[standard]>=0.29.0,<1.0.0",
"websockets~=14.0",
"scalar-fastapi>=1.0.3",
]
[project.optional-dependencies]
@@ -56,14 +61,6 @@ rapidocr = [
"rapidocr-onnxruntime~=1.4; python_version<'3.13'",
"onnxruntime~=1.7",
]
cpu = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
flash-attn = [
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
]
@@ -79,18 +76,39 @@ dev = [
"python-semantic-release~=7.32",
"ruff>=0.9.6",
]
pypi = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cpu = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu126 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu128 = [
"torch>=2.7.0",
"torchvision>=0.22.0",
]
[tool.uv]
package = true
default-groups = ["dev", "pypi"]
conflicts = [
[
{ extra = "cpu" },
{ extra = "cu124" },
{ group = "pypi" },
{ group = "cpu" },
{ group = "cu124" },
{ group = "cu126" },
{ group = "cu128" },
],
[
{ extra = "cpu" },
{ extra = "flash-attn" },
],]
]
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
override-dependencies = [
"urllib3~=2.0"
@@ -98,14 +116,25 @@ override-dependencies = [
[tool.uv.sources]
torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
{ index = "pytorch-pypi", group = "pypi" },
{ index = "pytorch-cpu", group = "cpu" },
{ index = "pytorch-cu124", group = "cu124" },
{ index = "pytorch-cu126", group = "cu126" },
{ index = "pytorch-cu128", group = "cu128" },
]
torchvision = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
{ index = "pytorch-pypi", group = "pypi" },
{ index = "pytorch-cpu", group = "cpu" },
{ index = "pytorch-cu124", group = "cu124" },
{ index = "pytorch-cu126", group = "cu126" },
{ index = "pytorch-cu128", group = "cu128" },
]
[[tool.uv.index]]
name = "pytorch-pypi"
url = "https://pypi.org/simple"
explicit = true
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
@@ -116,6 +145,16 @@ name = "pytorch-cu124"
url = "https://download.pytorch.org/whl/cu124"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true
[tool.setuptools.packages.find]
include = ["docling_serve*"]
namespaces = true
@@ -213,6 +252,7 @@ module = [
"kfp.*",
"kfp_server_api.*",
"mlx_vlm.*",
"scalar_fastapi.*",
]
ignore_missing_imports = true

4080
uv.lock generated

File diff suppressed because one or more lines are too long