mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
Compare commits
35 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
496f7ec26b | ||
|
|
9d6def0ec8 | ||
|
|
a4fed2d965 | ||
|
|
b0360d723b | ||
|
|
4adc0dfa79 | ||
|
|
40c7f1bcd3 | ||
|
|
d64a2a974a | ||
|
|
0d4545a65a | ||
|
|
fe98338239 | ||
|
|
b844ce737e | ||
|
|
27fdd7b85a | ||
|
|
1df62adf01 | ||
|
|
e5449472b2 | ||
|
|
81f0a8ddf8 | ||
|
|
a69cc867f5 | ||
|
|
624f65d41b | ||
|
|
f02dbc0144 | ||
|
|
37fe02277b | ||
|
|
783ada0580 | ||
|
|
71edf41849 | ||
|
|
9a64410552 | ||
|
|
6e9aa8c759 | ||
|
|
885f319d3a | ||
|
|
d584895e11 | ||
|
|
d26e6637d8 | ||
|
|
7692eb2600 | ||
|
|
3bd7828570 | ||
|
|
8b470cba8e | ||
|
|
8048f4589a | ||
|
|
b3058e91e0 | ||
|
|
63da9eedeb | ||
|
|
b15dc2529f | ||
|
|
4c7207be00 | ||
|
|
db3fdb5bc1 | ||
|
|
fd1b987e8d |
56
.github/scripts/release.sh
vendored
56
.github/scripts/release.sh
vendored
@@ -3,32 +3,68 @@
|
||||
set -e # trigger failure on error - do not remove!
|
||||
set -x # display command on output
|
||||
|
||||
## debug
|
||||
# TARGET_VERSION="1.2.x"
|
||||
|
||||
if [ -z "${TARGET_VERSION}" ]; then
|
||||
>&2 echo "No TARGET_VERSION specified"
|
||||
exit 1
|
||||
fi
|
||||
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
|
||||
|
||||
# update package version
|
||||
# Update package version
|
||||
uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
|
||||
uv lock --upgrade-package docling-serve
|
||||
|
||||
# collect release notes
|
||||
# Extract all docling packages and versions from uv.lock
|
||||
DOCVERSIONS=$(uvx --with toml python3 - <<'PY'
|
||||
import toml
|
||||
data = toml.load("uv.lock")
|
||||
for pkg in data.get("package", []):
|
||||
if pkg["name"].startswith("docling"):
|
||||
print(f"{pkg['name']} {pkg['version']}")
|
||||
PY
|
||||
)
|
||||
|
||||
# Format docling versions list without trailing newline
|
||||
DOCLING_VERSIONS="### Docling libraries included in this release:"
|
||||
while IFS= read -r line; do
|
||||
DOCLING_VERSIONS+="
|
||||
- $line"
|
||||
done <<< "$DOCVERSIONS"
|
||||
|
||||
# Collect release notes
|
||||
REL_NOTES=$(mktemp)
|
||||
uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"
|
||||
|
||||
# update changelog
|
||||
# Strip trailing blank lines from release notes and append docling versions
|
||||
{
|
||||
sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${REL_NOTES}"
|
||||
printf "\n"
|
||||
printf "%s" "${DOCLING_VERSIONS}"
|
||||
printf "\n"
|
||||
} > "${REL_NOTES}.tmp" && mv "${REL_NOTES}.tmp" "${REL_NOTES}"
|
||||
|
||||
# Update changelog
|
||||
TMP_CHGLOG=$(mktemp)
|
||||
TARGET_TAG_NAME="v${TARGET_VERSION}"
|
||||
RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}"
|
||||
printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n" >> "${TMP_CHGLOG}"
|
||||
cat "${REL_NOTES}" >> "${TMP_CHGLOG}"
|
||||
if [ -f "${CHGLOG_FILE}" ]; then
|
||||
printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}"
|
||||
fi
|
||||
## debug
|
||||
#RELEASE_URL="myrepo/releases/tag/${TARGET_TAG_NAME}"
|
||||
|
||||
# Strip leading blank lines from existing changelog to avoid multiple blank lines when appending
|
||||
EXISTING_CL=$(sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${CHGLOG_FILE}")
|
||||
|
||||
{
|
||||
printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n"
|
||||
cat "${REL_NOTES}"
|
||||
printf "\n"
|
||||
printf "%s\n" "${EXISTING_CL}"
|
||||
} >> "${TMP_CHGLOG}"
|
||||
|
||||
mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"
|
||||
|
||||
# push changes
|
||||
# Push changes
|
||||
git config --global user.name 'github-actions[bot]'
|
||||
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
git add pyproject.toml uv.lock "${CHGLOG_FILE}"
|
||||
@@ -36,5 +72,5 @@ COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
|
||||
git commit -m "${COMMIT_MSG}"
|
||||
git push origin main
|
||||
|
||||
# create GitHub release (incl. Git tag)
|
||||
# Create GitHub release (incl. Git tag)
|
||||
gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}"
|
||||
|
||||
@@ -5,6 +5,8 @@ async
|
||||
(?i)urls
|
||||
uvicorn
|
||||
[Ww]ebserver
|
||||
RQ
|
||||
(?i)url
|
||||
keyfile
|
||||
[Ww]ebsocket(s?)
|
||||
[Kk]ubernetes
|
||||
@@ -19,8 +21,10 @@ Kubeflow
|
||||
(?i)PyTorch
|
||||
(?i)CUDA
|
||||
(?i)NVIDIA
|
||||
(?i)ROCm
|
||||
(?i)env
|
||||
Gradio
|
||||
Podman
|
||||
bool
|
||||
Ollama
|
||||
inbody
|
||||
|
||||
2
.github/workflows/actionlint.yml
vendored
2
.github/workflows/actionlint.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
actionlint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Download actionlint
|
||||
id: get_actionlint
|
||||
run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash)
|
||||
|
||||
8
.github/workflows/cd.yml
vendored
8
.github/workflows/cd.yml
vendored
@@ -11,11 +11,11 @@ jobs:
|
||||
outputs:
|
||||
TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
@@ -40,12 +40,12 @@ jobs:
|
||||
with:
|
||||
app-id: ${{ vars.CI_APP_ID }}
|
||||
private-key: ${{ secrets.CI_PRIVATE_KEY }}
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
|
||||
12
.github/workflows/ci-images-dryrun.yml
vendored
12
.github/workflows/ci-images-dryrun.yml
vendored
@@ -21,10 +21,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
@@ -33,6 +33,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
# platforms: linux/amd64
|
||||
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
13
.github/workflows/images.yml
vendored
13
.github/workflows/images.yml
vendored
@@ -25,10 +25,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
platforms: linux/amd64
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
@@ -37,7 +37,10 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
# platforms: linux/amd64
|
||||
permissions:
|
||||
packages: write
|
||||
contents: read
|
||||
|
||||
4
.github/workflows/job-build.yml
vendored
4
.github/workflows/job-build.yml
vendored
@@ -10,9 +10,9 @@ jobs:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
|
||||
29
.github/workflows/job-checks.yml
vendored
29
.github/workflows/job-checks.yml
vendored
@@ -10,9 +10,9 @@ jobs:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
run: uv sync --frozen --all-extras --no-extra flash-attn
|
||||
|
||||
- name: Run styling check
|
||||
run: pre-commit run --all-files
|
||||
run: uv run pre-commit run --all-files
|
||||
|
||||
build-package:
|
||||
uses: ./.github/workflows/job-build.yml
|
||||
@@ -47,21 +47,22 @@ jobs:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Create virtual environment
|
||||
run: uv venv
|
||||
- name: Install package
|
||||
run: uv pip install dist/*.whl
|
||||
- name: Create the server
|
||||
run: python -c 'from docling_serve.app import create_app; create_app()'
|
||||
|
||||
markdown-lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: markdownlint-cli2-action
|
||||
uses: DavidAnson/markdownlint-cli2-action@v16
|
||||
with:
|
||||
globs: "**/*.md"
|
||||
run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'
|
||||
|
||||
# markdown-lint:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v5
|
||||
# - name: markdownlint-cli2-action
|
||||
# uses: DavidAnson/markdownlint-cli2-action@v16
|
||||
# with:
|
||||
# globs: "**/*.md"
|
||||
|
||||
107
.github/workflows/job-image.yml
vendored
107
.github/workflows/job-image.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
df -h
|
||||
|
||||
- name: Check out the repo
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Log in to the GHCR container image registry
|
||||
if: ${{ inputs.publish }}
|
||||
@@ -88,12 +88,19 @@ jobs:
|
||||
with:
|
||||
images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
|
||||
|
||||
# # Local test
|
||||
# - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
|
||||
# id: ghcr_meta
|
||||
# run: |
|
||||
# echo "tags=ghcr.io/docling-project/docling-serve:pr-123" >> $GITHUB_OUTPUT
|
||||
# echo "labels=org.opencontainers.image.source=https://github.com/docling-project/docling-serve" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Build and push image to ghcr.io
|
||||
id: ghcr_push
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: ${{ inputs.publish }}
|
||||
push: ${{ inputs.publish }} # set 'false' for local test
|
||||
tags: ${{ steps.ghcr_meta.outputs.tags }}
|
||||
labels: ${{ steps.ghcr_meta.outputs.labels }}
|
||||
platforms: ${{ inputs.platforms }}
|
||||
@@ -101,6 +108,94 @@ jobs:
|
||||
cache-to: type=gha,mode=max
|
||||
file: Containerfile
|
||||
build-args: ${{ inputs.build_args }}
|
||||
##
|
||||
## This stage runs after the build, so it leverages all build cache
|
||||
##
|
||||
- name: Export built image for testing
|
||||
id: ghcr_export_built_image
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: false
|
||||
load: true
|
||||
tags: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test
|
||||
labels: |
|
||||
org.opencontainers.image.title=docling-serve
|
||||
org.opencontainers.image.test=true
|
||||
platforms: linux/amd64 # when 'load' is true, we can't use a list ${{ inputs.platforms }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
file: Containerfile
|
||||
build-args: ${{ inputs.build_args }}
|
||||
|
||||
- name: Test image
|
||||
if: steps.ghcr_export_built_image.outcome == 'success'
|
||||
run: |
|
||||
set -e
|
||||
|
||||
IMAGE_TAG="${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test"
|
||||
echo "Testing local image: $IMAGE_TAG"
|
||||
|
||||
# Remove existing container if any
|
||||
docker rm -f docling-serve-test-container 2>/dev/null || true
|
||||
|
||||
echo "Starting container..."
|
||||
docker run -d -p 5001:5001 --name docling-serve-test-container "$IMAGE_TAG"
|
||||
|
||||
echo "Waiting 15s for container to boot..."
|
||||
sleep 15
|
||||
|
||||
# Health check
|
||||
echo "Checking service health..."
|
||||
for i in {1..20}; do
|
||||
HEALTH_RESPONSE=$(curl -s http://localhost:5001/health || true)
|
||||
echo "Health check response [$i]: $HEALTH_RESPONSE"
|
||||
|
||||
if echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
|
||||
echo "Service is healthy!"
|
||||
|
||||
# Install pytest and dependencies
|
||||
echo "Installing pytest and dependencies..."
|
||||
pip install uv
|
||||
uv venv --allow-existing
|
||||
source .venv/bin/activate
|
||||
uv sync --all-extras --no-extra flash-attn
|
||||
|
||||
# Run pytest tests
|
||||
echo "Running tests..."
|
||||
# Test import
|
||||
python -c 'from docling_serve.app import create_app; create_app()'
|
||||
|
||||
# Run pytest and check result directly
|
||||
if ! pytest -sv -k "test_convert_url" tests/test_1-url-async.py \
|
||||
--disable-warnings; then
|
||||
echo "Tests failed!"
|
||||
docker logs docling-serve-test-container
|
||||
docker rm -f docling-serve-test-container
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Tests passed successfully!"
|
||||
break
|
||||
else
|
||||
echo "Waiting for service... [$i/20]"
|
||||
sleep 3
|
||||
fi
|
||||
done
|
||||
|
||||
# Final health check if service didn't pass earlier
|
||||
if ! echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
|
||||
echo "Service did not become healthy in time."
|
||||
docker logs docling-serve-test-container
|
||||
docker rm -f docling-serve-test-container
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
echo "Cleaning up test container..."
|
||||
docker rm -f docling-serve-test-container
|
||||
echo "Cleaning up test image..."
|
||||
docker rmi "$IMAGE_TAG"
|
||||
|
||||
- name: Generate artifact attestation
|
||||
if: ${{ inputs.publish }}
|
||||
@@ -120,7 +215,7 @@ jobs:
|
||||
- name: Build and push image to quay.io
|
||||
if: ${{ inputs.publish }}
|
||||
# id: push-serve-cpu-quay
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: ${{ inputs.publish }}
|
||||
@@ -132,10 +227,6 @@ jobs:
|
||||
file: Containerfile
|
||||
build-args: ${{ inputs.build_args }}
|
||||
|
||||
# - name: Inspect the image details
|
||||
# run: |
|
||||
# echo "${{ steps.ghcr_push.outputs.metadata }}"
|
||||
|
||||
- name: Remove Local Docker Images
|
||||
run: |
|
||||
docker image prune -af
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -446,3 +446,6 @@ pip-selfcheck.json
|
||||
.markdown-lint
|
||||
|
||||
cookies.txt
|
||||
|
||||
# Examples
|
||||
/examples/splitted_pdf/*
|
||||
@@ -7,12 +7,12 @@ repos:
|
||||
- id: ruff-format
|
||||
name: "Ruff formatter"
|
||||
args: [--config=pyproject.toml]
|
||||
files: '^(docling_serve|tests).*\.(py|ipynb)$'
|
||||
files: '^(docling_serve|tests|examples).*\.(py|ipynb)$'
|
||||
# Run the Ruff linter.
|
||||
- id: ruff
|
||||
name: "Ruff linter"
|
||||
args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
|
||||
files: '^(docling_serve|tests).*\.(py|ipynb)$'
|
||||
files: '^(docling_serve|tests|examples).*\.(py|ipynb)$'
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: system
|
||||
@@ -33,7 +33,7 @@ repos:
|
||||
args: ["--config=.github/vale.ini"]
|
||||
files: \.md$
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
# uv version.
|
||||
rev: 0.7.13
|
||||
# uv version, https://github.com/astral-sh/uv-pre-commit/releases
|
||||
rev: 0.8.3
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
||||
108
CHANGELOG.md
108
CHANGELOG.md
@@ -1,3 +1,111 @@
|
||||
## [v1.5.0](https://github.com/docling-project/docling-serve/releases/tag/v1.5.0) - 2025-09-09
|
||||
|
||||
### Feature
|
||||
|
||||
* Add chunking endpoints ([#353](https://github.com/docling-project/docling-serve/issues/353)) ([`9d6def0`](https://github.com/docling-project/docling-serve/commit/9d6def0ec8b1804ad31aa71defa17658d73d29a1))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.46.0
|
||||
- docling 2.51.0
|
||||
- docling-core 2.47.0
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.5.0
|
||||
- docling-mcp 1.2.0
|
||||
- docling-parse 4.4.0
|
||||
- docling-serve 1.5.0
|
||||
|
||||
## [v1.4.1](https://github.com/docling-project/docling-serve/releases/tag/v1.4.1) - 2025-09-08
|
||||
|
||||
### Fix
|
||||
|
||||
* Trigger fix after ci fixes ([#355](https://github.com/docling-project/docling-serve/issues/355)) ([`b0360d7`](https://github.com/docling-project/docling-serve/commit/b0360d723bff202dcf44a25a3173ec1995945fc2))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.46.0
|
||||
- docling 2.51.0
|
||||
- docling-core 2.47.0
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.4.1
|
||||
- docling-mcp 1.2.0
|
||||
- docling-parse 4.4.0
|
||||
- docling-serve 1.4.1
|
||||
|
||||
## [v1.4.0](https://github.com/docling-project/docling-serve/releases/tag/v1.4.0) - 2025-09-05
|
||||
|
||||
### Feature
|
||||
|
||||
* **docling:** Performance improvements in parsing, new layout model, fixes in HTML processing ([#352](https://github.com/docling-project/docling-serve/issues/352)) ([`d64a2a9`](https://github.com/docling-project/docling-serve/commit/d64a2a974a276c7ae3b105c448fd79f77a653d20))
|
||||
|
||||
### Fix
|
||||
|
||||
* Upgrade to latest docling version with fixes ([#335](https://github.com/docling-project/docling-serve/issues/335)) ([`e544947`](https://github.com/docling-project/docling-serve/commit/e5449472b2a3e71796f41c8a58c251d8229305c1))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Add split processing example ([#303](https://github.com/docling-project/docling-serve/issues/303)) ([`0d4545a`](https://github.com/docling-project/docling-serve/commit/0d4545a65a5a941fc1fdefda57e39cfb1ea106ab))
|
||||
* Document DOCLING_NUM_THREADS environment variable ([#341](https://github.com/docling-project/docling-serve/issues/341)) ([`27fdd7b`](https://github.com/docling-project/docling-serve/commit/27fdd7b85ab18b3eece428366f46dc5cf0995e38))
|
||||
* Fix parameters typo ([#333](https://github.com/docling-project/docling-serve/issues/333)) ([`81f0a8d`](https://github.com/docling-project/docling-serve/commit/81f0a8ddf80a532042d550ae4568f891458b45e7))
|
||||
* Describe how to use Docling MCP ([#332](https://github.com/docling-project/docling-serve/issues/332)) ([`a69cc86`](https://github.com/docling-project/docling-serve/commit/a69cc867f5a3fb76648803ca866d65cc3a75c6b8))
|
||||
|
||||
### Docling libraries included in this release:
|
||||
- docling 2.46.0
|
||||
- docling 2.51.0
|
||||
- docling-core 2.47.0
|
||||
- docling-ibm-models 3.9.1
|
||||
- docling-jobkit 1.4.1
|
||||
- docling-mcp 1.2.0
|
||||
- docling-parse 4.4.0
|
||||
- docling-serve 1.4.0
|
||||
|
||||
## [v1.3.1](https://github.com/docling-project/docling-serve/releases/tag/v1.3.1) - 2025-08-21
|
||||
|
||||
### Fix
|
||||
|
||||
* Configuration and performance fixes via upgrade of packages ([#328](https://github.com/docling-project/docling-serve/issues/328)) ([`f02dbc0`](https://github.com/docling-project/docling-serve/commit/f02dbc01449fe1caf3fb4a73c0a5f4adf8265faf))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix parameter in api key docs ([#323](https://github.com/docling-project/docling-serve/issues/323)) ([`37fe022`](https://github.com/docling-project/docling-serve/commit/37fe02277b3e2358eced28e15b4360e7c82d3b43))
|
||||
|
||||
## [v1.3.0](https://github.com/docling-project/docling-serve/releases/tag/v1.3.0) - 2025-08-14
|
||||
|
||||
### Feature
|
||||
|
||||
* Add configuration option for apikey security ([#322](https://github.com/docling-project/docling-serve/issues/322)) ([`9a64410`](https://github.com/docling-project/docling-serve/commit/9a644105523d312431993ded8dd88e064550a5db))
|
||||
* Add RQ engine ([#315](https://github.com/docling-project/docling-serve/issues/315)) ([`885f319`](https://github.com/docling-project/docling-serve/commit/885f319d3a3488a4090869560447437a4104f14e))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Example of docling-serve deployment in the RQ engine mode ([#321](https://github.com/docling-project/docling-serve/issues/321)) ([`71edf41`](https://github.com/docling-project/docling-serve/commit/71edf4184960d8664ef9da20617e2d0f91793d36))
|
||||
* Handling models in docling-serve ([#319](https://github.com/docling-project/docling-serve/issues/319)) ([`6e9aa8c`](https://github.com/docling-project/docling-serve/commit/6e9aa8c759220458281c7fe4c87443ac41023eee))
|
||||
* Add Gradio cache usage ([#312](https://github.com/docling-project/docling-serve/issues/312)) ([`d584895`](https://github.com/docling-project/docling-serve/commit/d584895e1108d71a0f45deadcd3c669eb0a58133))
|
||||
|
||||
## [v1.2.2](https://github.com/docling-project/docling-serve/releases/tag/v1.2.2) - 2025-08-13
|
||||
|
||||
### Fix
|
||||
|
||||
* Update of transformers module to 4.55.1 ([#316](https://github.com/docling-project/docling-serve/issues/316)) ([`7692eb2`](https://github.com/docling-project/docling-serve/commit/7692eb26006fd4deaa021180c99e23a1b65de506))
|
||||
|
||||
## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
|
||||
|
||||
### Fix
|
||||
|
||||
* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
|
||||
* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
|
||||
* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
|
||||
* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
|
||||
|
||||
## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
|
||||
|
||||
### Feature
|
||||
|
||||
* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
|
||||
* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
|
||||
|
||||
## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30
|
||||
|
||||
### Feature
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
|
||||
|
||||
FROM ${BASE_IMAGE}
|
||||
ARG UV_VERSION=0.8.3
|
||||
|
||||
USER 0
|
||||
ARG UV_SYNC_EXTRA_ARGS=""
|
||||
|
||||
FROM ${BASE_IMAGE} AS docling-base
|
||||
|
||||
###################################################################################################
|
||||
# OS Layer #
|
||||
###################################################################################################
|
||||
|
||||
USER 0
|
||||
|
||||
RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
|
||||
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
|
||||
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
|
||||
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
|
||||
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
|
||||
|
||||
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
|
||||
|
||||
###################################################################################################
|
||||
# Docling layer #
|
||||
###################################################################################################
|
||||
|
||||
FROM docling-base
|
||||
|
||||
USER 1001
|
||||
|
||||
WORKDIR /opt/app-root/src
|
||||
|
||||
ENV \
|
||||
# On container environments, always set a thread budget to avoid undesired thread congestion.
|
||||
OMP_NUM_THREADS=4 \
|
||||
LANG=en_US.UTF-8 \
|
||||
LC_ALL=en_US.UTF-8 \
|
||||
@@ -40,9 +47,9 @@ ENV \
|
||||
UV_PROJECT_ENVIRONMENT=/opt/app-root \
|
||||
DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
|
||||
|
||||
ARG UV_SYNC_EXTRA_ARGS=""
|
||||
ARG UV_SYNC_EXTRA_ARGS
|
||||
|
||||
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
|
||||
RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
@@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
|
||||
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
|
||||
|
||||
COPY --chown=1001:0 ./docling_serve ./docling_serve
|
||||
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
|
||||
|
||||
RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
|
||||
28
Makefile
28
Makefile
@@ -60,6 +60,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-rocm-image
|
||||
docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: action-lint
|
||||
action-lint: .action-lint ## Lint GitHub Action workflows
|
||||
.action-lint: $(shell find .github -type f) | action-lint-file
|
||||
@@ -107,3 +114,24 @@ run-docling-cu124: ## Run the docling-serve container with GPU support and assig
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
|
||||
|
||||
.PHONY: run-docling-cu126
|
||||
run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
|
||||
|
||||
.PHONY: run-docling-cu128
|
||||
run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
|
||||
|
||||
.PHONY: run-docling-rocm
|
||||
run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
|
||||
$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
|
||||
|
||||
38
README.md
38
README.md
@@ -36,7 +36,8 @@ The server is available at
|
||||
- API <http://127.0.0.1:5001>
|
||||
- API documentation <http://127.0.0.1:5001/docs>
|
||||
- UI playground <http://127.0.0.1:5001/ui>
|
||||

|
||||
|
||||

|
||||
|
||||
Try it out with a simple conversion:
|
||||
|
||||
@@ -46,21 +47,36 @@ curl -X 'POST' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
### Container images
|
||||
### Container Images
|
||||
|
||||
Available container images:
|
||||
The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:
|
||||
|
||||
| Name | Description | Arch | Size |
|
||||
| -----|-------------|------|------|
|
||||
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
|
||||
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
|
||||
#### 📦 Distributed Images
|
||||
|
||||
| Image | Description | Architectures | Size |
|
||||
|-------|-------------|----------------|------|
|
||||
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
|
||||
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
|
||||
|
||||
#### 🚫 Not Distributed
|
||||
|
||||
An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
|
||||
|
||||
To build it locally:
|
||||
|
||||
```bash
|
||||
git clone --branch main git@github.com:docling-project/docling-serve.git
|
||||
cd docling-serve/
|
||||
make docling-serve-rocm-image
|
||||
```
|
||||
|
||||
For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).
|
||||
|
||||
Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import uvicorn
|
||||
from rich.console import Console
|
||||
|
||||
from docling_serve.settings import docling_serve_settings, uvicorn_settings
|
||||
from docling_serve.storage import get_scratch
|
||||
|
||||
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
||||
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
||||
@@ -361,6 +362,37 @@ def run(
|
||||
)
|
||||
|
||||
|
||||
@app.command()
def rq_worker() -> Any:
    """
    Run the [bold]Docling JobKit[/bold] RQ worker.
    """
    # These imports live in the command body, so the modules are loaded only
    # when this command is actually invoked.
    from docling_jobkit.convert.manager import DoclingConverterManagerConfig
    from docling_jobkit.orchestrators.rq.orchestrator import RQOrchestratorConfig
    from docling_jobkit.orchestrators.rq.worker import run_worker

    settings = docling_serve_settings

    # Queue-side configuration: Redis connection, result key prefix,
    # notification channel and the scratch directory returned by get_scratch().
    orchestrator_config = RQOrchestratorConfig(
        redis_url=settings.eng_rq_redis_url,
        results_prefix=settings.eng_rq_results_prefix,
        sub_channel=settings.eng_rq_sub_channel,
        scratch_dir=get_scratch(),
    )

    # Converter-side configuration, copied field by field from the
    # docling-serve settings.
    converter_config = DoclingConverterManagerConfig(
        artifacts_path=settings.artifacts_path,
        options_cache_size=settings.options_cache_size,
        enable_remote_services=settings.enable_remote_services,
        allow_external_plugins=settings.allow_external_plugins,
        max_num_pages=settings.max_num_pages,
        max_file_size=settings.max_file_size,
    )

    run_worker(rq_config=orchestrator_config, cm_config=converter_config)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
app()
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ from fastapi import (
|
||||
UploadFile,
|
||||
WebSocket,
|
||||
WebSocketDisconnect,
|
||||
status,
|
||||
)
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.openapi.docs import (
|
||||
@@ -34,9 +35,15 @@ from docling_jobkit.datamodel.callback import (
|
||||
ProgressCallbackRequest,
|
||||
ProgressCallbackResponse,
|
||||
)
|
||||
from docling_jobkit.datamodel.chunking import (
|
||||
BaseChunkerOptions,
|
||||
ChunkingExportOptions,
|
||||
HierarchicalChunkerOptions,
|
||||
HybridChunkerOptions,
|
||||
)
|
||||
from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
|
||||
from docling_jobkit.datamodel.s3_coords import S3Coordinates
|
||||
from docling_jobkit.datamodel.task import Task, TaskSource
|
||||
from docling_jobkit.datamodel.task import Task, TaskSource, TaskType
|
||||
from docling_jobkit.datamodel.task_targets import (
|
||||
InBodyTarget,
|
||||
TaskTarget,
|
||||
@@ -48,15 +55,19 @@ from docling_jobkit.orchestrators.base_orchestrator import (
|
||||
TaskNotFoundError,
|
||||
)
|
||||
|
||||
from docling_serve.auth import APIKeyAuth, AuthenticationResult
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
|
||||
from docling_serve.datamodel.requests import (
|
||||
ConvertDocumentsRequest,
|
||||
FileSourceRequest,
|
||||
GenericChunkDocumentsRequest,
|
||||
HttpSourceRequest,
|
||||
S3SourceRequest,
|
||||
TargetName,
|
||||
make_request_model,
|
||||
)
|
||||
from docling_serve.datamodel.responses import (
|
||||
ChunkDocumentResponse,
|
||||
ClearResponse,
|
||||
ConvertDocumentResponse,
|
||||
HealthCheckResponse,
|
||||
@@ -156,6 +167,7 @@ def create_app(): # noqa: C901
|
||||
offline_docs_assets = True
|
||||
_log.info("Found static assets.")
|
||||
|
||||
require_auth = APIKeyAuth(docling_serve_settings.api_key)
|
||||
app = FastAPI(
|
||||
title="Docling Serve",
|
||||
docs_url=None if offline_docs_assets else "/swagger",
|
||||
@@ -246,10 +258,11 @@ def create_app(): # noqa: C901
|
||||
########################
|
||||
|
||||
async def _enque_source(
|
||||
orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
|
||||
orchestrator: BaseOrchestrator,
|
||||
request: ConvertDocumentsRequest | GenericChunkDocumentsRequest,
|
||||
) -> Task:
|
||||
sources: list[TaskSource] = []
|
||||
for s in conversion_request.sources:
|
||||
for s in request.sources:
|
||||
if isinstance(s, FileSourceRequest):
|
||||
sources.append(FileSource.model_validate(s))
|
||||
elif isinstance(s, HttpSourceRequest):
|
||||
@@ -257,17 +270,40 @@ def create_app(): # noqa: C901
|
||||
elif isinstance(s, S3SourceRequest):
|
||||
sources.append(S3Coordinates.model_validate(s))
|
||||
|
||||
convert_options: ConvertDocumentsRequestOptions
|
||||
chunking_options: BaseChunkerOptions | None = None
|
||||
chunking_export_options = ChunkingExportOptions()
|
||||
task_type: TaskType
|
||||
if isinstance(request, ConvertDocumentsRequest):
|
||||
task_type = TaskType.CONVERT
|
||||
convert_options = request.options
|
||||
elif isinstance(request, GenericChunkDocumentsRequest):
|
||||
task_type = TaskType.CHUNK
|
||||
convert_options = request.convert_options
|
||||
chunking_options = request.chunking_options
|
||||
chunking_export_options.include_converted_doc = (
|
||||
request.include_converted_doc
|
||||
)
|
||||
else:
|
||||
raise RuntimeError("Uknown request type.")
|
||||
|
||||
task = await orchestrator.enqueue(
|
||||
task_type=task_type,
|
||||
sources=sources,
|
||||
options=conversion_request.options,
|
||||
target=conversion_request.target,
|
||||
convert_options=convert_options,
|
||||
chunking_options=chunking_options,
|
||||
chunking_export_options=chunking_export_options,
|
||||
target=request.target,
|
||||
)
|
||||
return task
|
||||
|
||||
async def _enque_file(
|
||||
orchestrator: BaseOrchestrator,
|
||||
files: list[UploadFile],
|
||||
options: ConvertDocumentsRequestOptions,
|
||||
task_type: TaskType,
|
||||
convert_options: ConvertDocumentsRequestOptions,
|
||||
chunking_options: BaseChunkerOptions | None,
|
||||
chunking_export_options: ChunkingExportOptions | None,
|
||||
target: TaskTarget,
|
||||
) -> Task:
|
||||
_log.info(f"Received {len(files)} files for processing.")
|
||||
@@ -281,7 +317,12 @@ def create_app(): # noqa: C901
|
||||
file_sources.append(DocumentStream(name=name, stream=buf))
|
||||
|
||||
task = await orchestrator.enqueue(
|
||||
sources=file_sources, options=options, target=target
|
||||
task_type=task_type,
|
||||
sources=file_sources,
|
||||
convert_options=convert_options,
|
||||
chunking_options=chunking_options,
|
||||
chunking_export_options=chunking_export_options,
|
||||
target=target,
|
||||
)
|
||||
return task
|
||||
|
||||
@@ -378,7 +419,7 @@ def create_app(): # noqa: C901
|
||||
response = RedirectResponse(url=logo_url)
|
||||
return response
|
||||
|
||||
@app.get("/health")
|
||||
@app.get("/health", tags=["health"])
|
||||
def health() -> HealthCheckResponse:
|
||||
return HealthCheckResponse()
|
||||
|
||||
@@ -390,7 +431,8 @@ def create_app(): # noqa: C901
|
||||
# Convert a document from URL(s)
|
||||
@app.post(
|
||||
"/v1/convert/source",
|
||||
response_model=ConvertDocumentResponse,
|
||||
tags=["convert"],
|
||||
response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
|
||||
responses={
|
||||
200: {
|
||||
"content": {"application/zip": {}},
|
||||
@@ -400,11 +442,12 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
async def process_url(
|
||||
background_tasks: BackgroundTasks,
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
conversion_request: ConvertDocumentsRequest,
|
||||
):
|
||||
task = await _enque_source(
|
||||
orchestrator=orchestrator, conversion_request=conversion_request
|
||||
orchestrator=orchestrator, request=conversion_request
|
||||
)
|
||||
completed = await _wait_task_complete(
|
||||
orchestrator=orchestrator, task_id=task.task_id
|
||||
@@ -417,16 +460,25 @@ def create_app(): # noqa: C901
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
|
||||
task = await orchestrator.get_raw_task(task_id=task.task_id)
|
||||
task_result = await orchestrator.task_result(task_id=task.task_id)
|
||||
if task_result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
)
|
||||
response = await prepare_response(
|
||||
task=task, orchestrator=orchestrator, background_tasks=background_tasks
|
||||
task_id=task.task_id,
|
||||
task_result=task_result,
|
||||
orchestrator=orchestrator,
|
||||
background_tasks=background_tasks,
|
||||
)
|
||||
return response
|
||||
|
||||
# Convert a document from file(s)
|
||||
@app.post(
|
||||
"/v1/convert/file",
|
||||
response_model=ConvertDocumentResponse,
|
||||
tags=["convert"],
|
||||
response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
|
||||
responses={
|
||||
200: {
|
||||
"content": {"application/zip": {}},
|
||||
@@ -435,6 +487,7 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
async def process_file(
|
||||
background_tasks: BackgroundTasks,
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
files: list[UploadFile],
|
||||
options: Annotated[
|
||||
@@ -444,7 +497,13 @@ def create_app(): # noqa: C901
|
||||
):
|
||||
target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
|
||||
task = await _enque_file(
|
||||
orchestrator=orchestrator, files=files, options=options, target=target
|
||||
task_type=TaskType.CONVERT,
|
||||
orchestrator=orchestrator,
|
||||
files=files,
|
||||
convert_options=options,
|
||||
chunking_options=None,
|
||||
chunking_export_options=None,
|
||||
target=target,
|
||||
)
|
||||
completed = await _wait_task_complete(
|
||||
orchestrator=orchestrator, task_id=task.task_id
|
||||
@@ -457,29 +516,40 @@ def create_app(): # noqa: C901
|
||||
detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
|
||||
)
|
||||
|
||||
task = await orchestrator.get_raw_task(task_id=task.task_id)
|
||||
task_result = await orchestrator.task_result(task_id=task.task_id)
|
||||
if task_result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
)
|
||||
response = await prepare_response(
|
||||
task=task, orchestrator=orchestrator, background_tasks=background_tasks
|
||||
task_id=task.task_id,
|
||||
task_result=task_result,
|
||||
orchestrator=orchestrator,
|
||||
background_tasks=background_tasks,
|
||||
)
|
||||
return response
|
||||
|
||||
# Convert a document from URL(s) using the async api
|
||||
@app.post(
|
||||
"/v1/convert/source/async",
|
||||
tags=["convert"],
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def process_url_async(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
conversion_request: ConvertDocumentsRequest,
|
||||
):
|
||||
task = await _enque_source(
|
||||
orchestrator=orchestrator, conversion_request=conversion_request
|
||||
orchestrator=orchestrator, request=conversion_request
|
||||
)
|
||||
task_queue_position = await orchestrator.get_queue_position(
|
||||
task_id=task.task_id
|
||||
)
|
||||
return TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
@@ -488,9 +558,11 @@ def create_app(): # noqa: C901
|
||||
# Convert a document from file(s) using the async api
|
||||
@app.post(
|
||||
"/v1/convert/file/async",
|
||||
tags=["convert"],
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def process_file_async(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
files: list[UploadFile],
|
||||
@@ -501,24 +573,253 @@ def create_app(): # noqa: C901
|
||||
):
|
||||
target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
|
||||
task = await _enque_file(
|
||||
orchestrator=orchestrator, files=files, options=options, target=target
|
||||
task_type=TaskType.CONVERT,
|
||||
orchestrator=orchestrator,
|
||||
files=files,
|
||||
convert_options=options,
|
||||
chunking_options=None,
|
||||
chunking_export_options=None,
|
||||
target=target,
|
||||
)
|
||||
task_queue_position = await orchestrator.get_queue_position(
|
||||
task_id=task.task_id
|
||||
)
|
||||
return TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
)
|
||||
|
||||
# Chunking endpoints
|
||||
async def _chunk_task_status(
    orchestrator: BaseOrchestrator, task: Task
) -> TaskStatusResponse:
    """Build the status payload returned by the asynchronous chunking routes."""
    task_queue_position = await orchestrator.get_queue_position(
        task_id=task.task_id
    )
    return TaskStatusResponse(
        task_id=task.task_id,
        task_type=task.task_type,
        task_status=task.task_status,
        task_position=task_queue_position,
        task_meta=task.processing_meta,
    )


async def _chunk_task_response(
    orchestrator: BaseOrchestrator,
    task: Task,
    background_tasks: BackgroundTasks,
):
    """
    Wait for a chunking task and turn its result into the HTTP response.

    Raises:
        HTTPException: 504 when the task did not complete within the
            synchronous wait, 404 when no result is stored for the task.
    """
    completed = await _wait_task_complete(
        orchestrator=orchestrator, task_id=task.task_id
    )

    if not completed:
        # TODO: abort task!
        # The exception must be raised, not returned: a returned HTTPException
        # is treated as the route's payload and never becomes a 504 response.
        raise HTTPException(
            status_code=504,
            detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
        )

    task_result = await orchestrator.task_result(task_id=task.task_id)
    if task_result is None:
        raise HTTPException(
            status_code=404,
            detail="Task result not found. Please wait for a completion status.",
        )
    return await prepare_response(
        task_id=task.task_id,
        task_result=task_result,
        orchestrator=orchestrator,
        background_tasks=background_tasks,
    )


# Register one set of four routes (source/file x sync/async) per chunker.
# The loop variables are only used in decorators and parameter annotations,
# which are evaluated when each function is defined, so every iteration
# registers routes bound to its own option and request classes.
for display_name, path_name, opt_cls in (
    ("HybridChunker", "hybrid", HybridChunkerOptions),
    ("HierarchicalChunker", "hierarchical", HierarchicalChunkerOptions),
):
    req_cls = make_request_model(opt_cls)

    @app.post(
        f"/v1/chunk/{path_name}/source/async",
        name=f"Chunk sources with {display_name} as async task",
        tags=["chunk"],
        response_model=TaskStatusResponse,
    )
    async def chunk_source_async(
        background_tasks: BackgroundTasks,
        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        request: req_cls,
    ):
        task = await _enque_source(orchestrator=orchestrator, request=request)
        return await _chunk_task_status(orchestrator=orchestrator, task=task)

    @app.post(
        f"/v1/chunk/{path_name}/file/async",
        name=f"Chunk files with {display_name} as async task",
        tags=["chunk"],
        response_model=TaskStatusResponse,
    )
    async def chunk_file_async(
        background_tasks: BackgroundTasks,
        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        files: list[UploadFile],
        convert_options: Annotated[
            ConvertDocumentsRequestOptions,
            FormDepends(
                ConvertDocumentsRequestOptions,
                prefix="convert_",
                excluded_fields=[
                    "to_formats",
                ],
            ),
        ],
        chunking_options: Annotated[
            opt_cls,
            # Build the form from this iteration's option class so the
            # dependency matches the annotated type.
            FormDepends(
                opt_cls,
                prefix="chunking_",
                excluded_fields=["chunker"],
            ),
        ],
        include_converted_doc: Annotated[
            bool,
            Form(
                description="If true, the output will include both the chunks and the converted document."
            ),
        ] = False,
        target_type: Annotated[
            TargetName,
            Form(description="Specification for the type of output target."),
        ] = TargetName.INBODY,
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
            task_type=TaskType.CHUNK,
            orchestrator=orchestrator,
            files=files,
            convert_options=convert_options,
            chunking_options=chunking_options,
            chunking_export_options=ChunkingExportOptions(
                include_converted_doc=include_converted_doc
            ),
            target=target,
        )
        return await _chunk_task_status(orchestrator=orchestrator, task=task)

    @app.post(
        f"/v1/chunk/{path_name}/source",
        name=f"Chunk sources with {display_name}",
        tags=["chunk"],
        response_model=ChunkDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
            }
        },
    )
    async def chunk_source(
        background_tasks: BackgroundTasks,
        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        request: req_cls,
    ):
        task = await _enque_source(orchestrator=orchestrator, request=request)
        return await _chunk_task_response(
            orchestrator=orchestrator,
            task=task,
            background_tasks=background_tasks,
        )

    @app.post(
        f"/v1/chunk/{path_name}/file",
        name=f"Chunk files with {display_name}",
        tags=["chunk"],
        response_model=ChunkDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
            }
        },
    )
    async def chunk_file(
        background_tasks: BackgroundTasks,
        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        files: list[UploadFile],
        convert_options: Annotated[
            ConvertDocumentsRequestOptions,
            FormDepends(
                ConvertDocumentsRequestOptions,
                prefix="convert_",
                excluded_fields=[
                    "to_formats",
                ],
            ),
        ],
        chunking_options: Annotated[
            opt_cls,
            # Build the form from this iteration's option class so the
            # dependency matches the annotated type.
            FormDepends(
                opt_cls,
                prefix="chunking_",
                excluded_fields=["chunker"],
            ),
        ],
        include_converted_doc: Annotated[
            bool,
            Form(
                description="If true, the output will include both the chunks and the converted document."
            ),
        ] = False,
        target_type: Annotated[
            TargetName,
            Form(description="Specification for the type of output target."),
        ] = TargetName.INBODY,
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
            task_type=TaskType.CHUNK,
            orchestrator=orchestrator,
            files=files,
            convert_options=convert_options,
            chunking_options=chunking_options,
            chunking_export_options=ChunkingExportOptions(
                include_converted_doc=include_converted_doc
            ),
            target=target,
        )
        return await _chunk_task_response(
            orchestrator=orchestrator,
            task=task,
            background_tasks=background_tasks,
        )
|
||||
|
||||
# Task status poll
|
||||
@app.get(
|
||||
"/v1/status/poll/{task_id}",
|
||||
tags=["tasks"],
|
||||
response_model=TaskStatusResponse,
|
||||
)
|
||||
async def task_status_poll(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
task_id: str,
|
||||
wait: Annotated[
|
||||
@@ -533,6 +834,7 @@ def create_app(): # noqa: C901
|
||||
raise HTTPException(status_code=404, detail="Task not found.")
|
||||
return TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
@@ -546,7 +848,15 @@ def create_app(): # noqa: C901
|
||||
websocket: WebSocket,
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
task_id: str,
|
||||
api_key: Annotated[str, Query()] = "",
|
||||
):
|
||||
if docling_serve_settings.api_key:
|
||||
if api_key != docling_serve_settings.api_key:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Api key is required as ?api_key=SECRET.",
|
||||
)
|
||||
|
||||
assert isinstance(orchestrator.notifier, WebsocketNotifier)
|
||||
await websocket.accept()
|
||||
|
||||
@@ -568,6 +878,7 @@ def create_app(): # noqa: C901
|
||||
task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
|
||||
task_response = TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
@@ -583,6 +894,7 @@ def create_app(): # noqa: C901
|
||||
)
|
||||
task_response = TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
@@ -605,7 +917,10 @@ def create_app(): # noqa: C901
|
||||
# Task result
|
||||
@app.get(
|
||||
"/v1/result/{task_id}",
|
||||
response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
|
||||
tags=["tasks"],
|
||||
response_model=ConvertDocumentResponse
|
||||
| PresignedUrlConvertDocumentResponse
|
||||
| ChunkDocumentResponse,
|
||||
responses={
|
||||
200: {
|
||||
"content": {"application/zip": {}},
|
||||
@@ -613,14 +928,23 @@ def create_app(): # noqa: C901
|
||||
},
|
||||
)
|
||||
async def task_result(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
):
|
||||
try:
|
||||
task = await orchestrator.get_raw_task(task_id=task_id)
|
||||
task_result = await orchestrator.task_result(task_id=task_id)
|
||||
if task_result is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
)
|
||||
response = await prepare_response(
|
||||
task=task, orchestrator=orchestrator, background_tasks=background_tasks
|
||||
task_id=task_id,
|
||||
task_result=task_result,
|
||||
orchestrator=orchestrator,
|
||||
background_tasks=background_tasks,
|
||||
)
|
||||
return response
|
||||
except TaskNotFoundError:
|
||||
@@ -629,9 +953,12 @@ def create_app(): # noqa: C901
|
||||
# Update task progress
|
||||
@app.post(
|
||||
"/v1/callback/task/progress",
|
||||
tags=["internal"],
|
||||
include_in_schema=False,
|
||||
response_model=ProgressCallbackResponse,
|
||||
)
|
||||
async def callback_task_progress(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
request: ProgressCallbackRequest,
|
||||
):
|
||||
@@ -650,9 +977,11 @@ def create_app(): # noqa: C901
|
||||
# Offload models
|
||||
@app.get(
|
||||
"/v1/clear/converters",
|
||||
tags=["clear"],
|
||||
response_model=ClearResponse,
|
||||
)
|
||||
async def clear_converters(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
):
|
||||
await orchestrator.clear_converters()
|
||||
@@ -661,9 +990,11 @@ def create_app(): # noqa: C901
|
||||
# Clean results
|
||||
@app.get(
|
||||
"/v1/clear/results",
|
||||
tags=["clear"],
|
||||
response_model=ClearResponse,
|
||||
)
|
||||
async def clear_results(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
older_then: float = 3600,
|
||||
):
|
||||
|
||||
56
docling_serve/auth.py
Normal file
56
docling_serve/auth.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi.security import APIKeyHeader
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Outcome of an API key check.
#   valid  - whether the check passed.
#   errors - messages explaining a failed check; empty by default.
#   detail - optional extra payload; the validator in this file stores the
#            accepted header value here on success.
class AuthenticationResult(BaseModel):
    valid: bool
    errors: list[str] = []
    detail: Any | None = None
|
||||
|
||||
|
||||
class APIKeyAuth(APIKeyHeader):
    """
    FastAPI dependency which validates the API key sent in a request header.

    The expected key is compared with the value of ``header_name``. When the
    configured ``api_key`` is empty, requests are never rejected.
    """

    def __init__(
        self,
        api_key: str,
        header_name: str = "X-Api-Key",
        fail_on_unauthorized: bool = True,
    ) -> None:
        """
        Args:
            api_key: The expected secret; an empty string disables rejection.
            header_name: Name of the HTTP header carrying the key.
            fail_on_unauthorized: When true (default), an invalid or missing
                key results in a 401 response; when false, the failed
                ``AuthenticationResult`` is returned to the caller instead.
        """
        self.api_key = api_key
        self.header_name = header_name
        self.fail_on_unauthorized = fail_on_unauthorized
        # auto_error=False: a missing header is reported through
        # _validate_api_key rather than by the base class.
        super().__init__(name=self.header_name, auto_error=False)

    async def _validate_api_key(
        self, header_api_key: str | None
    ) -> AuthenticationResult:
        """Compare the received header value with the configured key."""
        import secrets

        if header_api_key is None:
            return AuthenticationResult(
                valid=False, errors=[f"Missing header {self.header_name}."]
            )

        header_api_key = header_api_key.strip()

        # An empty configured key accepts any provided value. Otherwise compare
        # in constant time; both sides are encoded to bytes because
        # compare_digest rejects non-ASCII str arguments.
        if self.api_key == "" or secrets.compare_digest(
            header_api_key.encode("utf-8"), self.api_key.encode("utf-8")
        ):
            return AuthenticationResult(
                valid=True,
                detail=header_api_key,
            )

        return AuthenticationResult(
            valid=False,
            errors=["The provided API Key is invalid."],
        )

    async def __call__(self, request: Request) -> AuthenticationResult:  # type: ignore
        """
        Authenticate *request*.

        Raises:
            HTTPException: 401 when a key is configured, the request's key is
                missing or wrong, and ``fail_on_unauthorized`` is true.
        """
        header_api_key = await super().__call__(request=request)
        result = await self._validate_api_key(header_api_key)
        if self.fail_on_unauthorized and self.api_key and not result.valid:
            # Failed results carry their explanation in `errors`; `detail` is
            # unset for them, so fall back to the error messages.
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail=result.detail or " ".join(result.errors),
            )
        return result
|
||||
@@ -1,10 +1,14 @@
|
||||
import enum
|
||||
from typing import Annotated, Literal
|
||||
from functools import cache
|
||||
from typing import Annotated, Generic, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
from pydantic_core import PydanticCustomError
|
||||
from typing_extensions import Self
|
||||
from typing_extensions import Self, TypeVar
|
||||
|
||||
from docling_jobkit.datamodel.chunking import (
|
||||
BaseChunkerOptions,
|
||||
)
|
||||
from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
|
||||
from docling_jobkit.datamodel.s3_coords import S3Coordinates
|
||||
from docling_jobkit.datamodel.task_targets import (
|
||||
@@ -70,3 +74,52 @@ class ConvertDocumentsRequest(BaseModel):
|
||||
"error target", 'target kind "s3" requires source kind "s3"'
|
||||
)
|
||||
return self
|
||||
|
||||
|
||||
## Source chunking requests
|
||||
|
||||
|
||||
class BaseChunkDocumentsRequest(BaseModel):
    """Fields shared by every chunking request, independent of the chunker used."""

    # Options forwarded to the conversion step; defaults to the stock options.
    convert_options: Annotated[
        ConvertDocumentsRequestOptions, Field(description="Conversion options.")
    ] = ConvertDocumentsRequestOptions()
    # Required: no default is declared for the input sources.
    sources: Annotated[
        list[SourceRequestItem],
        Field(description="List of input document sources to process."),
    ]
    include_converted_doc: Annotated[
        bool,
        Field(
            description="If true, the output will include both the chunks and the converted document."
        ),
    ] = False
    # Where the result is delivered; defaults to returning it in the response body.
    target: Annotated[
        TaskTarget, Field(description="Specification for the type of output target.")
    ] = InBodyTarget()
|
||||
|
||||
|
||||
ChunkingOptT = TypeVar("ChunkingOptT", bound=BaseChunkerOptions)
|
||||
|
||||
|
||||
class GenericChunkDocumentsRequest(BaseChunkDocumentsRequest, Generic[ChunkingOptT]):
    """Chunking request parameterized by the type of its chunker options."""

    # No default is declared at this level; make_request_model() builds
    # subclasses that supply a default factory for this field.
    chunking_options: ChunkingOptT
|
||||
|
||||
|
||||
@cache
def make_request_model(
    opt_type: type[ChunkingOptT],
) -> type[GenericChunkDocumentsRequest[ChunkingOptT]]:
    """
    Build the request model for one concrete chunker options type.

    The returned class derives from GenericChunkDocumentsRequest[opt_type] and
    re-declares `chunking_options` so that it defaults to `opt_type()`.
    Results are memoized, so each options type yields a single model class.
    """
    model_name = f"{opt_type.__name__}DocumentsRequest"
    parent = GenericChunkDocumentsRequest[opt_type]  # type: ignore[valid-type]
    namespace = {
        "__annotations__": {"chunking_options": opt_type},
        "chunking_options": Field(
            default_factory=opt_type, description="Options specific to the chunker."
        ),
    }
    return type(model_name, (parent,), namespace)
|
||||
|
||||
@@ -5,8 +5,12 @@ from pydantic import BaseModel
|
||||
|
||||
from docling.datamodel.document import ConversionStatus, ErrorItem
|
||||
from docling.utils.profiling import ProfilingItem
|
||||
from docling_core.types.doc import DoclingDocument
|
||||
from docling_jobkit.datamodel.task_meta import TaskProcessingMeta
|
||||
from docling_jobkit.datamodel.result import (
|
||||
ChunkedDocumentResultItem,
|
||||
ExportDocumentResponse,
|
||||
ExportResult,
|
||||
)
|
||||
from docling_jobkit.datamodel.task_meta import TaskProcessingMeta, TaskType
|
||||
|
||||
|
||||
# Status
|
||||
@@ -18,17 +22,8 @@ class ClearResponse(BaseModel):
|
||||
status: str = "ok"
|
||||
|
||||
|
||||
class DocumentResponse(BaseModel):
|
||||
filename: str
|
||||
md_content: Optional[str] = None
|
||||
json_content: Optional[DoclingDocument] = None
|
||||
html_content: Optional[str] = None
|
||||
text_content: Optional[str] = None
|
||||
doctags_content: Optional[str] = None
|
||||
|
||||
|
||||
class ConvertDocumentResponse(BaseModel):
|
||||
document: DocumentResponse
|
||||
document: ExportDocumentResponse
|
||||
status: ConversionStatus
|
||||
errors: list[ErrorItem] = []
|
||||
processing_time: float
|
||||
@@ -36,16 +31,25 @@ class ConvertDocumentResponse(BaseModel):
|
||||
|
||||
|
||||
class PresignedUrlConvertDocumentResponse(BaseModel):
|
||||
status: ConversionStatus
|
||||
processing_time: float
|
||||
num_converted: int
|
||||
num_succeeded: int
|
||||
num_failed: int
|
||||
|
||||
|
||||
class ConvertDocumentErrorResponse(BaseModel):
|
||||
status: ConversionStatus
|
||||
|
||||
|
||||
class ChunkDocumentResponse(BaseModel):
    """Response body returned for a chunking task."""

    # Chunks produced by the chunker.
    chunks: list[ChunkedDocumentResultItem]
    # Export results of the documents the chunks were produced from.
    documents: list[ExportResult]
    # Processing time as reported by the task result.
    processing_time: float
|
||||
|
||||
|
||||
class TaskStatusResponse(BaseModel):
|
||||
task_id: str
|
||||
task_type: TaskType
|
||||
task_status: str
|
||||
task_position: Optional[int] = None
|
||||
task_meta: Optional[TaskProcessingMeta] = None
|
||||
|
||||
@@ -233,15 +233,21 @@ def change_ocr_lang(ocr_engine):
|
||||
return "english,chinese"
|
||||
|
||||
|
||||
def wait_task_finish(task_id: str, return_as_file: bool):
|
||||
def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
|
||||
conversion_sucess = False
|
||||
task_finished = False
|
||||
task_status = ""
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
ssl_ctx = get_ssl_context()
|
||||
while not task_finished:
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=15,
|
||||
)
|
||||
@@ -265,6 +271,7 @@ def wait_task_finish(task_id: str, return_as_file: bool):
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1/result/{task_id}",
|
||||
headers=headers,
|
||||
timeout=15,
|
||||
verify=ssl_ctx,
|
||||
)
|
||||
@@ -279,6 +286,7 @@ def wait_task_finish(task_id: str, return_as_file: bool):
|
||||
|
||||
|
||||
def process_url(
|
||||
auth,
|
||||
input_sources,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
@@ -326,11 +334,18 @@ def process_url(
|
||||
):
|
||||
logger.error("No input sources provided.")
|
||||
raise gr.Error("No input sources provided.", print_exception=False)
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
# Log only the header names: the values include the user's API key and must
# never be written to stdout or to the logs.
logger.debug("Request headers set: %s", list(headers))
|
||||
try:
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1/convert/source/async",
|
||||
json=parameters,
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
@@ -354,6 +369,7 @@ def file_to_base64(file):
|
||||
|
||||
|
||||
def process_file(
|
||||
auth,
|
||||
files,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
@@ -402,11 +418,16 @@ def process_file(
|
||||
"target": target,
|
||||
}
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
try:
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1/convert/source/async",
|
||||
json=parameters,
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
@@ -480,7 +501,7 @@ with gr.Blocks(
|
||||
css=css,
|
||||
theme=theme,
|
||||
title="Docling Serve",
|
||||
delete_cache=(3600, 3600), # Delete all files older than 1 hour every hour
|
||||
delete_cache=(3600, 36000), # Delete all files older than 10 hour every hour
|
||||
) as ui:
|
||||
# Constants stored in states to be able to pass them as inputs to functions
|
||||
processing_text = gr.State("Processing your document(s), please wait...")
|
||||
@@ -565,6 +586,15 @@ with gr.Blocks(
|
||||
file_process_btn = gr.Button("Process File", scale=1)
|
||||
file_reset_btn = gr.Button("Reset", scale=1)
|
||||
|
||||
# Auth
|
||||
with gr.Row(visible=bool(docling_serve_settings.api_key)):
|
||||
with gr.Column():
|
||||
auth = gr.Textbox(
|
||||
label="Authentication",
|
||||
placeholder="API Key",
|
||||
type="password",
|
||||
)
|
||||
|
||||
# Options
|
||||
with gr.Accordion("Options") as options:
|
||||
with gr.Row():
|
||||
@@ -590,6 +620,7 @@ with gr.Blocks(
|
||||
label="Image Export Mode",
|
||||
value="embedded",
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
pipeline = gr.Radio(
|
||||
@@ -724,6 +755,7 @@ with gr.Blocks(
|
||||
).then(
|
||||
process_url,
|
||||
inputs=[
|
||||
auth,
|
||||
url_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
@@ -750,7 +782,7 @@ with gr.Blocks(
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[task_id_rendered, return_as_file],
|
||||
inputs=[auth, task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
@@ -811,6 +843,7 @@ with gr.Blocks(
|
||||
).then(
|
||||
process_file,
|
||||
inputs=[
|
||||
auth,
|
||||
file_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
@@ -837,7 +870,7 @@ with gr.Blocks(
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[task_id_rendered, return_as_file],
|
||||
inputs=[auth, task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
|
||||
@@ -29,10 +29,15 @@ def is_pydantic_model(type_):
|
||||
|
||||
# Adapted from
|
||||
# https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
|
||||
def FormDepends(cls: type[BaseModel]):
|
||||
def FormDepends(
|
||||
cls: type[BaseModel], prefix: str = "", excluded_fields: list[str] = []
|
||||
):
|
||||
new_parameters = []
|
||||
|
||||
for field_name, model_field in cls.model_fields.items():
|
||||
if field_name in excluded_fields:
|
||||
continue
|
||||
|
||||
annotation = model_field.annotation
|
||||
description = model_field.description
|
||||
default = (
|
||||
@@ -63,7 +68,7 @@ def FormDepends(cls: type[BaseModel]):
|
||||
|
||||
new_parameters.append(
|
||||
inspect.Parameter(
|
||||
name=field_name,
|
||||
name=f"{prefix}{field_name}",
|
||||
kind=inspect.Parameter.POSITIONAL_ONLY,
|
||||
default=default,
|
||||
annotation=annotation,
|
||||
@@ -71,19 +76,23 @@ def FormDepends(cls: type[BaseModel]):
|
||||
)
|
||||
|
||||
async def as_form_func(**data):
|
||||
newdata = {}
|
||||
for field_name, model_field in cls.model_fields.items():
|
||||
value = data.get(field_name)
|
||||
if field_name in excluded_fields:
|
||||
continue
|
||||
value = data.get(f"{prefix}{field_name}")
|
||||
newdata[field_name] = value
|
||||
annotation = model_field.annotation
|
||||
|
||||
# Parse nested models from JSON string
|
||||
if value is not None and is_pydantic_model(annotation):
|
||||
try:
|
||||
validator = TypeAdapter(annotation)
|
||||
data[field_name] = validator.validate_json(value)
|
||||
newdata[field_name] = validator.validate_json(value)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Invalid JSON for field '{field_name}': {e}")
|
||||
|
||||
return cls(**data)
|
||||
return cls(**newdata)
|
||||
|
||||
sig = inspect.signature(as_form_func)
|
||||
sig = sig.replace(parameters=new_parameters)
|
||||
|
||||
@@ -3,6 +3,7 @@ from functools import lru_cache
|
||||
from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
|
||||
|
||||
from docling_serve.settings import AsyncEngine, docling_serve_settings
|
||||
from docling_serve.storage import get_scratch
|
||||
|
||||
|
||||
@lru_cache
|
||||
@@ -19,6 +20,8 @@ def get_async_orchestrator() -> BaseOrchestrator:
|
||||
|
||||
local_config = LocalOrchestratorConfig(
|
||||
num_workers=docling_serve_settings.eng_loc_num_workers,
|
||||
shared_models=docling_serve_settings.eng_loc_share_models,
|
||||
scratch_dir=get_scratch(),
|
||||
)
|
||||
|
||||
cm_config = DoclingConverterManagerConfig(
|
||||
@@ -32,6 +35,20 @@ def get_async_orchestrator() -> BaseOrchestrator:
|
||||
cm = DoclingConverterManager(config=cm_config)
|
||||
|
||||
return LocalOrchestrator(config=local_config, converter_manager=cm)
|
||||
elif docling_serve_settings.eng_kind == AsyncEngine.RQ:
|
||||
from docling_jobkit.orchestrators.rq.orchestrator import (
|
||||
RQOrchestrator,
|
||||
RQOrchestratorConfig,
|
||||
)
|
||||
|
||||
rq_config = RQOrchestratorConfig(
|
||||
redis_url=docling_serve_settings.eng_rq_redis_url,
|
||||
results_prefix=docling_serve_settings.eng_rq_results_prefix,
|
||||
sub_channel=docling_serve_settings.eng_rq_sub_channel,
|
||||
scratch_dir=get_scratch(),
|
||||
)
|
||||
|
||||
return RQOrchestrator(config=rq_config)
|
||||
elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
|
||||
from docling_jobkit.orchestrators.kfp.orchestrator import (
|
||||
KfpOrchestrator,
|
||||
|
||||
@@ -1,317 +1,78 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import httpx
|
||||
from fastapi import BackgroundTasks, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi import BackgroundTasks, Response
|
||||
|
||||
from docling.datamodel.base_models import OutputFormat
|
||||
from docling.datamodel.document import ConversionResult, ConversionStatus
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_jobkit.datamodel.task import Task
|
||||
from docling_jobkit.datamodel.task_targets import InBodyTarget, PutTarget, TaskTarget
|
||||
from docling_jobkit.datamodel.result import (
|
||||
ChunkedDocumentResult,
|
||||
DoclingTaskResult,
|
||||
ExportResult,
|
||||
RemoteTargetResult,
|
||||
ZipArchiveResult,
|
||||
)
|
||||
from docling_jobkit.orchestrators.base_orchestrator import (
|
||||
BaseOrchestrator,
|
||||
)
|
||||
|
||||
from docling_serve.datamodel.responses import (
|
||||
ChunkDocumentResponse,
|
||||
ConvertDocumentResponse,
|
||||
DocumentResponse,
|
||||
PresignedUrlConvertDocumentResponse,
|
||||
)
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
from docling_serve.storage import get_scratch
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _export_document_as_content(
|
||||
conv_res: ConversionResult,
|
||||
export_json: bool,
|
||||
export_html: bool,
|
||||
export_md: bool,
|
||||
export_txt: bool,
|
||||
export_doctags: bool,
|
||||
image_mode: ImageRefMode,
|
||||
md_page_break_placeholder: str,
|
||||
):
|
||||
document = DocumentResponse(filename=conv_res.input.file.name)
|
||||
|
||||
if conv_res.status == ConversionStatus.SUCCESS:
|
||||
new_doc = conv_res.document._make_copy_with_refmode(
|
||||
Path(), image_mode, page_no=None
|
||||
)
|
||||
|
||||
# Create the different formats
|
||||
if export_json:
|
||||
document.json_content = new_doc
|
||||
if export_html:
|
||||
document.html_content = new_doc.export_to_html(image_mode=image_mode)
|
||||
if export_txt:
|
||||
document.text_content = new_doc.export_to_markdown(
|
||||
strict_text=True,
|
||||
image_mode=image_mode,
|
||||
)
|
||||
if export_md:
|
||||
document.md_content = new_doc.export_to_markdown(
|
||||
image_mode=image_mode,
|
||||
page_break_placeholder=md_page_break_placeholder or None,
|
||||
)
|
||||
if export_doctags:
|
||||
document.doctags_content = new_doc.export_to_doctags()
|
||||
elif conv_res.status == ConversionStatus.SKIPPED:
|
||||
raise HTTPException(status_code=400, detail=conv_res.errors)
|
||||
else:
|
||||
raise HTTPException(status_code=500, detail=conv_res.errors)
|
||||
|
||||
return document
|
||||
|
||||
|
||||
def _export_documents_as_files(
|
||||
conv_results: Iterable[ConversionResult],
|
||||
output_dir: Path,
|
||||
export_json: bool,
|
||||
export_html: bool,
|
||||
export_md: bool,
|
||||
export_txt: bool,
|
||||
export_doctags: bool,
|
||||
image_export_mode: ImageRefMode,
|
||||
md_page_break_placeholder: str,
|
||||
) -> ConversionStatus:
|
||||
success_count = 0
|
||||
failure_count = 0
|
||||
|
||||
# Default failure in case results is empty
|
||||
conv_result = ConversionStatus.FAILURE
|
||||
|
||||
artifacts_dir = Path("artifacts/") # will be relative to the fname
|
||||
|
||||
for conv_res in conv_results:
|
||||
conv_result = conv_res.status
|
||||
if conv_res.status == ConversionStatus.SUCCESS:
|
||||
success_count += 1
|
||||
doc_filename = conv_res.input.file.stem
|
||||
|
||||
# Export JSON format:
|
||||
if export_json:
|
||||
fname = output_dir / f"{doc_filename}.json"
|
||||
_log.info(f"writing JSON output to {fname}")
|
||||
conv_res.document.save_as_json(
|
||||
filename=fname,
|
||||
image_mode=image_export_mode,
|
||||
artifacts_dir=artifacts_dir,
|
||||
)
|
||||
|
||||
# Export HTML format:
|
||||
if export_html:
|
||||
fname = output_dir / f"{doc_filename}.html"
|
||||
_log.info(f"writing HTML output to {fname}")
|
||||
conv_res.document.save_as_html(
|
||||
filename=fname,
|
||||
image_mode=image_export_mode,
|
||||
artifacts_dir=artifacts_dir,
|
||||
)
|
||||
|
||||
# Export Text format:
|
||||
if export_txt:
|
||||
fname = output_dir / f"{doc_filename}.txt"
|
||||
_log.info(f"writing TXT output to {fname}")
|
||||
conv_res.document.save_as_markdown(
|
||||
filename=fname,
|
||||
strict_text=True,
|
||||
image_mode=ImageRefMode.PLACEHOLDER,
|
||||
)
|
||||
|
||||
# Export Markdown format:
|
||||
if export_md:
|
||||
fname = output_dir / f"{doc_filename}.md"
|
||||
_log.info(f"writing Markdown output to {fname}")
|
||||
conv_res.document.save_as_markdown(
|
||||
filename=fname,
|
||||
artifacts_dir=artifacts_dir,
|
||||
image_mode=image_export_mode,
|
||||
page_break_placeholder=md_page_break_placeholder or None,
|
||||
)
|
||||
|
||||
# Export Document Tags format:
|
||||
if export_doctags:
|
||||
fname = output_dir / f"{doc_filename}.doctags"
|
||||
_log.info(f"writing Doc Tags output to {fname}")
|
||||
conv_res.document.save_as_doctags(filename=fname)
|
||||
|
||||
else:
|
||||
_log.warning(f"Document {conv_res.input.file} failed to convert.")
|
||||
failure_count += 1
|
||||
|
||||
_log.info(
|
||||
f"Processed {success_count + failure_count} docs, "
|
||||
f"of which {failure_count} failed"
|
||||
)
|
||||
return conv_result
|
||||
|
||||
|
||||
def process_results(
|
||||
conversion_options: ConvertDocumentsOptions,
|
||||
target: TaskTarget,
|
||||
conv_results: Iterable[ConversionResult],
|
||||
work_dir: Path,
|
||||
) -> Union[ConvertDocumentResponse, FileResponse, PresignedUrlConvertDocumentResponse]:
|
||||
# Let's start by processing the documents
|
||||
try:
|
||||
start_time = time.monotonic()
|
||||
|
||||
# Convert the iterator to a list to count the number of results and get timings
|
||||
# As it's an iterator (lazy evaluation), it will also start the conversion
|
||||
conv_results = list(conv_results)
|
||||
|
||||
processing_time = time.monotonic() - start_time
|
||||
|
||||
_log.info(
|
||||
f"Processed {len(conv_results)} docs in {processing_time:.2f} seconds."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if len(conv_results) == 0:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="No documents were generated by Docling."
|
||||
)
|
||||
|
||||
# We have some results, let's prepare the response
|
||||
response: Union[
|
||||
FileResponse, ConvertDocumentResponse, PresignedUrlConvertDocumentResponse
|
||||
]
|
||||
|
||||
# Booleans to know what to export
|
||||
export_json = OutputFormat.JSON in conversion_options.to_formats
|
||||
export_html = OutputFormat.HTML in conversion_options.to_formats
|
||||
export_md = OutputFormat.MARKDOWN in conversion_options.to_formats
|
||||
export_txt = OutputFormat.TEXT in conversion_options.to_formats
|
||||
export_doctags = OutputFormat.DOCTAGS in conversion_options.to_formats
|
||||
|
||||
# Only 1 document was processed, and we are not returning it as a file
|
||||
if len(conv_results) == 1 and isinstance(target, InBodyTarget):
|
||||
conv_res = conv_results[0]
|
||||
document = _export_document_as_content(
|
||||
conv_res,
|
||||
export_json=export_json,
|
||||
export_html=export_html,
|
||||
export_md=export_md,
|
||||
export_txt=export_txt,
|
||||
export_doctags=export_doctags,
|
||||
image_mode=conversion_options.image_export_mode,
|
||||
md_page_break_placeholder=conversion_options.md_page_break_placeholder,
|
||||
)
|
||||
|
||||
response = ConvertDocumentResponse(
|
||||
document=document,
|
||||
status=conv_res.status,
|
||||
processing_time=processing_time,
|
||||
timings=conv_res.timings,
|
||||
)
|
||||
|
||||
# Multiple documents were processed, or we are forced returning as a file
|
||||
else:
|
||||
# Temporary directory to store the outputs
|
||||
output_dir = work_dir / "output"
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Worker pid to use in archive identification as we may have multiple workers
|
||||
os.getpid()
|
||||
|
||||
# Export the documents
|
||||
conv_result = _export_documents_as_files(
|
||||
conv_results=conv_results,
|
||||
output_dir=output_dir,
|
||||
export_json=export_json,
|
||||
export_html=export_html,
|
||||
export_md=export_md,
|
||||
export_txt=export_txt,
|
||||
export_doctags=export_doctags,
|
||||
image_export_mode=conversion_options.image_export_mode,
|
||||
md_page_break_placeholder=conversion_options.md_page_break_placeholder,
|
||||
)
|
||||
|
||||
files = os.listdir(output_dir)
|
||||
if len(files) == 0:
|
||||
raise HTTPException(status_code=500, detail="No documents were exported.")
|
||||
|
||||
file_path = work_dir / "converted_docs.zip"
|
||||
shutil.make_archive(
|
||||
base_name=str(file_path.with_suffix("")),
|
||||
format="zip",
|
||||
root_dir=output_dir,
|
||||
)
|
||||
|
||||
# Other cleanups after the response is sent
|
||||
# Output directory
|
||||
# background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)
|
||||
|
||||
if isinstance(target, PutTarget):
|
||||
try:
|
||||
with open(file_path, "rb") as file_data:
|
||||
r = httpx.put(str(target.url), files={"file": file_data})
|
||||
r.raise_for_status()
|
||||
response = PresignedUrlConvertDocumentResponse(
|
||||
status=conv_result,
|
||||
processing_time=processing_time,
|
||||
)
|
||||
except Exception as exc:
|
||||
_log.error("An error occour while uploading zip to s3", exc_info=exc)
|
||||
raise HTTPException(
|
||||
status_code=500, detail="An error occour while uploading zip to s3."
|
||||
)
|
||||
else:
|
||||
response = FileResponse(
|
||||
file_path, filename=file_path.name, media_type="application/zip"
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
async def prepare_response(
|
||||
task: Task, orchestrator: BaseOrchestrator, background_tasks: BackgroundTasks
|
||||
task_id: str,
|
||||
task_result: DoclingTaskResult,
|
||||
orchestrator: BaseOrchestrator,
|
||||
background_tasks: BackgroundTasks,
|
||||
):
|
||||
if task.results is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Task result not found. Please wait for a completion status.",
|
||||
response: (
|
||||
Response
|
||||
| ConvertDocumentResponse
|
||||
| PresignedUrlConvertDocumentResponse
|
||||
| ChunkDocumentResponse
|
||||
)
|
||||
assert task.options is not None
|
||||
|
||||
work_dir = get_scratch() / task.task_id
|
||||
response = process_results(
|
||||
conversion_options=task.options,
|
||||
target=task.target,
|
||||
conv_results=task.results,
|
||||
work_dir=work_dir,
|
||||
if isinstance(task_result.result, ExportResult):
|
||||
response = ConvertDocumentResponse(
|
||||
document=task_result.result.content,
|
||||
status=task_result.result.status,
|
||||
processing_time=task_result.processing_time,
|
||||
timings=task_result.result.timings,
|
||||
errors=task_result.result.errors,
|
||||
)
|
||||
|
||||
if work_dir.exists():
|
||||
task.scratch_dir = work_dir
|
||||
if not isinstance(response, FileResponse):
|
||||
_log.warning(
|
||||
f"Task {task.task_id=} produced content in {work_dir=} but the response is not a file."
|
||||
elif isinstance(task_result.result, ZipArchiveResult):
|
||||
response = Response(
|
||||
content=task_result.result.content,
|
||||
media_type="application/zip",
|
||||
headers={
|
||||
"Content-Disposition": 'attachment; filename="converted_docs.zip"'
|
||||
},
|
||||
)
|
||||
shutil.rmtree(work_dir, ignore_errors=True)
|
||||
elif isinstance(task_result.result, RemoteTargetResult):
|
||||
response = PresignedUrlConvertDocumentResponse(
|
||||
processing_time=task_result.processing_time,
|
||||
num_converted=task_result.num_converted,
|
||||
num_succeeded=task_result.num_succeeded,
|
||||
num_failed=task_result.num_failed,
|
||||
)
|
||||
elif isinstance(task_result.result, ChunkedDocumentResult):
|
||||
response = ChunkDocumentResponse(
|
||||
chunks=task_result.result.chunks,
|
||||
documents=task_result.result.documents,
|
||||
processing_time=task_result.processing_time,
|
||||
)
|
||||
else:
|
||||
raise ValueError("Unknown result type")
|
||||
|
||||
if docling_serve_settings.single_use_results:
|
||||
if task.scratch_dir is not None:
|
||||
background_tasks.add_task(
|
||||
shutil.rmtree, task.scratch_dir, ignore_errors=True
|
||||
)
|
||||
|
||||
async def _remove_task_impl():
|
||||
await asyncio.sleep(docling_serve_settings.result_removal_delay)
|
||||
await orchestrator.delete_task(task_id=task.task_id)
|
||||
await orchestrator.delete_task(task_id=task_id)
|
||||
|
||||
async def _remove_task():
|
||||
asyncio.create_task(_remove_task_impl()) # noqa: RUF006
|
||||
|
||||
@@ -28,6 +28,7 @@ class UvicornSettings(BaseSettings):
|
||||
class AsyncEngine(str, enum.Enum):
|
||||
LOCAL = "local"
|
||||
KFP = "kfp"
|
||||
RQ = "rq"
|
||||
|
||||
|
||||
class DoclingServeSettings(BaseSettings):
|
||||
@@ -50,6 +51,8 @@ class DoclingServeSettings(BaseSettings):
|
||||
enable_remote_services: bool = False
|
||||
allow_external_plugins: bool = False
|
||||
|
||||
api_key: str = ""
|
||||
|
||||
max_document_timeout: float = 3_600 * 24 * 7 # 7 days
|
||||
max_num_pages: int = sys.maxsize
|
||||
max_file_size: int = sys.maxsize
|
||||
@@ -63,6 +66,11 @@ class DoclingServeSettings(BaseSettings):
|
||||
eng_kind: AsyncEngine = AsyncEngine.LOCAL
|
||||
# Local engine
|
||||
eng_loc_num_workers: int = 2
|
||||
eng_loc_share_models: bool = False
|
||||
# RQ engine
|
||||
eng_rq_redis_url: str = ""
|
||||
eng_rq_results_prefix: str = "docling:results"
|
||||
eng_rq_sub_channel: str = "docling:updates"
|
||||
# KFP engine
|
||||
eng_kfp_endpoint: Optional[AnyUrl] = None
|
||||
eng_kfp_token: Optional[str] = None
|
||||
@@ -86,6 +94,10 @@ class DoclingServeSettings(BaseSettings):
|
||||
"KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
|
||||
)
|
||||
|
||||
if self.eng_kind == AsyncEngine.RQ:
|
||||
if not self.eng_rq_redis_url:
|
||||
raise ValueError("RQ Redis url is required when using the RQ engine.")
|
||||
|
||||
return self
|
||||
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ class WebsocketNotifier(BaseNotifier):
|
||||
task_queue_position = await self.orchestrator.get_queue_position(task_id)
|
||||
msg = TaskStatusResponse(
|
||||
task_id=task.task_id,
|
||||
task_type=task.task_type,
|
||||
task_status=task.task_status,
|
||||
task_position=task_queue_position,
|
||||
task_meta=task.processing_meta,
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
These documentation pages explore the webserver configuration, runtime options, and deployment examples, as well as development best practices.
|
||||
|
||||
- [Configuration](./configuration.md)
|
||||
- [Advance usage](./usage.md)
|
||||
- [Handling models](./models.md)
|
||||
- [Usage](./usage.md)
|
||||
- [Deployment](./deployment.md)
|
||||
- [MCP](./mcp.md)
|
||||
- [Development](./development.md)
|
||||
- [`v1` migration](./v1_migration.md)
|
||||
|
||||
@@ -44,6 +44,7 @@ THe following table describes the options to configure the Docling Serve app.
|
||||
| | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
|
||||
| | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay before results are removed from the task registry. |
|
||||
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
|
||||
| | `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads for processing a document. |
|
||||
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
|
||||
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
|
||||
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
|
||||
@@ -52,7 +53,8 @@ THe following table describes the options to configure the Docling Serve app.
|
||||
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
|
||||
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
|
||||
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
|
||||
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
|
||||
| | `DOCLING_SERVE_API_KEY` | | If specified, all the API requests must contain the header `X-Api-Key` with this value. |
|
||||
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local`, `rq` and `kfp`. See below for more configurations of the engines. |
|
||||
|
||||
### Compute engine
|
||||
|
||||
@@ -66,6 +68,17 @@ The following table describes the options to configure the Docling Serve local e
|
||||
| ENV | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
|
||||
| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |
|
||||
|
||||
#### RQ engine
|
||||
|
||||
The following table describes the options to configure the Docling Serve RQ engine.
|
||||
|
||||
| ENV | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `DOCLING_SERVE_ENG_RQ_REDIS_URL` | (required) | The connection Redis url, e.g. `redis://localhost:6373/` |
|
||||
| `DOCLING_SERVE_ENG_RQ_RESULTS_PREFIX` | `docling:results` | The prefix used for storing the results in Redis. |
|
||||
| `DOCLING_SERVE_ENG_RQ_SUB_CHANNEL` | `docling:updates` | The channel key name used for communicating updates between the workers and the orchestrator. |
|
||||
|
||||
#### KFP engine
|
||||
|
||||
@@ -79,3 +92,10 @@ The following table describes the options to configure the Docling Serve KFP eng
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` | | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1/callback/task/progress`. |
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` | | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
|
||||
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` | | The CA certificate for the progress callback. For cluster-internal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
|
||||
|
||||
#### Gradio UI
|
||||
|
||||
When using the Gradio UI with the option to output the conversion as a file, Gradio uses a cache to prevent files from being overwritten ([more info here](https://www.gradio.app/guides/file-access#the-gradio-cache)). We set the cache clean-up to run every hour and to remove files older than 10 hours. If files need to remain available for download from the UI for longer than 10 hours, there are two options:
|
||||
|
||||
- Increase the maximum age of files to clean [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483) to match the desired age;
|
||||
- Or manage the clean-up manually by making Gradio's temporary directory the same absolute path as `DOCLING_SERVE_SCRATCH_PATH`. This can be achieved by setting the environment variable `GRADIO_TEMP_DIR`, either on the command line with `export GRADIO_TEMP_DIR="<same_path_as_scratch>"` or in the `Dockerfile` with `ENV GRADIO_TEMP_DIR="<same_path_as_scratch>"`. After this, set the cache clean-up to `None` [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483). The clean-up of `DOCLING_SERVE_SCRATCH_PATH` will then also clean the Gradio temporary directory. (If you use this option, remember to remove the environment variable `GRADIO_TEMP_DIR` when reverting the changes; otherwise files may not be available for download.)
|
||||
|
||||
21
docs/deploy-examples/compose-amd.yaml
Normal file
21
docs/deploy-examples/compose-amd.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# AMD ROCm deployment
|
||||
|
||||
services:
|
||||
docling-serve:
|
||||
image: ghcr.io/docling-project/docling-serve-rocm:main
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- "5001:5001"
|
||||
environment:
|
||||
DOCLING_SERVE_ENABLE_UI: "true"
|
||||
ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
|
||||
## This section is for compatibility with older cards
|
||||
# HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
||||
# HSA_ENABLE_SDMA: "0"
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
group_add:
|
||||
- 44 # video group GID from host
|
||||
- 992 # render group GID from host
|
||||
restart: always
|
||||
@@ -1,15 +0,0 @@
|
||||
services:
|
||||
docling:
|
||||
image: ghcr.io/docling-project/docling-serve-cu124
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- 5001:5001
|
||||
environment:
|
||||
- DOCLING_SERVE_ENABLE_UI=true
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all # nvidia-smi
|
||||
capabilities: [gpu]
|
||||
20
docs/deploy-examples/compose-nvidia.yaml
Normal file
20
docs/deploy-examples/compose-nvidia.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# NVIDIA CUDA deployment
|
||||
|
||||
services:
|
||||
docling-serve:
|
||||
image: ghcr.io/docling-project/docling-serve-cu126:main
|
||||
container_name: docling-serve
|
||||
ports:
|
||||
- "5001:5001"
|
||||
environment:
|
||||
DOCLING_SERVE_ENABLE_UI: "true"
|
||||
NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
|
||||
# deploy: # This section is for compatibility with Swarm
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: all
|
||||
# capabilities: [gpu]
|
||||
runtime: nvidia
|
||||
restart: always
|
||||
192
docs/deploy-examples/docling-serve-rq-workers.yaml
Normal file
192
docs/deploy-examples/docling-serve-rq-workers.yaml
Normal file
@@ -0,0 +1,192 @@
|
||||
# This example deployment configures Docling Serve with a Service and RQ workers
|
||||
|
||||
# Create following secret
|
||||
# kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docling-serve
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 5001
|
||||
targetPort: http
|
||||
selector:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
---
|
||||
kind: Deployment
|
||||
apiVersion: apps/v1
|
||||
metadata:
|
||||
name: docling-serve
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: docling-serve
|
||||
component: docling-serve-api
|
||||
spec:
|
||||
restartPolicy: Always
|
||||
containers:
|
||||
- name: api
|
||||
resources:
|
||||
limits:
|
||||
cpu: 1
|
||||
memory: 8Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
env:
|
||||
- name: DOCLING_SERVE_ENABLE_UI
|
||||
value: 'true'
|
||||
- name: DOCLING_SERVE_ENG_KIND
|
||||
value: 'rq'
|
||||
- name: DOCLING_SERVE_ENG_RQ_REDIS_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: docling-serve-rq-secrets
|
||||
key: RQ_REDIS_URL
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 5001
|
||||
protocol: TCP
|
||||
imagePullPolicy: Always
|
||||
image: 'ghcr.io/docling-project/docling-serve-cpu'
|
||||
---
|
||||
kind: Deployment
|
||||
apiVersion: apps/v1
|
||||
metadata:
|
||||
name: docling-serve-rq-workers
|
||||
labels:
|
||||
app: docling-serve-rq-workers
|
||||
component: docling-serve-rq-worker
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: docling-serve-rq-workers
|
||||
component: docling-serve-rq-worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: docling-serve-rq-workers
|
||||
component: docling-serve-rq-worker
|
||||
spec:
|
||||
restartPolicy: Always
|
||||
containers:
|
||||
- name: worker
|
||||
resources:
|
||||
limits:
|
||||
cpu: 1
|
||||
memory: 4Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
env:
|
||||
- name: DOCLING_SERVE_ENG_KIND
|
||||
value: 'rq'
|
||||
- name: DOCLING_SERVE_ENG_RQ_REDIS_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: docling-serve-rq-secrets
|
||||
key: RQ_REDIS_URL
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 5001
|
||||
protocol: TCP
|
||||
imagePullPolicy: Always
|
||||
image: 'ghcr.io/docling-project/docling-serve-cpu'
|
||||
command: ["docling-serve"]
|
||||
args: ["rq-worker"]
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docling-serve-redis
|
||||
labels:
|
||||
app: docling-serve-redis
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: docling-serve-redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: docling-serve-redis
|
||||
spec:
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 30
|
||||
containers:
|
||||
- name: redis
|
||||
resources:
|
||||
limits:
|
||||
cpu: 1
|
||||
memory: 1Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 100Mi
|
||||
image: redis:latest
|
||||
command: ["redis-server"]
|
||||
args:
|
||||
- "--port"
|
||||
- "6373"
|
||||
- "--dir"
|
||||
- "/mnt/redis/data"
|
||||
- "--appendonly"
|
||||
- "yes"
|
||||
- "--requirepass"
|
||||
- "$(REDIS_PASSWORD)"
|
||||
ports:
|
||||
- containerPort: 6373
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: docling-serve-rq-secrets
|
||||
key: REDIS_PASSWORD
|
||||
volumeMounts:
|
||||
- name: redis-data
|
||||
mountPath: /mnt/redis/data
|
||||
securityContext:
|
||||
fsGroup: 1004
|
||||
runAsNonRoot: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
volumes:
|
||||
- name: redis-data
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 2Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docling-serve-redis-service
|
||||
labels:
|
||||
app: docling-serve-redis
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: redis-service
|
||||
protocol: TCP
|
||||
port: 6373
|
||||
targetPort: 6373
|
||||
selector:
|
||||
app: docling-serve-redis
|
||||
@@ -4,16 +4,17 @@ This document provides deployment examples for running the application in differ
|
||||
|
||||
Choose the deployment option that best fits your setup.
|
||||
|
||||
- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
|
||||
- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
|
||||
- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
|
||||
- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.
|
||||
|
||||
---
|
||||
|
||||
## Local GPU
|
||||
## Local GPU NVIDIA
|
||||
|
||||
### Docker compose
|
||||
|
||||
Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
|
||||
Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)
|
||||
|
||||
This deployment has the following features:
|
||||
|
||||
@@ -22,7 +23,7 @@ This deployment has the following features:
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
|
||||
```
|
||||
|
||||
For using the API:
|
||||
@@ -34,7 +35,7 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -56,7 +57,7 @@ Docs:
|
||||
<details>
|
||||
<summary><b>Steps</b></summary>
|
||||
|
||||
1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
|
||||
1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)
|
||||
|
||||
```sh
|
||||
nvidia-smi
|
||||
@@ -117,7 +118,75 @@ Docs:
|
||||
5. Run the container:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
|
||||
docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Local GPU AMD
|
||||
|
||||
### Docker compose
|
||||
|
||||
Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
|
||||
|
||||
This deployment has the following features:
|
||||
|
||||
- AMD ROCm enabled
|
||||
|
||||
Install the app with:
|
||||
|
||||
```sh
|
||||
docker compose -f docs/deploy-examples/compose-amd.yaml up -d
|
||||
```
|
||||
|
||||
For using the API:
|
||||
|
||||
```sh
|
||||
# Make a test query
|
||||
curl -X 'POST' \
|
||||
"localhost:5001/v1/convert/source/async" \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><b>Requirements</b></summary>
|
||||
|
||||
- debian/ubuntu/rhel/fedora/opensuse
|
||||
- docker
|
||||
- AMDGPU driver >=6.3
|
||||
- AMD ROCm >=6.3
|
||||
|
||||
Docs:
|
||||
|
||||
- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Steps</b></summary>
|
||||
|
||||
1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
rocm-smi --showdriverversion
|
||||
rocminfo | grep -i "ROCm version"
|
||||
```
|
||||
|
||||
2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
getent group video
|
||||
getent group render
|
||||
```
|
||||
|
||||
3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
|
||||
|
||||
```sh
|
||||
make docling-serve-rocm-image
|
||||
```
|
||||
|
||||
</details>
|
||||
@@ -152,10 +221,35 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
### Multiple workers with RQ
|
||||
|
||||
Manifest example: [`docling-serve-rq-workers.yaml`](./deploy-examples/docling-serve-rq-workers.yaml)
|
||||
|
||||
This deployment example has the following features:
|
||||
|
||||
- Deployment configuration
|
||||
- Service configuration
|
||||
- Redis deployment
|
||||
- Multiple (2 by default) worker Pods
|
||||
|
||||
Install the app with:
|
||||
|
||||
- create k8s secret:
|
||||
|
||||
```sh
|
||||
kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
|
||||
```
|
||||
|
||||
- apply deployment manifest:
|
||||
|
||||
```sh
|
||||
oc apply -f docs/deploy-examples/docling-serve-rq-workers.yaml
|
||||
```
|
||||
|
||||
### Secure deployment with `oauth-proxy`
|
||||
|
||||
Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)
|
||||
@@ -189,7 +283,7 @@ curl -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -222,7 +316,7 @@ task_id=$(curl -s -X 'POST' \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
|
||||
}' \
|
||||
-c cookies.txt | grep -oP '"task_id":"\K[^"]+')
|
||||
```
|
||||
|
||||
22
docs/examples.md
Normal file
22
docs/examples.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Examples
|
||||
|
||||
## Split processing
|
||||
|
||||
The provided split-processing example demonstrates how to split a PDF into chunks of pages and send them for conversion. At the end, it concatenates all split pages into a single conversion `JSON`.
|
||||
|
||||
At the beginning of the file there are variables to be used (and modified), such as:
|
||||
| Variable | Description |
|
||||
| ---------|-------------|
|
||||
| `path_to_pdf`| Path to PDF file to be split |
|
||||
| `pages_per_file`| The number of pages per chunk to split PDF |
|
||||
| `base_url`| Base url of the `docling-serve` host |
|
||||
| `out_dir`| The output folder of each conversion `JSON` of split PDF and the final concatenated `JSON` |
|
||||
|
||||
The example follows the following logic:
|
||||
- Get the number of pages of the `PDF`
|
||||
- Based on the number of chunks of pages, send each chunk to conversion using `page_range` parameter
|
||||
- Wait for all conversions to finish
|
||||
- Get all conversion results
|
||||
- Save each conversion `JSON` result into a `JSON` file
|
||||
- Concatenate all `JSONs` into a single `JSON` using `docling` concatenate method
|
||||
- Save concatenated `JSON` into a `JSON` file
|
||||
39
docs/mcp.md
Normal file
39
docs/mcp.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Docling MCP in Docling Serve
|
||||
|
||||
The `docling-serve` container image includes all MCP (Model Context Protocol) features starting from version v1.1.0. To leverage these features, you simply need to use a different entrypoint—no custom image builds or additional installations are required. The image provides the `docling-mcp-server` executable, which enables MCP functionality out of the box as of version v1.1.0 ([changelog](https://github.com/docling-project/docling-serve/blob/624f65d41b734e8b39ff267bc8bf6e766c376d6d/CHANGELOG.md)).
|
||||
|
||||
Read more on [Docling MCP](https://github.com/docling-project/docling-mcp) in its dedicated repository.
|
||||
|
||||
## Launching the MCP Service
|
||||
|
||||
By default, the container runs `docling-serve run` and exposes port 5001. To start the MCP service, override the entrypoint and specify your desired port mapping. For example:
|
||||
|
||||
```sh
|
||||
podman run -p 8000:8000 quay.io/docling-project/docling-serve -- docling-mcp-server --transport streamable-http --port 8000 --host 0.0.0.0
|
||||
```
|
||||
|
||||
This command starts the MCP server on port 8000, accessible at `http://localhost:8000/mcp`. Adjust the port and host as needed. Key arguments for `docling-mcp-server` include `--transport streamable-http` (HTTP transport for client connections), `--port <PORT>`, and `--host <HOST>` (use `0.0.0.0` to accept connections from any interface).
|
||||
|
||||
## Configuring MCP Clients
|
||||
|
||||
Most MCP-compatible clients, such as LM Studio and Claude Desktop, allow you to specify custom MCP server endpoints. The standard configuration uses a JSON block to define available MCP servers. For example, to connect to the Docling MCP server running on port 8000:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"docling": {
|
||||
"url": "http://localhost:8000/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Insert this configuration in your client's settings where MCP servers are defined. Update the URL if you use a different port.
|
||||
|
||||
### LM Studio and Claude Desktop
|
||||
|
||||
Both LM Studio and Claude Desktop support MCP endpoints via configuration files or UI settings. Paste the above JSON block into the appropriate configuration section. For Claude Desktop, add the MCP server in the "Custom Model" or "MCP Server" section. For LM Studio, refer to its documentation for the location of the MCP server configuration.
|
||||
|
||||
### Other MCP Clients
|
||||
|
||||
Other clients, such as Continue Coding Assistant, also support custom MCP endpoints. Use the same configuration pattern: provide the MCP server URL ending with `/mcp` and ensure the port matches your container setup. See the [Docling MCP docs](https://github.com/docling-project/docling-mcp/tree/main/docs/integrations) for more details.
|
||||
175
docs/models.md
Normal file
175
docs/models.md
Normal file
@@ -0,0 +1,175 @@
|
||||
# Handling Models in Docling Serve
|
||||
|
||||
When enabling steps in Docling Serve that require extra models (such as picture classification, picture description, table detection, code recognition, formula extraction, or vision-language modules), you must ensure those models are available in the runtime environment. The standard container image includes only the default models. Any additional models must be downloaded and made available before use. If required models are missing, Docling Serve will raise runtime errors rather than downloading them automatically. This default is chosen to guarantee that the system does not call external services.
|
||||
|
||||
## Model Storage Location
|
||||
|
||||
Docling Serve loads models from the directory specified by the `DOCLING_SERVE_ARTIFACTS_PATH` environment variable. This path must be consistent across model download and runtime. When running with multiple workers or reload enabled, you must use the environment variable rather than the CLI argument for configuration [[source]](./configuration.md).
|
||||
|
||||
## Approaches for Making Extra Models Available
|
||||
|
||||
There are several ways to ensure required models are present:
|
||||
|
||||
### 1. Disable Local Models (Trigger Auto-Download)
|
||||
|
||||
You can configure the container to download all models at startup by clearing the artifacts path:
|
||||
|
||||
```sh
|
||||
podman run -d -p 5001:5001 --name docling-serve \
|
||||
-e DOCLING_SERVE_ARTIFACTS_PATH="" \
|
||||
-e DOCLING_SERVE_ENABLE_UI=true \
|
||||
quay.io/docling-project/docling-serve
|
||||
```
|
||||
|
||||
This approach is simple for local development but not recommended for production, as it increases startup time and depends on network availability.
|
||||
|
||||
### 2. Build a Custom Image with Pre-Downloaded Models
|
||||
|
||||
You can create a new image that includes the required models:
|
||||
|
||||
```Dockerfile
|
||||
FROM quay.io/docling-project/docling-serve
|
||||
RUN docling-tools models download smolvlm
|
||||
```
|
||||
|
||||
This method is suitable for production, as it ensures all models are present in the image and avoids runtime downloads.
|
||||
|
||||
### 3. Update the Entrypoint to Download Models Before Startup
|
||||
|
||||
You can override the entrypoint to download models before starting the service:
|
||||
|
||||
```sh
|
||||
podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true \
|
||||
quay.io/docling-project/docling-serve \
|
||||
-- sh -c 'docling-tools models download smolvlm && exec docling-serve run'
|
||||
```
|
||||
|
||||
This is useful for environments where you want to keep the base image unchanged but still automate model preparation.
|
||||
|
||||
### 4. Mount a Volume with Pre-Downloaded Models
|
||||
|
||||
Download models locally and mount them into the container:
|
||||
|
||||
```sh
|
||||
# Download the models locally
|
||||
docling-tools models download --all -o models
|
||||
|
||||
# Start the container with the local models folder
|
||||
podman run -p 5001:5001 \
|
||||
-v $(pwd)/models:/opt/app-root/src/models \
|
||||
-e DOCLING_SERVE_ARTIFACTS_PATH="/opt/app-root/src/models" \
|
||||
-e DOCLING_SERVE_ENABLE_UI=true \
|
||||
quay.io/docling-project/docling-serve
|
||||
```
|
||||
|
||||
This approach is robust for both local and production deployments, especially when using persistent storage.
|
||||
|
||||
## Kubernetes/Cluster Deployments
|
||||
|
||||
For Kubernetes or OpenShift clusters, the recommended approach is to use a PersistentVolumeClaim (PVC) for model storage, a Kubernetes Job to download models, and mount the volume into the deployment. This ensures models persist across pod restarts and scale-out scenarios.
|
||||
|
||||
### Example: PersistentVolumeClaim
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: docling-model-cache-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
volumeMode: Filesystem
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
```
|
||||
|
||||
If you don't want to use the default storage class, set a custom storage class with the following:
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
...
|
||||
storageClassName: <Storage Class Name>
|
||||
```
|
||||
|
||||
Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
|
||||
|
||||
### Example: Model Download Job
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: docling-model-cache-load
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: loader
|
||||
image: ghcr.io/docling-project/docling-serve-cpu:main
|
||||
command:
|
||||
- docling-tools
|
||||
- models
|
||||
- download
|
||||
- '--output-dir=/modelcache'
|
||||
- 'layout'
|
||||
- 'tableformer'
|
||||
- 'code_formula'
|
||||
- 'picture_classifier'
|
||||
- 'smolvlm'
|
||||
- 'granite_vision'
|
||||
- 'easyocr'
|
||||
volumeMounts:
|
||||
- name: docling-model-cache
|
||||
mountPath: /modelcache
|
||||
volumes:
|
||||
- name: docling-model-cache
|
||||
persistentVolumeClaim:
|
||||
claimName: docling-model-cache-pvc
|
||||
restartPolicy: Never
|
||||
```
|
||||
|
||||
The job will mount the previously created persistent volume and execute a command similar to how we would load models locally:
|
||||
`docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
|
||||
|
||||
In the manifest, we specify the desired models individually, or we can use the `--all` parameter to download all models.
|
||||
|
||||
Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
|
||||
|
||||
### Example: Deployment with Mounted Volume
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: api
|
||||
env:
|
||||
- name: DOCLING_SERVE_ARTIFACTS_PATH
|
||||
value: '/modelcache'
|
||||
volumeMounts:
|
||||
- name: docling-model-cache
|
||||
mountPath: /modelcache
|
||||
volumes:
|
||||
- name: docling-model-cache
|
||||
persistentVolumeClaim:
|
||||
claimName: docling-model-cache-pvc
|
||||
```
|
||||
|
||||
The value of `DOCLING_SERVE_ARTIFACTS_PATH` must match the mount path where models are stored.
|
||||
|
||||
Now, when docling-serve is executing tasks, the underlying docling installation will load model weights from the mounted volume.
|
||||
|
||||
Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
|
||||
|
||||
## Local Docker Execution
|
||||
|
||||
For local Docker or Podman execution, you can use any of the approaches above. Mounting a local directory with pre-downloaded models is the most reliable for repeated runs and avoids network dependencies.
|
||||
|
||||
## Troubleshooting and Best Practices
|
||||
|
||||
- If a required model is missing from the artifacts path, Docling Serve will raise a runtime error.
|
||||
- Always ensure the value of `DOCLING_SERVE_ARTIFACTS_PATH` matches the directory where models are stored and mounted.
|
||||
- For production and cluster environments, prefer persistent storage and pre-loading models via a dedicated job.
|
||||
|
||||
For more details and YAML manifest examples, see the [deployment documentation](./deployment.md).
|
||||
@@ -1,103 +0,0 @@
|
||||
# Pre-loading models for docling
|
||||
|
||||
This document provides examples for pre-loading docling models to a persistent volume and re-using it for docling-serve deployments.
|
||||
|
||||
1. We need to create a persistent volume that will store models weights:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: docling-model-cache-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
volumeMode: Filesystem
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
```
|
||||
|
||||
If you don't want to use default storage class, set your custom storage class with following:
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
...
|
||||
storageClassName: <Storage Class Name>
|
||||
```
|
||||
|
||||
Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
|
||||
|
||||
2. In order to load model weights, we can use docling-toolkit to download them, as this is a one time operation we can use kubernetes job for this:
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: docling-model-cache-load
|
||||
spec:
|
||||
selector: {}
|
||||
template:
|
||||
metadata:
|
||||
name: docling-model-load
|
||||
spec:
|
||||
containers:
|
||||
- name: loader
|
||||
image: ghcr.io/docling-project/docling-serve-cpu:main
|
||||
command:
|
||||
- docling-tools
|
||||
- models
|
||||
- download
|
||||
- '--output-dir=/modelcache'
|
||||
- 'layout'
|
||||
- 'tableformer'
|
||||
- 'code_formula'
|
||||
- 'picture_classifier'
|
||||
- 'smolvlm'
|
||||
- 'granite_vision'
|
||||
- 'easyocr'
|
||||
volumeMounts:
|
||||
- name: docling-model-cache
|
||||
mountPath: /modelcache
|
||||
volumes:
|
||||
- name: docling-model-cache
|
||||
persistentVolumeClaim:
|
||||
claimName: docling-model-cache-pvc
|
||||
restartPolicy: Never
|
||||
```
|
||||
|
||||
The job will mount previously created persistent volume and execute command similar to how we would load models locally:
|
||||
`docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
|
||||
|
||||
In manifest, we specify desired models individually, or we can use `--all` parameter to download all models.
|
||||
|
||||
Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
|
||||
|
||||
3. Now we can mount volume in the docling-serve deployment and set env `DOCLING_SERVE_ARTIFACTS_PATH` to point to it.
|
||||
Following additions to deployment should be made:
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: api
|
||||
env:
|
||||
...
|
||||
- name: DOCLING_SERVE_ARTIFACTS_PATH
|
||||
value: '/modelcache'
|
||||
volumeMounts:
|
||||
- name: docling-model-cache
|
||||
mountPath: /modelcache
|
||||
...
|
||||
volumes:
|
||||
- name: docling-model-cache
|
||||
persistentVolumeClaim:
|
||||
claimName: docling-model-cache-pvc
|
||||
```
|
||||
|
||||
Make sure that value of `DOCLING_SERVE_ARTIFACTS_PATH` is the same as where models were downloaded and where volume is mounted.
|
||||
|
||||
Now when docling-serve is executing tasks, the underlying docling installation will load model weights from mounted volume.
|
||||
|
||||
Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
|
||||
@@ -30,6 +30,10 @@ On top of the source of file (see below), both endpoints support the same parame
|
||||
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
|
||||
- `images_scale` (float): Scale factor for images. Defaults to 2.0.
|
||||
|
||||
### Authentication
|
||||
|
||||
When authentication is activated (see the parameter `DOCLING_SERVE_API_KEY` in [configuration.md](./configuration.md)), all the API requests **must** provide the header `X-Api-Key` with the correct secret key.
|
||||
|
||||
## Convert endpoints
|
||||
|
||||
### Source endpoint
|
||||
|
||||
@@ -37,7 +37,7 @@ New version:
|
||||
"options": {}, // conversion options
|
||||
"sources": [
|
||||
// input document provided as base64-encoded string
|
||||
{"kind": "kind", "base64_string": "abc123...", "filename": "file.pdf"},
|
||||
{"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
|
||||
// input document provided as http urls
|
||||
{"kind": "http", "url": "https://..."},
|
||||
]
|
||||
|
||||
124
examples/split_processing.py
Normal file
124
examples/split_processing.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from pypdf import PdfReader
|
||||
|
||||
from docling_core.types.doc.document import DoclingDocument
|
||||
|
||||
# Variables to use
|
||||
path_to_pdf = Path("./tests/2206.01062v1.pdf")
|
||||
pages_per_file = 4
|
||||
base_url = "http://localhost:5001/v1"
|
||||
out_dir = Path("examples/splitted_pdf/")
|
||||
|
||||
|
||||
class ConvertedSplittedPdf(BaseModel):
    """Bookkeeping record for one page-range chunk submitted for conversion."""

    # Task identifier returned when the chunk was posted for conversion.
    task_id: str
    # Flipped to True once a status poll reports the task as finished.
    conversion_finished: bool = False
    # Decoded JSON body of the result endpoint; stays None until it is fetched.
    result: dict | None = None
|
||||
|
||||
|
||||
def get_task_result(task_id: str):
    """Fetch the result of a conversion task and return the decoded JSON body.

    Args:
        task_id: Identifier of the task whose result should be retrieved.
    """
    result_url = f"{base_url}/result/{task_id}"
    reply = httpx.get(result_url, timeout=15)
    return reply.json()
|
||||
|
||||
|
||||
def check_task_status(task_id: str):
    """Poll a conversion task once and report whether it has finished.

    Args:
        task_id: Identifier of the task to poll.

    Returns:
        True when the task status is "success", False otherwise.

    Raises:
        RuntimeError: If the task status is "failure" or "revoked".
    """
    response = httpx.get(f"{base_url}/status/poll/{task_id}", timeout=15)
    task_status = response.json()["task_status"]

    if task_status in ("failure", "revoked"):
        raise RuntimeError("A conversion failed")

    if task_status == "success":
        # Nothing left to wait for: return immediately instead of sleeping.
        return True

    # Not finished yet: pause before returning so the caller's polling loop
    # does not issue back-to-back requests.
    time.sleep(5)
    return False
|
||||
|
||||
|
||||
def post_file(file_path: Path, start_page: int, end_page: int):
    """Submit one page range of a PDF for asynchronous conversion.

    Args:
        file_path: Path of the PDF file to upload.
        start_page: First page of the range sent as ``page_range``.
        end_page: Last page of the range sent as ``page_range``.

    Returns:
        The ``task_id`` field of the JSON response.
    """
    payload = {
        "to_formats": ["json"],
        "image_export_mode": "placeholder",
        "ocr": False,
        "abort_on_error": False,
        "page_range": [start_page, end_page],
    }

    # Open the PDF in a context manager so the handle is closed as soon as the
    # upload request has completed, instead of leaking one handle per chunk.
    with file_path.open("rb") as pdf_file:
        files = {
            "files": (file_path.name, pdf_file, "application/pdf"),
        }
        response = httpx.post(
            f"{base_url}/convert/file/async",
            files=files,
            data=payload,
            timeout=15,
        )

    task = response.json()

    return task["task_id"]
|
||||
|
||||
|
||||
def main():
    """Convert ``path_to_pdf`` in page-range chunks and merge the results.

    Steps:
      1. Count the pages of the input PDF.
      2. Submit one async conversion task per ``pages_per_file`` pages.
      3. Poll until every task reports completion.
      4. Fetch each result, save it as ``splited_json_<i>.json`` in ``out_dir``.
      5. Reload the saved documents, concatenate them and save the merged
         document as ``concatenated.json`` in ``out_dir``.
    """
    with open(path_to_pdf, "rb") as input_pdf_file:
        total_pages = len(PdfReader(input_pdf_file).pages)

    # One task per chunk; page bounds passed to post_file are 1-based and the
    # last chunk is clamped to the document length.
    splitted_pdfs: list[ConvertedSplittedPdf] = []
    for start_page in range(0, total_pages, pages_per_file):
        task_id = post_file(
            path_to_pdf,
            start_page + 1,
            min(start_page + pages_per_file, total_pages),
        )
        splitted_pdfs.append(ConvertedSplittedPdf(task_id=task_id))

    # Keep polling the unfinished tasks until none is left.
    while any(not pdf.conversion_finished for pdf in splitted_pdfs):
        for splitted_pdf in splitted_pdfs:
            if splitted_pdf.conversion_finished:
                continue
            print("checking conversion status...")
            splitted_pdf.conversion_finished = check_task_status(
                splitted_pdf.task_id
            )

    for splitted_pdf in splitted_pdfs:
        splitted_pdf.result = get_task_result(splitted_pdf.task_id)

    # Make sure the output directory exists before anything is written to it.
    out_dir.mkdir(parents=True, exist_ok=True)

    files = []
    for i, splitted_pdf in enumerate(splitted_pdfs):
        json_content = json.dumps(
            splitted_pdf.result.get("document").get("json_content"), indent=2
        )
        doc = DoclingDocument.model_validate_json(json_content)
        # Use a dedicated name so the input path is not shadowed.
        split_json_file = f"{out_dir}/splited_json_{i}.json"
        doc.save_as_json(filename=split_json_file)
        files.append(split_json_file)

    docs = [DoclingDocument.load_from_json(filename=f) for f in files]
    concate_doc = DoclingDocument.concatenate(docs=docs)

    exp_json_file = Path(f"{out_dir}/concatenated.json")
    concate_doc.save_as_json(exp_json_file)

    print("Finished")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
BIN
img/fastapi-ui.png
Normal file
BIN
img/fastapi-ui.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 226 KiB |
BIN
img/swagger.png
BIN
img/swagger.png
Binary file not shown.
|
Before Width: | Height: | Size: 24 KiB |
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling-serve"
|
||||
version = "1.1.0" # DO NOT EDIT, updated automatically
|
||||
version = "1.5.0" # DO NOT EDIT, updated automatically
|
||||
description = "Running Docling as a service"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
@@ -8,7 +8,6 @@ authors = [
|
||||
{name="Guillaume Moutier", email="gmoutier@redhat.com"},
|
||||
{name="Anil Vishnoi", email="avishnoi@redhat.com"},
|
||||
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||
{name="Christoph Auer", email="cau@zurich.ibm.com"},
|
||||
{name="Peter Staar", email="taa@zurich.ibm.com"},
|
||||
]
|
||||
@@ -35,8 +34,8 @@ classifiers = [
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"docling~=2.38",
|
||||
"docling-core>=2.44.1",
|
||||
"docling-jobkit[kfp,vlm]~=1.2",
|
||||
"docling-core>=2.45.0",
|
||||
"docling-jobkit[kfp,rq,vlm]>=1.5.0,<2.0.0",
|
||||
"fastapi[standard]~=0.115",
|
||||
"httpx~=0.28",
|
||||
"pydantic~=2.10",
|
||||
@@ -62,7 +61,7 @@ rapidocr = [
|
||||
"onnxruntime~=1.7",
|
||||
]
|
||||
flash-attn = [
|
||||
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
@@ -70,31 +69,43 @@ dev = [
|
||||
"asgi-lifespan~=2.0",
|
||||
"mypy~=1.11",
|
||||
"pre-commit-uv~=4.1",
|
||||
"pypdf>=6.0.0",
|
||||
"pytest~=8.3",
|
||||
"pytest-asyncio~=0.24",
|
||||
"pytest-check~=2.4",
|
||||
"python-semantic-release~=7.32",
|
||||
"ruff>=0.9.6",
|
||||
]
|
||||
|
||||
pypi = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cpu = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cu124 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
]
|
||||
|
||||
cu126 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cu128 = [
|
||||
"torch>=2.7.0",
|
||||
"torchvision>=0.22.0",
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
rocm = [
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
"pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64'",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
@@ -107,6 +118,7 @@ conflicts = [
|
||||
{ group = "cu124" },
|
||||
{ group = "cu126" },
|
||||
{ group = "cu128" },
|
||||
{ group = "rocm" },
|
||||
],
|
||||
]
|
||||
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
|
||||
@@ -118,17 +130,25 @@ override-dependencies = [
|
||||
torch = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124" },
|
||||
{ index = "pytorch-cu126", group = "cu126" },
|
||||
{ index = "pytorch-cu128", group = "cu128" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
torchvision = [
|
||||
{ index = "pytorch-pypi", group = "pypi" },
|
||||
{ index = "pytorch-cpu", group = "cpu" },
|
||||
{ index = "pytorch-cu124", group = "cu124" },
|
||||
{ index = "pytorch-cu126", group = "cu126" },
|
||||
{ index = "pytorch-cu128", group = "cu128" },
|
||||
{ index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
pytorch-triton-rocm = [
|
||||
{ index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
|
||||
# docling-jobkit = { path = "../docling-jobkit", editable = true }
|
||||
|
||||
@@ -157,6 +177,11 @@ name = "pytorch-cu128"
|
||||
url = "https://download.pytorch.org/whl/cu128"
|
||||
explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-rocm"
|
||||
url = "https://download.pytorch.org/whl/rocm6.3"
|
||||
explicit = true
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["docling_serve*"]
|
||||
namespaces = true
|
||||
@@ -254,6 +279,7 @@ module = [
|
||||
"kfp.*",
|
||||
"kfp_server_api.*",
|
||||
"mlx_vlm.*",
|
||||
"mlx.*",
|
||||
"scalar_fastapi.*",
|
||||
]
|
||||
ignore_missing_imports = true
|
||||
|
||||
@@ -6,10 +6,15 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -6,10 +6,15 @@ import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -5,10 +5,15 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -6,16 +6,24 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from websockets.sync.client import connect
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_url(async_client: httpx.AsyncClient):
|
||||
"""Test convert URL to all outputs"""
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
|
||||
doc_filename = Path("tests/2408.09869v5.pdf")
|
||||
encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
|
||||
@@ -57,7 +65,7 @@ async def test_convert_url(async_client: httpx.AsyncClient):
|
||||
|
||||
task = response.json()
|
||||
|
||||
uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}"
|
||||
uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}?api_key={docling_serve_settings.api_key}"
|
||||
with connect(uri) as websocket:
|
||||
for message in websocket:
|
||||
print(message)
|
||||
|
||||
@@ -6,10 +6,15 @@ import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@@ -57,3 +62,60 @@ async def test_convert_url(async_client):
|
||||
time.sleep(2)
|
||||
|
||||
assert task["task_status"] == "success"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("include_converted_doc", [False, True])
async def test_chunk_url(async_client, include_converted_doc: bool):
    """Test chunk URL.

    Submits a document URL to the async hybrid-chunking endpoint, polls the
    task until it reaches a terminal status, then checks that the result
    contains chunks and documents, and that the converted document is present
    only when ``include_converted_doc`` is true.
    """
    # Local import: only this test needs asyncio, for the non-blocking sleep.
    import asyncio

    example_docs = [
        "https://arxiv.org/pdf/2311.18481",
    ]

    base_url = "http://localhost:5001/v1"
    payload = {
        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
        "include_converted_doc": include_converted_doc,
    }

    response = await async_client.post(
        f"{base_url}/chunk/hybrid/source/async", json=payload
    )
    assert response.status_code == 200, "Response should be 200 OK"

    task = response.json()

    print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
        print(f"{task['task_position']=}")

        # Yield to the event loop while waiting instead of blocking it.
        await asyncio.sleep(2)

    assert task["task_status"] == "success"

    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
    assert result_resp.status_code == 200, "Response should be 200 OK"
    result = result_resp.json()
    print("Got result.")

    assert "chunks" in result
    assert len(result["chunks"]) > 0

    assert "documents" in result
    assert len(result["documents"]) > 0
    assert result["documents"][0]["status"] == "success"

    if include_converted_doc:
        assert result["documents"][0]["content"]["json_content"] is not None
        assert (
            result["documents"][0]["content"]["json_content"]["schema_name"]
            == "DoclingDocument"
        )
    else:
        assert result["documents"][0]["content"]["json_content"] is None
|
||||
|
||||
@@ -5,10 +5,15 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -3,10 +3,15 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -6,10 +6,15 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from pytest_check import check
|
||||
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client():
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = docling_serve_settings.api_key
|
||||
async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
|
||||
yield client
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ from pytest_check import check
|
||||
from docling_core.types.doc import DoclingDocument, PictureItem
|
||||
|
||||
from docling_serve.app import create_app
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -20,6 +21,14 @@ def event_loop():
|
||||
return asyncio.get_event_loop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def auth_headers():
    """Return request headers carrying the configured API key, if any.

    The mapping holds ``X-Api-Key`` when ``docling_serve_settings.api_key``
    is truthy and is empty otherwise.
    """
    api_key = docling_serve_settings.api_key
    return {"X-Api-Key": api_key} if api_key else {}
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def app():
|
||||
app = create_app()
|
||||
@@ -46,7 +55,7 @@ async def test_health(client: AsyncClient):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_file(client: AsyncClient):
|
||||
async def test_convert_file(client: AsyncClient, auth_headers: dict):
|
||||
"""Test convert single file to all outputs"""
|
||||
|
||||
endpoint = "/v1/convert/file"
|
||||
@@ -79,7 +88,9 @@ async def test_convert_file(client: AsyncClient):
|
||||
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
response = await client.post(endpoint, files=files, data=options)
|
||||
response = await client.post(
|
||||
endpoint, files=files, data=options, headers=auth_headers
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
data = response.json()
|
||||
@@ -160,7 +171,7 @@ async def test_convert_file(client: AsyncClient):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_referenced_artifacts(client: AsyncClient):
|
||||
async def test_referenced_artifacts(client: AsyncClient, auth_headers: dict):
|
||||
"""Test that paths in the zip file are relative to the zip file root."""
|
||||
|
||||
endpoint = "/v1/convert/file"
|
||||
@@ -178,7 +189,9 @@ async def test_referenced_artifacts(client: AsyncClient):
|
||||
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
response = await client.post(endpoint, files=files, data=options)
|
||||
response = await client.post(
|
||||
endpoint, files=files, data=options, headers=auth_headers
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
|
||||
|
||||
@@ -11,6 +11,7 @@ from docling_core.types import DoclingDocument
|
||||
from docling_core.types.doc.document import PictureDescriptionData
|
||||
|
||||
from docling_serve.app import create_app
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -18,6 +19,14 @@ def event_loop():
|
||||
return asyncio.get_event_loop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def auth_headers():
    """Return request headers carrying the configured API key, if any.

    The mapping holds ``X-Api-Key`` when ``docling_serve_settings.api_key``
    is truthy and is empty otherwise.
    """
    api_key = docling_serve_settings.api_key
    return {"X-Api-Key": api_key} if api_key else {}
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def app():
|
||||
app = create_app()
|
||||
@@ -37,7 +46,7 @@ async def client(app):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_convert_file(client: AsyncClient):
|
||||
async def test_convert_file(client: AsyncClient, auth_headers: dict):
|
||||
"""Test convert single file to all outputs"""
|
||||
|
||||
endpoint = "/v1/convert/file"
|
||||
@@ -63,7 +72,9 @@ async def test_convert_file(client: AsyncClient):
|
||||
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
|
||||
}
|
||||
|
||||
response = await client.post(endpoint, files=files, data=options)
|
||||
response = await client.post(
|
||||
endpoint, files=files, data=options, headers=auth_headers
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
data = response.json()
|
||||
|
||||
@@ -17,6 +17,14 @@ def event_loop():
|
||||
return asyncio.get_event_loop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def auth_headers():
    """Return request headers carrying the configured API key, if any.

    The mapping holds ``X-Api-Key`` when ``docling_serve_settings.api_key``
    is truthy and is empty otherwise.
    """
    api_key = docling_serve_settings.api_key
    return {"X-Api-Key": api_key} if api_key else {}
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def app():
|
||||
app = create_app()
|
||||
@@ -35,7 +43,7 @@ async def client(app):
|
||||
yield client
|
||||
|
||||
|
||||
async def convert_file(client: AsyncClient):
|
||||
async def convert_file(client: AsyncClient, auth_headers: dict):
|
||||
doc_filename = Path("tests/2408.09869v5.pdf")
|
||||
encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
|
||||
|
||||
@@ -52,7 +60,9 @@ async def convert_file(client: AsyncClient):
|
||||
],
|
||||
}
|
||||
|
||||
response = await client.post("/v1/convert/source/async", json=payload)
|
||||
response = await client.post(
|
||||
"/v1/convert/source/async", json=payload, headers=auth_headers
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
|
||||
task = response.json()
|
||||
@@ -60,7 +70,9 @@ async def convert_file(client: AsyncClient):
|
||||
print(json.dumps(task, indent=2))
|
||||
|
||||
while task["task_status"] not in ("success", "failure"):
|
||||
response = await client.get(f"/v1/status/poll/{task['task_id']}")
|
||||
response = await client.get(
|
||||
f"/v1/status/poll/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert response.status_code == 200, "Response should be 200 OK"
|
||||
task = response.json()
|
||||
print(f"{task['task_status']=}")
|
||||
@@ -74,52 +86,62 @@ async def convert_file(client: AsyncClient):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_clear_results(client: AsyncClient):
|
||||
async def test_clear_results(client: AsyncClient, auth_headers: dict):
|
||||
"""Test removal of task."""
|
||||
|
||||
# Set long delay deletion
|
||||
docling_serve_settings.result_removal_delay = 100
|
||||
|
||||
# Convert and wait for completion
|
||||
task = await convert_file(client)
|
||||
task = await convert_file(client, auth_headers=auth_headers)
|
||||
|
||||
# Get result once
|
||||
result_response = await client.get(f"/v1/result/{task['task_id']}")
|
||||
result_response = await client.get(
|
||||
f"/v1/result/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert result_response.status_code == 200, "Response should be 200 OK"
|
||||
print("Result 1 ok.")
|
||||
result = result_response.json()
|
||||
assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
|
||||
|
||||
# Get result twice
|
||||
result_response = await client.get(f"/v1/result/{task['task_id']}")
|
||||
result_response = await client.get(
|
||||
f"/v1/result/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert result_response.status_code == 200, "Response should be 200 OK"
|
||||
print("Result 2 ok.")
|
||||
result = result_response.json()
|
||||
assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
|
||||
|
||||
# Clear
|
||||
clear_response = await client.get("/v1/clear/results?older_then=0")
|
||||
clear_response = await client.get(
|
||||
"/v1/clear/results?older_then=0", headers=auth_headers
|
||||
)
|
||||
assert clear_response.status_code == 200, "Response should be 200 OK"
|
||||
print("Clear ok.")
|
||||
|
||||
# Get deleted result
|
||||
result_response = await client.get(f"/v1/result/{task['task_id']}")
|
||||
result_response = await client.get(
|
||||
f"/v1/result/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert result_response.status_code == 404, "Response should be removed"
|
||||
print("Result was no longer found.")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delay_remove(client: AsyncClient):
|
||||
async def test_delay_remove(client: AsyncClient, auth_headers: dict):
|
||||
"""Test automatic removal of task with delay."""
|
||||
|
||||
# Set short delay deletion
|
||||
docling_serve_settings.result_removal_delay = 5
|
||||
|
||||
# Convert and wait for completion
|
||||
task = await convert_file(client)
|
||||
task = await convert_file(client, auth_headers=auth_headers)
|
||||
|
||||
# Get result once
|
||||
result_response = await client.get(f"/v1/result/{task['task_id']}")
|
||||
result_response = await client.get(
|
||||
f"/v1/result/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert result_response.status_code == 200, "Response should be 200 OK"
|
||||
print("Result ok.")
|
||||
result = result_response.json()
|
||||
@@ -129,5 +151,7 @@ async def test_delay_remove(client: AsyncClient):
|
||||
await asyncio.sleep(10)
|
||||
|
||||
# Get deleted result
|
||||
result_response = await client.get(f"/v1/result/{task['task_id']}")
|
||||
result_response = await client.get(
|
||||
f"/v1/result/{task['task_id']}", headers=auth_headers
|
||||
)
|
||||
assert result_response.status_code == 404, "Response should be removed"
|
||||
|
||||
Reference in New Issue
Block a user