fix: support python 3.13 and docling updates and switch to uv (#48)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-02-19 09:53:07 +01:00
committed by GitHub
parent 7a351fcdea
commit ae3b4906f1
15 changed files with 4148 additions and 5518 deletions

View File

@@ -1,19 +0,0 @@
# Composite action: installs a pinned Poetry release, sets up the requested
# Python interpreter with Poetry dependency caching enabled, then installs the
# project with all extras.
name: 'Set up Poetry and install'
description: 'Set up a specific version of Poetry and install dependencies using caching.'
inputs:
  python-version:
    description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
    # Kept quoted so the version stays a string instead of being read as a float.
    default: '3.12'
runs:
  using: 'composite'
  steps:
    # Poetry is installed before setup-python runs; the setup-python docs
    # describe `cache: 'poetry'` as needing Poetry to be available already.
    - name: Install poetry
      run: pipx install poetry==1.8.5
      shell: bash
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}
        cache: 'poetry'
    # `shell` is set explicitly on each `run` step of this composite action.
    - name: Install dependencies
      run: poetry install --all-extras
      shell: bash

View File

@@ -10,11 +10,11 @@ fi
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}" CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
# update package version # update package version
poetry version "${TARGET_VERSION}" uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
# collect release notes # collect release notes
REL_NOTES=$(mktemp) REL_NOTES=$(mktemp)
poetry run semantic-release changelog --unreleased >> "${REL_NOTES}" uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"
# update changelog # update changelog
TMP_CHGLOG=$(mktemp) TMP_CHGLOG=$(mktemp)

View File

@@ -14,15 +14,20 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 # for fetching tags, required for semantic-release fetch-depth: 0 # for fetching tags, required for semantic-release
- uses: ./.github/actions/setup-poetry - name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Install dependencies
run: uv sync --only-dev
- name: Check version of potential release - name: Check version of potential release
id: version_check id: version_check
run: | run: |
TRGT_VERSION=$(poetry run semantic-release print-version) TRGT_VERSION=$(uv run --no-sync semantic-release print-version)
echo "TRGT_VERSION=${TRGT_VERSION}" >> "$GITHUB_OUTPUT" echo "TRGT_VERSION=${TRGT_VERSION}" >> "$GITHUB_OUTPUT"
echo "${TRGT_VERSION}" echo "${TRGT_VERSION}"
- name: Check notes of potential release - name: Check notes of potential release
run: poetry run semantic-release changelog --unreleased run: uv run --no-sync semantic-release changelog --unreleased
release: release:
needs: [code-checks, pre-release-check] needs: [code-checks, pre-release-check]
if: needs.pre-release-check.outputs.TARGET_TAG_V != '' if: needs.pre-release-check.outputs.TARGET_TAG_V != ''
@@ -39,7 +44,12 @@ jobs:
with: with:
token: ${{ steps.app-token.outputs.token }} token: ${{ steps.app-token.outputs.token }}
fetch-depth: 0 # for fetching tags, required for semantic-release fetch-depth: 0 # for fetching tags, required for semantic-release
- uses: ./.github/actions/setup-poetry - name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Install dependencies
run: uv sync --only-dev
- name: Run release script - name: Run release script
env: env:
GH_TOKEN: ${{ steps.app-token.outputs.token }} GH_TOKEN: ${{ steps.app-token.outputs.token }}

View File

@@ -20,7 +20,7 @@ jobs:
with: with:
publish: false publish: false
build_args: | build_args: |
--build-arg CPU_ONLY=true CPU_ONLY=true
ghcr_image_name: ds4sd/docling-serve-cpu ghcr_image_name: ds4sd/docling-serve-cpu
quay_image_name: "" quay_image_name: ""
@@ -37,6 +37,7 @@ jobs:
with: with:
publish: false publish: false
build_args: | build_args: |
--build-arg CPU_ONLY=false CPU_ONLY=false
platforms: linux/amd64
ghcr_image_name: ds4sd/docling-serve ghcr_image_name: ds4sd/docling-serve
quay_image_name: "" quay_image_name: ""

View File

@@ -34,7 +34,7 @@ jobs:
publish: true publish: true
environment: registry-creds environment: registry-creds
build_args: | build_args: |
--build-arg CPU_ONLY=true CPU_ONLY=true
ghcr_image_name: ds4sd/docling-serve-cpu ghcr_image_name: ds4sd/docling-serve-cpu
quay_image_name: ds4sd/docling-serve-cpu quay_image_name: ds4sd/docling-serve-cpu
@@ -53,7 +53,8 @@ jobs:
publish: true publish: true
environment: registry-creds environment: registry-creds
build_args: | build_args: |
--build-arg CPU_ONLY=false CPU_ONLY=false
platforms: linux/amd64
ghcr_image_name: ds4sd/docling-serve ghcr_image_name: ds4sd/docling-serve
quay_image_name: ds4sd/docling-serve quay_image_name: ds4sd/docling-serve

View File

@@ -11,11 +11,15 @@ jobs:
python-version: ['3.12'] python-version: ['3.12']
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: ./.github/actions/setup-poetry - name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install dependencies
run: uv sync --all-extras --no-extra cu124
- name: Run styling check - name: Run styling check
run: poetry run pre-commit run --all-files run: uv run --no-sync pre-commit run --all-files
markdown-lint: markdown-lint:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@@ -17,9 +17,14 @@ jobs:
id-token: write # IMPORTANT: mandatory for trusted publishing id-token: write # IMPORTANT: mandatory for trusted publishing
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: ./.github/actions/setup-poetry - name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Install dependencies
run: uv sync --all-extras --no-extra cu124
- name: Build - name: Build
run: poetry build run: uv build
- name: Publish distribution 📦 to PyPI - name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:

View File

@@ -4,7 +4,7 @@ repos:
hooks: hooks:
- id: system - id: system
name: Black name: Black
entry: poetry run black docling_serve tests entry: uv run --no-sync black docling_serve tests
pass_filenames: false pass_filenames: false
language: system language: system
files: '\.py$' files: '\.py$'
@@ -12,7 +12,7 @@ repos:
hooks: hooks:
- id: system - id: system
name: isort name: isort
entry: poetry run isort docling_serve tests entry: uv run --no-sync isort docling_serve tests
pass_filenames: false pass_filenames: false
language: system language: system
files: '\.py$' files: '\.py$'
@@ -20,7 +20,7 @@ repos:
hooks: hooks:
- id: autoflake - id: autoflake
name: autoflake name: autoflake
entry: poetry run autoflake docling_serve tests entry: uv run --no-sync autoflake docling_serve tests
pass_filenames: false pass_filenames: false
language: system language: system
files: '\.py$' files: '\.py$'
@@ -28,7 +28,7 @@ repos:
hooks: hooks:
- id: system - id: system
name: flake8 name: flake8
entry: poetry run flake8 docling_serve entry: uv run --no-sync flake8 docling_serve
pass_filenames: false pass_filenames: false
language: system language: system
files: '\.py$' files: '\.py$'
@@ -36,14 +36,12 @@ repos:
hooks: hooks:
- id: system - id: system
name: MyPy name: MyPy
entry: poetry run mypy docling_serve entry: uv run --no-sync mypy docling_serve
pass_filenames: false pass_filenames: false
language: system language: system
files: '\.py$' files: '\.py$'
- repo: local - repo: https://github.com/astral-sh/uv-pre-commit
# uv version.
rev: 0.6.1
hooks: hooks:
- id: system - id: uv-lock
name: Poetry check
entry: poetry check --lock
pass_filenames: false
language: system

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.12

View File

@@ -3,6 +3,7 @@ ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
FROM ${BASE_IMAGE} FROM ${BASE_IMAGE}
ARG CPU_ONLY=false ARG CPU_ONLY=false
ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
USER 0 USER 0
@@ -21,6 +22,8 @@ RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/ ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
COPY --from=ghcr.io/astral-sh/uv:0.6.1 /uv /uvx /bin/
################################################################################################### ###################################################################################################
# Docling layer # # Docling layer #
################################################################################################### ###################################################################################################
@@ -35,27 +38,29 @@ ENV OMP_NUM_THREADS=4
ENV LANG=en_US.UTF-8 ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8 ENV LC_ALL=en_US.UTF-8
ENV PYTHONIOENCODING=utf-8 ENV PYTHONIOENCODING=utf-8
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
ENV WITH_UI=True ENV WITH_UI=True
COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./ COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
RUN pip install --no-cache-dir poetry && \ RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
# We already are in a virtual environment, so we don't need to create a new one, only activate it.
poetry config virtualenvs.create false && \
source /opt/app-root/bin/activate && \
if [ "$CPU_ONLY" = "true" ]; then \ if [ "$CPU_ONLY" = "true" ]; then \
poetry install --no-root --no-cache --no-interaction --all-extras --with cpu --without dev; \ NO_EXTRA=cu124; \
else \ else \
poetry install --no-root --no-cache --no-interaction --all-extras --without dev; \ NO_EXTRA=cpu; \
fi && \ fi && \
echo "Downloading models..." && \ uv sync --frozen --no-install-project --no-dev --all-extras --no-extra ${NO_EXTRA}
python models_download.py && \
chown -R 1001:0 /opt/app-root/src && \ RUN echo "Downloading models..." && \
chmod -R g=u /opt/app-root/src docling-tools models download ${MODELS_LIST} && \
chown -R 1001:0 /opt/app-root/src/.cache && \
chmod -R g=u /opt/app-root/src/.cache
COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve
EXPOSE 5001 EXPOSE 5001
CMD ["python", "-m", "docling_serve"] CMD ["python", "-m", "docling_serve"]

View File

@@ -325,10 +325,11 @@ RELOAD=true bash start_server.sh
The following variables are available: The following variables are available:
`TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`. - `DOCLING_ARTIFACTS_PATH`: If set, Docling will use only the local model weights, for example `/opt/app-root/.cache/docling/cache`.
`UVICORN_WORKERS`: Number of workers to use. - `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
`RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development. - `UVICORN_WORKERS`: Number of workers to use.
`WITH_UI`: If `True`, The Gradio UI will be available at `/ui`. - `RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development.
- `WITH_UI`: If `True`, the Gradio UI will be available at `/ui`.
## Get help and support ## Get help and support

View File

@@ -1,36 +0,0 @@
import os
import zipfile
import requests
from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
# Download Docling models
StandardPdfPipeline.download_models_hf(force=True)
load_pretrained_nlp_models(verbose=True)

# Download EasyOCR models
urls = [
    "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip",
    "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip"
]

# Temporary on-disk location for each archive, paired positionally with `urls`.
local_zip_paths = [
    "/opt/app-root/src/latin_g2.zip",
    "/opt/app-root/src/craft_mlt_25k.zip"
]

# Directory the archives are unpacked into.
extract_path = "/opt/app-root/src/.EasyOCR/model/"

for url, local_zip_path in zip(urls, local_zip_paths):
    # Download the file. The timeout keeps a stalled connection from hanging
    # the build, and raise_for_status() stops an HTTP error page from being
    # saved and later failing as an unreadable zip archive.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    try:
        with open(local_zip_path, "wb") as file:
            file.write(response.content)

        # Unzip the file
        with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_path)
    finally:
        # Clean up the zip file, even when writing or extraction failed.
        if os.path.exists(local_zip_path):
            os.remove(local_zip_path)

5348
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,25 @@
[tool.poetry] [project]
name = "docling-serve" name = "docling-serve"
version = "0.2.0" # DO NOT EDIT, updated automatically version = "0.2.0" # DO NOT EDIT, updated automatically
description = "Running Docling as a service" description = "Running Docling as a service"
license = "MIT" license = {text = "MIT"}
authors = [ authors = [
"Michele Dolfi <dol@zurich.ibm.com>", {name="Michele Dolfi", email="dol@zurich.ibm.com"},
"Christoph Auer <cau@zurich.ibm.com>", {name="Guillaume Moutier", email="gmoutier@redhat.com"},
"Panos Vagenas <pva@zurich.ibm.com>", {name="Anil Vishnoi", email="avishnoi@redhat.com"},
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>", {name="Panos Vagenas", email="pva@zurich.ibm.com"},
"Peter Staar <taa@zurich.ibm.com>", {name="Panos Vagenas", email="pva@zurich.ibm.com"},
{name="Christoph Auer", email="cau@zurich.ibm.com"},
{name="Peter Staar", email="taa@zurich.ibm.com"},
] ]
maintainers = [ maintainers = [
"Peter Staar <taa@zurich.ibm.com>", {name="Michele Dolfi", email="dol@zurich.ibm.com"},
"Christoph Auer <cau@zurich.ibm.com>", {name="Anil Vishnoi", email="avishnoi@redhat.com"},
"Michele Dolfi <dol@zurich.ibm.com>", {name="Panos Vagenas", email="pva@zurich.ibm.com"},
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>", {name="Christoph Auer", email="cau@zurich.ibm.com"},
"Panos Vagenas <pva@zurich.ibm.com>", {name="Peter Staar", email="taa@zurich.ibm.com"},
] ]
readme = "README.md" readme = "README.md"
repository = "https://github.com/DS4SD/docling-serve"
homepage = "https://github.com/DS4SD/docling-serve"
classifiers = [ classifiers = [
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
@@ -28,80 +28,88 @@ classifiers = [
"Typing :: Typed", "Typing :: Typed",
"Programming Language :: Python :: 3" "Programming Language :: Python :: 3"
] ]
requires-python = ">=3.10"
[tool.poetry.dependencies] dependencies = [
python = ">=3.10,<3.13" # 3.10 needed for Gradio, and no torchvision build for 3.13 yet "docling~=2.23",
docling = "^2.14.0" "fastapi[standard]~=0.115",
fastapi = {version = "^0.115.6", extras = ["standard"]} "httpx~=0.28",
gradio = { version = "^5.9.1", optional = true } "pydantic~=2.10",
uvicorn = "~0.29.0" "pydantic-settings~=2.4",
pydantic = "^2.10.3" "python-multipart>=0.0.14,<0.1.0",
pydantic-settings = "^2.4.0" "uvicorn[standard]>=0.29.0,<1.0.0",
python-multipart = "^0.0.19"
httpx = "^0.28.1"
tesserocr = { version = "^2.7.1", optional = true }
rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
onnxruntime = [
# 1.19.2 is the last version with python3.9 support,
# see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
{ version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
{ version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" }
] ]
[project.optional-dependencies]
ui = [
"gradio~=5.9"
]
tesserocr = [
"tesserocr~=2.7"
]
rapidocr = [
"rapidocr-onnxruntime~=1.4; python_version<'3.13'",
"onnxruntime~=1.7",
]
cpu = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
[tool.poetry.extras] [dependency-groups]
ui = ["gradio"] dev = [
tesserocr = ["tesserocr"] "autoflake~=2.3",
rapidocr = ["rapidocr-onnxruntime", "onnxruntime"] "black~=24.8",
"flake8~=7.1",
"isort~=5.13",
"mypy~=1.11",
"pre-commit~=3.8",
"pytest~=8.3",
"pytest-asyncio~=0.24",
"pytest-check~=2.4",
"python-semantic-release~=7.32",
]
[tool.uv]
conflicts = [
[
{ extra = "cpu" },
{ extra = "cu124" },
],
]
[tool.poetry.group.pypi-torch] [tool.uv.sources]
optional = false
[tool.poetry.group.pypi-torch.dependencies]
torch = [ torch = [
{version = "!=2.4.1+cpu" }, { index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
] ]
torchvision = [ torchvision = [
{version = "!=0.19.1+cpu" }, { index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
] ]
[tool.poetry.group.cpu] [[tool.uv.index]]
optional = true name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.poetry.group.cpu.dependencies] [[tool.uv.index]]
torch = [ name = "pytorch-cu124"
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp310-cp310-linux_x86_64.whl"}, url = "https://download.pytorch.org/whl/cu124"
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp311-cp311-linux_x86_64.whl"}, explicit = true
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
]
torchvision = [
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
{markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
]
[tool.poetry.group.constraints.dependencies] [tool.setuptools.packages.find]
numpy = [ include = ["docling_serve"]
{ version = "^2.1.0", markers = 'python_version >= "3.13"' },
{ version = "^1.24.4", markers = 'python_version < "3.13"' },
]
[tool.poetry.group.dev.dependencies] [project.urls]
black = "^24.8.0" Homepage = "https://github.com/DS4SD/docling-serve"
isort = "^5.13.2" # Documentation = "https://ds4sd.github.io/docling"
pre-commit = "^3.8.0" Repository = "https://github.com/DS4SD/docling-serve"
autoflake = "^2.3.1" Issues = "https://github.com/DS4SD/docling-serve/issues"
flake8 = "^7.1.1" Changelog = "https://github.com/DS4SD/docling-serve/blob/main/CHANGELOG.md"
pytest = "^8.3.4"
pytest-asyncio = "^0.24.0"
pytest-check = "^2.4.1"
mypy = "^1.11.2"
python-semantic-release = "^7.32.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.black] [tool.black]
line-length = 88 line-length = 88

3999
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff