feat: Docling with auto-ocr (#403)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-10-15 21:15:29 +02:00
committed by GitHub
parent 5344505718
commit d95ea94087
3 changed files with 1799 additions and 1361 deletions

View File

@@ -58,7 +58,7 @@ RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn
-ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
+ARG MODELS_LIST="layout tableformer picture_classifier rapidocr easyocr"
RUN echo "Downloading models..." && \
HF_HUB_DOWNLOAD_TIMEOUT="90" \

View File

@@ -55,10 +55,12 @@ ui = [
tesserocr = [
"tesserocr~=2.7"
]
easyocr = [
"easyocr>=1.7",
]
rapidocr = [
"rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
"onnxruntime (>=1.7.0,<2.0.0)",
"modelscope>=1.29.0",
]
flash-attn = [
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"

3154
uv.lock generated

File diff suppressed because one or more lines are too long