mirror of
https://github.com/docling-project/docling-serve.git
synced 2026-03-07 22:33:44 +00:00
fix: add flash-attn for cuda images (#161)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
2
.github/workflows/ci-images-dryrun.yml
vendored
2
.github/workflows/ci-images-dryrun.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cpu
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra cu124
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
|
||||
2
.github/workflows/images.yml
vendored
2
.github/workflows/images.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cpu
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra cu124
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
- name: docling-project/docling-serve-cu124
|
||||
build_args: |
|
||||
|
||||
2
.github/workflows/job-build.yml
vendored
2
.github/workflows/job-build.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --all-extras --no-extra cu124
|
||||
run: uv sync --all-extras --no-extra cu124 --no-extra flash-attn
|
||||
- name: Build package
|
||||
run: uv build
|
||||
- name: Check content of wheel
|
||||
|
||||
2
.github/workflows/job-checks.yml
vendored
2
.github/workflows/job-checks.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --frozen --all-extras --no-extra cu124
|
||||
run: uv sync --frozen --all-extras --no-extra cu124 --no-extra flash-attn
|
||||
|
||||
- name: Run styling check
|
||||
run: pre-commit run --all-files
|
||||
|
||||
@@ -46,7 +46,10 @@ RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
|
||||
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
--mount=type=bind,source=uv.lock,target=uv.lock \
|
||||
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
||||
umask 002 && uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
|
||||
umask 002 && \
|
||||
UV_SYNC_ARGS="--frozen --no-install-project --no-dev --all-extras" && \
|
||||
uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
|
||||
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn
|
||||
|
||||
ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
|
||||
|
||||
|
||||
2
Makefile
2
Makefile
@@ -35,7 +35,7 @@ docling-serve-image: Containerfile
|
||||
.PHONY: docling-serve-cpu-image
|
||||
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
|
||||
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
|
||||
|
||||
|
||||
@@ -63,6 +63,9 @@ cu124 = [
|
||||
"torch>=2.6.0",
|
||||
"torchvision>=0.21.0",
|
||||
]
|
||||
flash-attn = [
|
||||
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
@@ -83,7 +86,10 @@ conflicts = [
|
||||
{ extra = "cpu" },
|
||||
{ extra = "cu124" },
|
||||
],
|
||||
]
|
||||
[
|
||||
{ extra = "cpu" },
|
||||
{ extra = "flash-attn" },
|
||||
],]
|
||||
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
|
||||
override-dependencies = [
|
||||
"urllib3~=2.0"
|
||||
|
||||
Reference in New Issue
Block a user