ci: workflow improvements (#310)

Signed-off-by: rmdg88 <rmdg88@gmail.com> Signed-off-by: Rui Dias Gomes <66125272+rmdg88@users.noreply.github.com>
2025-11-29 08:33:50 +00:00 · 2025-09-03 09:06:30 +01:00
parent e5449472b2
commit 1df62adf01
6 changed files with 345 additions and 25 deletions
--- a/.github/scripts/release.sh
+++ b/.github/scripts/release.sh
@@ -3,32 +3,68 @@
 set -e  # trigger failure on error - do not remove!
 set -x  # display command on output

+## debug
+# TARGET_VERSION="1.2.x"
+
 if [ -z "${TARGET_VERSION}" ]; then
    >&2 echo "No TARGET_VERSION specified"
    exit 1
 fi
 CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"

-# update package version
+# Update package version
 uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
 uv lock --upgrade-package docling-serve

-# collect release notes
+# Extract all docling packages and versions from uv.lock
+DOCVERSIONS=$(uvx --with toml python3 - <<'PY'
+import toml
+data = toml.load("uv.lock")
+for pkg in data.get("package", []):
+    if pkg["name"].startswith("docling"):
+        print(f"{pkg['name']} {pkg['version']}")
+PY
+)
+
+# Format docling versions list without trailing newline
+DOCLING_VERSIONS="### Docling libraries included in this release:"
+while IFS= read -r line; do
+  DOCLING_VERSIONS+="
+- $line"
+done <<< "$DOCVERSIONS"
+
+# Collect release notes
 REL_NOTES=$(mktemp)
 uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"

-# update changelog
+# Strip trailing blank lines from release notes and append docling versions
+{
+  sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${REL_NOTES}"
+  printf "\n"
+  printf "%s" "${DOCLING_VERSIONS}"
+  printf "\n"
+} > "${REL_NOTES}.tmp" && mv "${REL_NOTES}.tmp" "${REL_NOTES}"
+
+# Update changelog
 TMP_CHGLOG=$(mktemp)
 TARGET_TAG_NAME="v${TARGET_VERSION}"
 RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}"
-printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n" >> "${TMP_CHGLOG}"
-cat "${REL_NOTES}" >> "${TMP_CHGLOG}"
-if [ -f "${CHGLOG_FILE}" ]; then
-    printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}"
-fi
+## debug
+#RELEASE_URL="myrepo/releases/tag/${TARGET_TAG_NAME}"
+
+# Strip trailing blank lines from the existing changelog (empty if the file does not exist yet, e.g. first release)
+EXISTING_CL=$([ ! -f "${CHGLOG_FILE}" ] || sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${CHGLOG_FILE}")
+
+{
+  printf '## [%s](%s) - %s\n\n' "${TARGET_TAG_NAME}" "${RELEASE_URL}" "$(date -Idate)"
+  cat "${REL_NOTES}"
+  # Blank separator plus the previous entries; skipped when there is no earlier changelog content
+  [ -z "${EXISTING_CL}" ] || printf '\n%s\n' "${EXISTING_CL}"
+} >> "${TMP_CHGLOG}"
+
 mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"

-# push changes
+# Push changes
 git config --global user.name 'github-actions[bot]'
 git config --global user.email 'github-actions[bot]@users.noreply.github.com'
 git add pyproject.toml uv.lock "${CHGLOG_FILE}"
@@ -36,5 +72,5 @@ COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
 git commit -m "${COMMIT_MSG}"
 git push origin main

-# create GitHub release (incl. Git tag)
+# Create GitHub release (incl. Git tag)
 gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}"
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -13,7 +13,7 @@ jobs:
  actionlint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Download actionlint
        id: get_actionlint
        run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash)
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -11,7 +11,7 @@ jobs:
    outputs:
      TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
@@ -40,7 +40,7 @@ jobs:
        with:
          app-id: ${{ vars.CI_APP_ID }}
          private-key: ${{ secrets.CI_PRIVATE_KEY }}
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
        with:
          token: ${{ steps.app-token.outputs.token }}
          fetch-depth: 0  # for fetching tags, required for semantic-release
--- a/.github/workflows/job-build.yml
+++ b/.github/workflows/job-build.yml
@@ -10,7 +10,7 @@ jobs:
      matrix:
        python-version: ['3.12']
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
--- a/.github/workflows/job-checks.yml
+++ b/.github/workflows/job-checks.yml
@@ -10,7 +10,7 @@ jobs:
      matrix:
        python-version: ['3.12']
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
@@ -61,7 +61,7 @@ jobs:
  markdown-lint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: markdownlint-cli2-action
        uses: DavidAnson/markdownlint-cli2-action@v16
        with:
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -53,7 +53,7 @@ jobs:
            df -h

      - name: Check out the repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Log in to the GHCR container image registry
        if: ${{ inputs.publish }}
@@ -88,19 +88,112 @@ jobs:
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}

+      # # Local test
+      # - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
+      #   id: ghcr_meta
+      #   run: |
+      #     echo "tags=ghcr.io/docling-project/docling-serve:pr-123" >> $GITHUB_OUTPUT
+      #     echo "labels=org.opencontainers.image.source=https://github.com/docling-project/docling-serve" >> $GITHUB_OUTPUT
+
      - name: Build and push image to ghcr.io
        id: ghcr_push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
-          push: ${{ inputs.publish }}
+          push: ${{ inputs.publish }} # set 'false' for local test
          tags: ${{ steps.ghcr_meta.outputs.tags }}
          labels: ${{ steps.ghcr_meta.outputs.labels }}
-          platforms: ${{ inputs.platforms}}
+          platforms: ${{ inputs.platforms }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
+      ##
+      ## This step runs after the build above, so it reuses the build cache
+      ##
+      - name: Export built image for testing
+        id: ghcr_export_built_image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          load: true # == '--output=type=docker'
+          tags: ${{ steps.ghcr_meta.outputs.tags }}-test
+          labels: |
+            org.opencontainers.image.title=docling-serve
+            org.opencontainers.image.test=true
+          platforms: linux/amd64 # when 'load' is true, we can't use a list ${{ inputs.platforms }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          build-args: ${{ inputs.build_args }}
+
+      - name: Test image
+        if: steps.ghcr_export_built_image.outcome == 'success'
+        run: |
+          set -e
+          # The tags output may hold several newline-separated tags; only the last line carries the "-test" suffix
+          IMAGE_TAG="$(printf '%s\n' "${{ steps.ghcr_meta.outputs.tags }}-test" | tail -n 1)"
+          echo "Testing local image: $IMAGE_TAG"
+
+          # Remove existing container if any
+          docker rm -f docling-serve-test-container 2>/dev/null || true
+
+          echo "Starting container..."
+          docker run -d -p 5001:5001 --name docling-serve-test-container "$IMAGE_TAG"
+
+          echo "Waiting 15s for container to boot..."
+          sleep 15
+
+          # Health check
+          echo "Checking service health..."
+          for i in {1..20}; do
+            HEALTH_RESPONSE=$(curl -s http://localhost:5001/health || true)
+            echo "Health check response [$i]: $HEALTH_RESPONSE"
+
+            if echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
+              echo "Service is healthy!"
+
+              # Install pytest and dependencies
+              echo "Installing pytest and dependencies..."
+              pip install uv
+              uv venv --allow-existing
+              source .venv/bin/activate
+              uv sync --all-extras --no-extra flash-attn
+
+              # Run pytest tests
+              echo "Running tests..."
+              # Test import
+              python -c 'from docling_serve.app import create_app; create_app()'
+
+              # Run pytest and check result directly
+              if ! pytest -sv -k "test_convert_url" tests/test_1-url-async.py \
+                --disable-warnings; then
+                echo "Tests failed!"
+                docker logs docling-serve-test-container
+                docker rm -f docling-serve-test-container
+                exit 1
+              fi
+
+              echo "Tests passed successfully!"
+              break
+            else
+              echo "Waiting for service... [$i/20]"
+              sleep 3
+            fi
+          done
+
+          # Final health check if service didn't pass earlier
+          if ! echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
+            echo "Service did not become healthy in time."
+            docker logs docling-serve-test-container
+            docker rm -f docling-serve-test-container
+            exit 1
+          fi
+
+          # Cleanup
+          echo "Cleaning up test container..."
+          docker rm -f docling-serve-test-container

      - name: Generate artifact attestation
        if: ${{ inputs.publish }}
@@ -120,7 +213,7 @@ jobs:
      - name: Build and push image to quay.io
        if: ${{ inputs.publish }}
        # id: push-serve-cpu-quay
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          push: ${{ inputs.publish }}
@@ -132,10 +225,201 @@ jobs:
          file: Containerfile
          build-args: ${{ inputs.build_args }}

-      # - name: Inspect the image details
-      #   run: |
-      #     echo "${{ steps.ghcr_push.outputs.metadata }}"
-
      - name: Remove Local Docker Images
        run: |
          docker image prune -af
+##
+## Extra tests for released images
+##
+
+    # outputs:
+    #   image-tags: ${{ steps.ghcr_meta.outputs.tags }}
+    #   image-labels: ${{ steps.ghcr_meta.outputs.labels }}
+
+  # test-cpu-image:
+  #   needs:
+  #     - image
+  #   runs-on: ubuntu-latest
+  #   permissions:
+  #     contents: read
+  #     packages: read
+
+  #   steps:
+  #     - name: Checkout code
+  #       uses: actions/checkout@v5
+
+  #     - name: Test CPU images
+  #       run: |
+  #         set -e
+
+  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
+
+  #         for tag in ${{ needs.image.outputs.image-tags }}; do
+  #           if echo "$tag" | grep -q -- '-cpu' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
+  #             echo "Testing CPU image: $tag"
+
+  #             # Remove existing container if any
+  #             docker rm -f docling-serve-test-container 2>/dev/null || true
+
+  #             echo "Pulling image..."
+  #             docker pull "$tag"
+
+  #             echo "Waiting 5s after pull..."
+  #             sleep 5
+
+  #             echo "Starting container..."
+  #             docker run -d -p 5001:5001 --name docling-serve-test-container "$tag"
+
+  #             echo "Waiting 15s for container to boot..."
+  #             sleep 15
+
+  #             echo "Checking service health..."
+  #             for i in {1..20}; do
+  #               health_response=$(curl -s http://localhost:5001/health || true)
+  #               echo "Health check response [$i]: $health_response"
+  #               if echo "$health_response" | grep -q '"status":"ok"'; then
+  #                 echo "Service is healthy!"
+  #                 echo "Sending test conversion request..."
+
+  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
+  #                   -H 'accept: application/json' \
+  #                   -H 'Content-Type: application/json' \
+  #                   -d '{
+  #                     "options": {
+  #                       "from_formats": ["pdf"],
+  #                       "to_formats": ["md"]
+  #                     },
+  #                     "sources": [
+  #                       {
+  #                         "kind": "http",
+  #                         "url": "https://arxiv.org/pdf/2501.17887"
+  #                       }
+  #                     ],
+  #                     "target": {
+  #                       "kind": "inbody"
+  #                     }
+  #                   }')
+
+  #                 echo "Conversion request returned status code: $status_code"
+
+  #                 if [ "$status_code" -ne 200 ]; then
+  #                   echo "Conversion failed!"
+  #                   docker logs docling-serve-test-container
+  #                   docker rm -f docling-serve-test-container
+  #                   exit 1
+  #                 fi
+
+  #                 break
+  #               else
+  #                 echo "Waiting for service... [$i/20]"
+  #                 sleep 3
+  #               fi
+  #             done
+
+  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
+  #               echo "Service did not become healthy in time."
+  #               docker logs docling-serve-test-container
+  #               docker rm -f docling-serve-test-container
+  #               exit 1
+  #             fi
+
+  #             echo "Cleaning up test container..."
+  #             docker rm -f docling-serve-test-container
+  #           else
+  #             echo "Skipping non-released or non-CPU image: $tag"
+  #           fi
+  #         done
+
+  # test-cuda-image:
+  #   needs:
+  #     - image
+  #   runs-on: ubuntu-latest # >> placeholder for GPU runner << #
+  #   permissions:
+  #     contents: read
+  #     packages: read
+
+  #   steps:
+  #     - name: Checkout code
+  #       uses: actions/checkout@v5
+
+  #     - name: Test CUDA images
+  #       run: |
+  #         set -e
+
+  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
+
+  #         for tag in ${{ needs.image.outputs.image-tags }}; do
+  #           if echo "$tag" | grep -qE -- '-cu[0-9]+' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
+  #             echo "Testing CUDA image: $tag"
+
+  #             # Remove existing container if any
+  #             docker rm -f docling-serve-test-container 2>/dev/null || true
+
+  #             echo "Pulling image..."
+  #             docker pull "$tag"
+
+  #             echo "Waiting 5s after pull..."
+  #             sleep 5
+
+  #             echo "Starting container..."
+  #             docker run -d -p 5001:5001 --gpus all --name docling-serve-test-container "$tag"
+
+  #             echo "Waiting 15s for container to boot..."
+  #             sleep 15
+
+  #             echo "Checking service health..."
+  #             for i in {1..25}; do
+  #               health_response=$(curl -s http://localhost:5001/health || true)
+  #               echo "Health check response [$i]: $health_response"
+  #               if echo "$health_response" | grep -q '"status":"ok"'; then
+  #                 echo "Service is healthy!"
+  #                 echo "Sending test conversion request..."
+
+  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
+  #                   -H 'accept: application/json' \
+  #                   -H 'Content-Type: application/json' \
+  #                   -d '{
+  #                     "options": {
+  #                       "from_formats": ["pdf"],
+  #                       "to_formats": ["md"]
+  #                     },
+  #                     "sources": [
+  #                       {
+  #                         "kind": "http",
+  #                         "url": "https://arxiv.org/pdf/2501.17887"
+  #                       }
+  #                     ],
+  #                     "target": {
+  #                       "kind": "inbody"
+  #                     }
+  #                   }')
+
+  #                 echo "Conversion request returned status code: $status_code"
+
+  #                 if [ "$status_code" -ne 200 ]; then
+  #                   echo "Conversion failed!"
+  #                   docker logs docling-serve-test-container
+  #                   docker rm -f docling-serve-test-container
+  #                   exit 1
+  #                 fi
+
+  #                 break
+  #               else
+  #                 echo "Waiting for service... [$i/25]"
+  #                 sleep 3
+  #               fi
+  #             done
+
+  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
+  #               echo "Service did not become healthy in time."
+  #               docker logs docling-serve-test-container
+  #               docker rm -f docling-serve-test-container
+  #               exit 1
+  #             fi
+
+  #             echo "Cleaning up test container..."
+  #             docker rm -f docling-serve-test-container
+  #           else
+  #             echo "Skipping non-released or non-CUDA image: $tag"
+  #           fi
+  #         done