From 0d4545a65a5a941fc1fdefda57e39cfb1ea106ab Mon Sep 17 00:00:00 2001 From: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com> Date: Thu, 4 Sep 2025 09:42:11 +0100 Subject: [PATCH] docs: add split processing example (#303) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com> Co-authored-by: Michele Dolfi --- .gitignore | 5 +- .pre-commit-config.yaml | 4 +- docs/examples.md | 22 +++++++ examples/split_processing.py | 124 +++++++++++++++++++++++++++++++++++ pyproject.toml | 3 +- uv.lock | 18 ++++- 6 files changed, 170 insertions(+), 6 deletions(-) create mode 100644 docs/examples.md create mode 100644 examples/split_processing.py diff --git a/.gitignore b/.gitignore index d94fc62..9d25b93 100644 --- a/.gitignore +++ b/.gitignore @@ -445,4 +445,7 @@ pip-selfcheck.json .action-lint .markdown-lint -cookies.txt \ No newline at end of file +cookies.txt + +# Examples +/examples/splitted_pdf/* \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5b91069..3f709a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,12 +7,12 @@ repos: - id: ruff-format name: "Ruff formatter" args: [--config=pyproject.toml] - files: '^(docling_serve|tests).*\.(py|ipynb)$' + files: '^(docling_serve|tests|examples).*\.(py|ipynb)$' # Run the Ruff linter. - id: ruff name: "Ruff linter" args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml] - files: '^(docling_serve|tests).*\.(py|ipynb)$' + files: '^(docling_serve|tests|examples).*\.(py|ipynb)$' - repo: local hooks: - id: system diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 0000000..a3c5edb --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,22 @@ +# Examples + +## Split processing + +The provided split processing example demonstrates how to split a PDF into chunks of pages and send them for conversion. At the end, it concatenates all split pages into a single conversion `JSON`. 
+ +At the beginning of the file there are variables to be used (and modified), such as: +| Variable | Description | +| ---------|-------------| +| `path_to_pdf`| Path to the PDF file to be split | +| `pages_per_file`| The number of pages per chunk to split PDF | +| `base_url`| Base URL of the `docling-serve` host | +| `out_dir`| The output folder of each conversion `JSON` of split PDF and the final concatenated `JSON` | + +The example follows this logic: +- Get the number of pages of the `PDF` +- Based on the number of chunks of pages, send each chunk to conversion using `page_range` parameter +- Wait for all conversions to finish +- Get all conversion results +- Save each conversion `JSON` result into a `JSON` file +- Concatenate all `JSONs` into a single `JSON` using `docling` concatenate method +- Save concatenated `JSON` into a `JSON` file \ No newline at end of file diff --git a/examples/split_processing.py b/examples/split_processing.py new file mode 100644 index 0000000..fe02a9e --- /dev/null +++ b/examples/split_processing.py @@ -0,0 +1,124 @@ +import json +import time +from pathlib import Path + +import httpx +from pydantic import BaseModel +from pypdf import PdfReader + +from docling_core.types.doc.document import DoclingDocument + +# Variables to use +path_to_pdf = Path("./tests/2206.01062v1.pdf") +pages_per_file = 4 +base_url = "http://localhost:5001/v1" +out_dir = Path("examples/splitted_pdf/") + + +class ConvertedSplittedPdf(BaseModel): + task_id: str + conversion_finished: bool = False + result: dict | None = None + + +def get_task_result(task_id: str): + response = httpx.get( + f"{base_url}/result/{task_id}", + timeout=15, + ) + return response.json() + + +def check_task_status(task_id: str): + response = httpx.get(f"{base_url}/status/poll/{task_id}", timeout=15) + task = response.json() + task_status = task["task_status"] + + task_finished = False + if task_status == "success": + task_finished = True + + if task_status in ("failure", "revoked"): + raise 
RuntimeError("A conversion failed") + + time.sleep(5) + + return task_finished + + +def post_file(file_path: Path, start_page: int, end_page: int): + payload = { + "to_formats": ["json"], + "image_export_mode": "placeholder", + "ocr": False, + "abort_on_error": False, + "page_range": [start_page, end_page], + } + + files = { + "files": (file_path.name, file_path.open("rb"), "application/pdf"), + } + response = httpx.post( + f"{base_url}/convert/file/async", + files=files, + data=payload, + timeout=15, + ) + + task = response.json() + + return task["task_id"] + + +def main(): + filename = path_to_pdf + + splitted_pdfs: list[ConvertedSplittedPdf] = [] + + with open(filename, "rb") as input_pdf_file: + pdf_reader = PdfReader(input_pdf_file) + total_pages = len(pdf_reader.pages) + + for start_page in range(0, total_pages, pages_per_file): + task_id = post_file( + filename, start_page + 1, min(start_page + pages_per_file, total_pages) + ) + splitted_pdfs.append(ConvertedSplittedPdf(task_id=task_id)) + + all_files_converted = False + while not all_files_converted: + found_conversion_running = False + for splitted_pdf in splitted_pdfs: + if not splitted_pdf.conversion_finished: + found_conversion_running = True + print("checking conversion status...") + splitted_pdf.conversion_finished = check_task_status( + splitted_pdf.task_id + ) + if not found_conversion_running: + all_files_converted = True + + for splitted_pdf in splitted_pdfs: + splitted_pdf.result = get_task_result(splitted_pdf.task_id) + + files = [] + for i, splitted_pdf in enumerate(splitted_pdfs): + json_content = json.dumps( + splitted_pdf.result.get("document").get("json_content"), indent=2 + ) + doc = DoclingDocument.model_validate_json(json_content) + filename = f"{out_dir}/splited_json_{i}.json" + doc.save_as_json(filename=filename) + files.append(filename) + + docs = [DoclingDocument.load_from_json(filename=f) for f in files] + concate_doc = DoclingDocument.concatenate(docs=docs) + + exp_json_file = 
Path(f"{out_dir}/concatenated.json") + concate_doc.save_as_json(exp_json_file) + + print("Finished") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 4d5916b..86193f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "docling~=2.38", - "docling-core>=2.44.1", + "docling-core>=2.45.0", "docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0", "fastapi[standard]~=0.115", "httpx~=0.28", @@ -69,6 +69,7 @@ dev = [ "asgi-lifespan~=2.0", "mypy~=1.11", "pre-commit-uv~=4.1", + "pypdf>=6.0.0", "pytest~=8.3", "pytest-asyncio~=0.24", "pytest-check~=2.4", diff --git a/uv.lock b/uv.lock index b1b27b7..3f54ad3 100644 --- a/uv.lock +++ b/uv.lock @@ -1511,6 +1511,7 @@ dev = [ { name = "asgi-lifespan", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 
'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, { name = "mypy", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, { name = "pre-commit-uv", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 
'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, + { name = "pypdf", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 
'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, { name = "pytest", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, { name = "pytest-asyncio", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 
'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, { name = "pytest-check", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 
'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, @@ -1532,7 +1533,7 @@ rocm = [ [package.metadata] requires-dist = [ { name = "docling", specifier = "~=2.38" }, - { name = "docling-core", specifier = ">=2.44.1" }, + { name = "docling-core", specifier = ">=2.45.0" }, { name = "docling-jobkit", extras = ["kfp", "rq", "vlm"], specifier = ">=1.4.0,<2.0.0" }, { name = "docling-mcp", specifier = ">=1.0.0" }, { name = "fastapi", extras = ["standard"], specifier = "~=0.115" }, @@ -1580,6 +1581,7 @@ dev = [ { name = "asgi-lifespan", specifier = "~=2.0" }, { name = "mypy", specifier = "~=1.11" }, { name = "pre-commit-uv", specifier = "~=4.1" }, + { name = "pypdf", specifier = ">=6.0.0" }, { name = "pytest", specifier = "~=8.3" }, { name = "pytest-asyncio", specifier = "~=0.24" }, { name = "pytest-check", specifier = "~=2.4" }, @@ -2561,7 +2563,7 @@ name = "jinja2" version = "3.1.6" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markupsafe", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or 
(extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, + { name = "markupsafe", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } wheels = [ @@ -5648,6 +5650,18 @@ version = "2.10" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597, upload-time = "2021-04-06T07:56:07.854Z" } +[[package]] +name = "pypdf" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = 
"(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/ac/a300a03c3b34967c050677ccb16e7a4b65607ee5df9d51e8b6d713de4098/pypdf-6.0.0.tar.gz", hash = "sha256:282a99d2cc94a84a3a3159f0d9358c0af53f85b4d28d76ea38b96e9e5ac2a08d", size = 5033827, upload-time = "2025-08-11T14:22:02.352Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/83/2cacc506eb322bb31b747bc06ccb82cc9aa03e19ee9c1245e538e49d52be/pypdf-6.0.0-py3-none-any.whl", hash = "sha256:56ea60100ce9f11fc3eec4f359da15e9aec3821b036c1f06d2b660d35683abb8", size = 310465, 
upload-time = "2025-08-11T14:22:00.481Z" }, +] + [[package]] name = "pypdfium2" version = "4.30.0"