mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 00:23:36 +00:00
docs: add split processing example (#303)
Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -446,3 +446,6 @@ pip-selfcheck.json
|
|||||||
.markdown-lint
|
.markdown-lint
|
||||||
|
|
||||||
cookies.txt
|
cookies.txt
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
/examples/splitted_pdf/*
|
||||||
@@ -7,12 +7,12 @@ repos:
|
|||||||
- id: ruff-format
|
- id: ruff-format
|
||||||
name: "Ruff formatter"
|
name: "Ruff formatter"
|
||||||
args: [--config=pyproject.toml]
|
args: [--config=pyproject.toml]
|
||||||
files: '^(docling_serve|tests).*\.(py|ipynb)$'
|
files: '^(docling_serve|tests|examples).*\.(py|ipynb)$'
|
||||||
# Run the Ruff linter.
|
# Run the Ruff linter.
|
||||||
- id: ruff
|
- id: ruff
|
||||||
name: "Ruff linter"
|
name: "Ruff linter"
|
||||||
args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
|
args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
|
||||||
files: '^(docling_serve|tests).*\.(py|ipynb)$'
|
files: '^(docling_serve|tests|examples).*\.(py|ipynb)$'
|
||||||
- repo: local
|
- repo: local
|
||||||
hooks:
|
hooks:
|
||||||
- id: system
|
- id: system
|
||||||
|
|||||||
22
docs/examples.md
Normal file
22
docs/examples.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Examples
|
||||||
|
|
||||||
|
## Split processing
|
||||||
|
|
||||||
|
The example of provided of split processing demonstrates how to split a PDF into chunks of pages and send them for conversion. At the end, it concatenates all split pages into a single conversion `JSON`.
|
||||||
|
|
||||||
|
At beginning of file there's variables to be used (and modified) such as:
|
||||||
|
| Variable | Description |
|
||||||
|
| ---------|-------------|
|
||||||
|
| `path_to_pdf`| Path to PDF file to be split |
|
||||||
|
| `pages_per_file`| The number of pages per chunk to split PDF |
|
||||||
|
| `base_url`| Base url of the `docling-serve` host |
|
||||||
|
| `out_dir`| The output folder of each conversion `JSON` of split PDF and the final concatenated `JSON` |
|
||||||
|
|
||||||
|
The example follows the following logic:
|
||||||
|
- Get the number of pages of the `PDF`
|
||||||
|
- Based on the number of chunks of pages, send each chunk to conversion using `page_range` parameter
|
||||||
|
- Wait all conversions to finish
|
||||||
|
- Get all conversion results
|
||||||
|
- Save each conversion `JSON` result into a `JSON` file
|
||||||
|
- Concatenate all `JSONs` into a single `JSON` using `docling` concatenate method
|
||||||
|
- Save concatenated `JSON` into a `JSON` file
|
||||||
124
examples/split_processing.py
Normal file
124
examples/split_processing.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import json
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
from docling_core.types.doc.document import DoclingDocument
|
||||||
|
|
||||||
|
# Variables to use
|
||||||
|
path_to_pdf = Path("./tests/2206.01062v1.pdf")
|
||||||
|
pages_per_file = 4
|
||||||
|
base_url = "http://localhost:5001/v1"
|
||||||
|
out_dir = Path("examples/splitted_pdf/")
|
||||||
|
|
||||||
|
|
||||||
|
class ConvertedSplittedPdf(BaseModel):
|
||||||
|
task_id: str
|
||||||
|
conversion_finished: bool = False
|
||||||
|
result: dict | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_task_result(task_id: str):
|
||||||
|
response = httpx.get(
|
||||||
|
f"{base_url}/result/{task_id}",
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def check_task_status(task_id: str):
|
||||||
|
response = httpx.get(f"{base_url}/status/poll/{task_id}", timeout=15)
|
||||||
|
task = response.json()
|
||||||
|
task_status = task["task_status"]
|
||||||
|
|
||||||
|
task_finished = False
|
||||||
|
if task_status == "success":
|
||||||
|
task_finished = True
|
||||||
|
|
||||||
|
if task_status in ("failure", "revoked"):
|
||||||
|
raise RuntimeError("A conversion failed")
|
||||||
|
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
return task_finished
|
||||||
|
|
||||||
|
|
||||||
|
def post_file(file_path: Path, start_page: int, end_page: int):
|
||||||
|
payload = {
|
||||||
|
"to_formats": ["json"],
|
||||||
|
"image_export_mode": "placeholder",
|
||||||
|
"ocr": False,
|
||||||
|
"abort_on_error": False,
|
||||||
|
"page_range": [start_page, end_page],
|
||||||
|
}
|
||||||
|
|
||||||
|
files = {
|
||||||
|
"files": (file_path.name, file_path.open("rb"), "application/pdf"),
|
||||||
|
}
|
||||||
|
response = httpx.post(
|
||||||
|
f"{base_url}/convert/file/async",
|
||||||
|
files=files,
|
||||||
|
data=payload,
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
|
||||||
|
task = response.json()
|
||||||
|
|
||||||
|
return task["task_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
filename = path_to_pdf
|
||||||
|
|
||||||
|
splitted_pdfs: list[ConvertedSplittedPdf] = []
|
||||||
|
|
||||||
|
with open(filename, "rb") as input_pdf_file:
|
||||||
|
pdf_reader = PdfReader(input_pdf_file)
|
||||||
|
total_pages = len(pdf_reader.pages)
|
||||||
|
|
||||||
|
for start_page in range(0, total_pages, pages_per_file):
|
||||||
|
task_id = post_file(
|
||||||
|
filename, start_page + 1, min(start_page + pages_per_file, total_pages)
|
||||||
|
)
|
||||||
|
splitted_pdfs.append(ConvertedSplittedPdf(task_id=task_id))
|
||||||
|
|
||||||
|
all_files_converted = False
|
||||||
|
while not all_files_converted:
|
||||||
|
found_conversion_running = False
|
||||||
|
for splitted_pdf in splitted_pdfs:
|
||||||
|
if not splitted_pdf.conversion_finished:
|
||||||
|
found_conversion_running = True
|
||||||
|
print("checking conversion status...")
|
||||||
|
splitted_pdf.conversion_finished = check_task_status(
|
||||||
|
splitted_pdf.task_id
|
||||||
|
)
|
||||||
|
if not found_conversion_running:
|
||||||
|
all_files_converted = True
|
||||||
|
|
||||||
|
for splitted_pdf in splitted_pdfs:
|
||||||
|
splitted_pdf.result = get_task_result(splitted_pdf.task_id)
|
||||||
|
|
||||||
|
files = []
|
||||||
|
for i, splitted_pdf in enumerate(splitted_pdfs):
|
||||||
|
json_content = json.dumps(
|
||||||
|
splitted_pdf.result.get("document").get("json_content"), indent=2
|
||||||
|
)
|
||||||
|
doc = DoclingDocument.model_validate_json(json_content)
|
||||||
|
filename = f"{out_dir}/splited_json_{i}.json"
|
||||||
|
doc.save_as_json(filename=filename)
|
||||||
|
files.append(filename)
|
||||||
|
|
||||||
|
docs = [DoclingDocument.load_from_json(filename=f) for f in files]
|
||||||
|
concate_doc = DoclingDocument.concatenate(docs=docs)
|
||||||
|
|
||||||
|
exp_json_file = Path(f"{out_dir}/concatenated.json")
|
||||||
|
concate_doc.save_as_json(exp_json_file)
|
||||||
|
|
||||||
|
print("Finished")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -34,7 +34,7 @@ classifiers = [
|
|||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"docling~=2.38",
|
"docling~=2.38",
|
||||||
"docling-core>=2.44.1",
|
"docling-core>=2.45.0",
|
||||||
"docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0",
|
"docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0",
|
||||||
"fastapi[standard]~=0.115",
|
"fastapi[standard]~=0.115",
|
||||||
"httpx~=0.28",
|
"httpx~=0.28",
|
||||||
@@ -69,6 +69,7 @@ dev = [
|
|||||||
"asgi-lifespan~=2.0",
|
"asgi-lifespan~=2.0",
|
||||||
"mypy~=1.11",
|
"mypy~=1.11",
|
||||||
"pre-commit-uv~=4.1",
|
"pre-commit-uv~=4.1",
|
||||||
|
"pypdf>=6.0.0",
|
||||||
"pytest~=8.3",
|
"pytest~=8.3",
|
||||||
"pytest-asyncio~=0.24",
|
"pytest-asyncio~=0.24",
|
||||||
"pytest-check~=2.4",
|
"pytest-check~=2.4",
|
||||||
|
|||||||
18
uv.lock
generated
18
uv.lock
generated
@@ -1511,6 +1511,7 @@ dev = [
|
|||||||
{ name = "asgi-lifespan", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "asgi-lifespan", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
{ name = "mypy", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "mypy", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
{ name = "pre-commit-uv", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "pre-commit-uv", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
|
{ name = "pypdf", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
{ name = "pytest", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "pytest", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
{ name = "pytest-asyncio", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "pytest-asyncio", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
{ name = "pytest-check", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "pytest-check", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
@@ -1532,7 +1533,7 @@ rocm = [
|
|||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "docling", specifier = "~=2.38" },
|
{ name = "docling", specifier = "~=2.38" },
|
||||||
{ name = "docling-core", specifier = ">=2.44.1" },
|
{ name = "docling-core", specifier = ">=2.45.0" },
|
||||||
{ name = "docling-jobkit", extras = ["kfp", "rq", "vlm"], specifier = ">=1.4.0,<2.0.0" },
|
{ name = "docling-jobkit", extras = ["kfp", "rq", "vlm"], specifier = ">=1.4.0,<2.0.0" },
|
||||||
{ name = "docling-mcp", specifier = ">=1.0.0" },
|
{ name = "docling-mcp", specifier = ">=1.0.0" },
|
||||||
{ name = "fastapi", extras = ["standard"], specifier = "~=0.115" },
|
{ name = "fastapi", extras = ["standard"], specifier = "~=0.115" },
|
||||||
@@ -1580,6 +1581,7 @@ dev = [
|
|||||||
{ name = "asgi-lifespan", specifier = "~=2.0" },
|
{ name = "asgi-lifespan", specifier = "~=2.0" },
|
||||||
{ name = "mypy", specifier = "~=1.11" },
|
{ name = "mypy", specifier = "~=1.11" },
|
||||||
{ name = "pre-commit-uv", specifier = "~=4.1" },
|
{ name = "pre-commit-uv", specifier = "~=4.1" },
|
||||||
|
{ name = "pypdf", specifier = ">=6.0.0" },
|
||||||
{ name = "pytest", specifier = "~=8.3" },
|
{ name = "pytest", specifier = "~=8.3" },
|
||||||
{ name = "pytest-asyncio", specifier = "~=0.24" },
|
{ name = "pytest-asyncio", specifier = "~=0.24" },
|
||||||
{ name = "pytest-check", specifier = "~=2.4" },
|
{ name = "pytest-check", specifier = "~=2.4" },
|
||||||
@@ -2561,7 +2563,7 @@ name = "jinja2"
|
|||||||
version = "3.1.6"
|
version = "3.1.6"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "markupsafe", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin' or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
{ name = "markupsafe", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
@@ -5648,6 +5650,18 @@ version = "2.10"
|
|||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597, upload-time = "2021-04-06T07:56:07.854Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597, upload-time = "2021-04-06T07:56:07.854Z" }
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pypdf"
|
||||||
|
version = "6.0.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "typing-extensions", marker = "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu124') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cpu' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu126') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu124' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-cu128') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu126' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-pypi') or (extra == 'group-13-docling-serve-cu128' and extra == 'group-13-docling-serve-rocm') or (extra == 'group-13-docling-serve-pypi' and extra == 'group-13-docling-serve-rocm')" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/20/ac/a300a03c3b34967c050677ccb16e7a4b65607ee5df9d51e8b6d713de4098/pypdf-6.0.0.tar.gz", hash = "sha256:282a99d2cc94a84a3a3159f0d9358c0af53f85b4d28d76ea38b96e9e5ac2a08d", size = 5033827, upload-time = "2025-08-11T14:22:02.352Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2c/83/2cacc506eb322bb31b747bc06ccb82cc9aa03e19ee9c1245e538e49d52be/pypdf-6.0.0-py3-none-any.whl", hash = "sha256:56ea60100ce9f11fc3eec4f359da15e9aec3821b036c1f06d2b660d35683abb8", size = 310465, upload-time = "2025-08-11T14:22:00.481Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pypdfium2"
|
name = "pypdfium2"
|
||||||
version = "4.30.0"
|
version = "4.30.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user