diff --git a/docling_serve/app.py b/docling_serve/app.py index 1fa8d36..0bb6510 100644 --- a/docling_serve/app.py +++ b/docling_serve/app.py @@ -5,7 +5,7 @@ import tempfile from contextlib import asynccontextmanager from io import BytesIO from pathlib import Path -from typing import Annotated, Any, Dict, List, Optional, Union +from typing import Annotated, Any, Optional, Union from fastapi import ( BackgroundTasks, @@ -205,8 +205,8 @@ def create_app(): # noqa: C901 def process_url( background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest ): - sources: List[Union[str, DocumentStream]] = [] - headers: Optional[Dict[str, Any]] = None + sources: list[Union[str, DocumentStream]] = [] + headers: Optional[dict[str, Any]] = None if isinstance(conversion_request, ConvertDocumentFileSourcesRequest): for file_source in conversion_request.file_sources: sources.append(file_source.to_document_stream()) @@ -242,7 +242,7 @@ def create_app(): # noqa: C901 ) async def process_file( background_tasks: BackgroundTasks, - files: List[UploadFile], + files: list[UploadFile], options: Annotated[ ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions) ], diff --git a/docling_serve/datamodel/convert.py b/docling_serve/datamodel/convert.py index 5d50e18..f299815 100644 --- a/docling_serve/datamodel/convert.py +++ b/docling_serve/datamodel/convert.py @@ -1,5 +1,5 @@ # Define the input options for the API -from typing import Annotated, List, Optional +from typing import Annotated, Optional from pydantic import BaseModel, Field @@ -10,7 +10,7 @@ from docling_core.types.doc import ImageRefMode class ConvertDocumentsOptions(BaseModel): from_formats: Annotated[ - List[InputFormat], + list[InputFormat], Field( description=( "Input format(s) to convert from. String or list of strings. " @@ -22,7 +22,7 @@ class ConvertDocumentsOptions(BaseModel): ] = list(InputFormat) to_formats: Annotated[ - List[OutputFormat], + list[OutputFormat], Field( description=( "Output format(s) to convert to. String or list of strings. " @@ -83,7 +83,7 @@ class ConvertDocumentsOptions(BaseModel): ] = OcrEngine.EASYOCR ocr_lang: Annotated[ - Optional[List[str]], + Optional[list[str]], Field( description=( "List of languages used by the OCR engine. " diff --git a/docling_serve/datamodel/requests.py b/docling_serve/datamodel/requests.py index 129e46f..864254d 100644 --- a/docling_serve/datamodel/requests.py +++ b/docling_serve/datamodel/requests.py @@ -1,6 +1,6 @@ import base64 from io import BytesIO -from typing import Annotated, Any, Dict, List, Union +from typing import Annotated, Any, Union from pydantic import BaseModel, Field @@ -22,7 +22,7 @@ class HttpSource(BaseModel): ), ] headers: Annotated[ - Dict[str, Any], + dict[str, Any], Field( description="Additional headers used to fetch the urls, " "e.g. authorization, agent, etc" @@ -50,11 +50,11 @@ class FileSource(BaseModel): class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase): - http_sources: List[HttpSource] + http_sources: list[HttpSource] class ConvertDocumentFileSourcesRequest(DocumentsConvertBase): - file_sources: List[FileSource] + file_sources: list[FileSource] ConvertDocumentsRequest = Union[ diff --git a/docling_serve/datamodel/responses.py b/docling_serve/datamodel/responses.py index a6ee9c2..8e7da7d 100644 --- a/docling_serve/datamodel/responses.py +++ b/docling_serve/datamodel/responses.py @@ -1,5 +1,5 @@ import enum -from typing import Dict, List, Optional +from typing import Optional from pydantic import BaseModel @@ -25,9 +25,9 @@ class DocumentResponse(BaseModel): class ConvertDocumentResponse(BaseModel): document: DocumentResponse status: ConversionStatus - errors: List[ErrorItem] = [] + errors: list[ErrorItem] = [] processing_time: float - timings: Dict[str, ProfilingItem] = {} + timings: dict[str, ProfilingItem] = {} class ConvertDocumentErrorResponse(BaseModel): diff --git a/docling_serve/docling_conversion.py b/docling_serve/docling_conversion.py index 7f2ab14..2d41f5a 100644 --- a/docling_serve/docling_conversion.py +++ b/docling_serve/docling_conversion.py @@ -1,8 +1,9 @@ import hashlib import json import logging +from collections.abc import Iterable, Iterator from pathlib import Path -from typing import Any, Dict, Iterable, Iterator, Optional, Tuple, Type, Union +from typing import Any, Optional, Union from fastapi import HTTPException @@ -33,7 +34,7 @@ _log = logging.getLogger(__name__) # Document converters will be preloaded and stored in a dictionary -converters: Dict[bytes, DocumentConverter] = {} +converters: dict[bytes, DocumentConverter] = {} # Custom serializer for PdfFormatOption @@ -69,7 +70,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str: # Computes the PDF pipeline options and returns the PdfFormatOption and its hash def get_pdf_pipeline_opts( # noqa: C901 request: ConvertDocumentsOptions, -) -> Tuple[PdfFormatOption, bytes]: +) -> tuple[PdfFormatOption, bytes]: if request.ocr_engine == OcrEngine.EASYOCR: try: import easyocr # noqa: F401 @@ -129,7 +130,7 @@ def get_pdf_pipeline_opts( # noqa: C901 pipeline_options.images_scale = request.images_scale if request.pdf_backend == PdfBackend.DLPARSE_V1: - backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend + backend: type[PdfDocumentBackend] = DoclingParseDocumentBackend elif request.pdf_backend == PdfBackend.DLPARSE_V2: backend = DoclingParseV2DocumentBackend elif request.pdf_backend == PdfBackend.PYPDFIUM2: @@ -177,12 +178,12 @@ def get_pdf_pipeline_opts( # noqa: C901 def convert_documents( sources: Iterable[Union[Path, str, DocumentStream]], options: ConvertDocumentsOptions, - headers: Optional[Dict[str, Any]] = None, + headers: Optional[dict[str, Any]] = None, ): pdf_format_option, options_hash = get_pdf_pipeline_opts(options) if options_hash not in converters: - format_options: Dict[InputFormat, FormatOption] = { + format_options: dict[InputFormat, FormatOption] = { InputFormat.PDF: pdf_format_option, InputFormat.IMAGE: pdf_format_option, } diff --git a/docling_serve/engines/async_local/orchestrator.py b/docling_serve/engines/async_local/orchestrator.py index 8de5f50..cb5f322 100644 --- a/docling_serve/engines/async_local/orchestrator.py +++ b/docling_serve/engines/async_local/orchestrator.py @@ -1,7 +1,7 @@ import asyncio import logging import uuid -from typing import Dict, List, Optional, Set +from typing import Optional from fastapi import WebSocket @@ -30,9 +30,9 @@ class TaskNotFoundError(OrchestratorError): class AsyncLocalOrchestrator(BaseOrchestrator): def __init__(self): self.task_queue = asyncio.Queue() - self.tasks: Dict[str, Task] = {} - self.queue_list: List[str] = [] - self.task_subscribers: Dict[str, Set[WebSocket]] = {} + self.tasks: dict[str, Task] = {} + self.queue_list: list[str] = [] + self.task_subscribers: dict[str, set[WebSocket]] = {} async def enqueue(self, request: ConvertDocumentsRequest) -> Task: task_id = str(uuid.uuid4()) diff --git a/docling_serve/engines/async_local/worker.py b/docling_serve/engines/async_local/worker.py index c3258fe..f19f33c 100644 --- a/docling_serve/engines/async_local/worker.py +++ b/docling_serve/engines/async_local/worker.py @@ -1,7 +1,7 @@ import asyncio import logging import time -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from fastapi import BackgroundTasks @@ -50,8 +50,8 @@ class AsyncLocalWorker: # Define a callback function to send progress updates to the client. # TODO: send partial updates, e.g. when a document in the batch is done def run_conversion(): - sources: List[Union[str, DocumentStream]] = [] - headers: Optional[Dict[str, Any]] = None + sources: list[Union[str, DocumentStream]] = [] + headers: Optional[dict[str, Any]] = None if isinstance(task.request, ConvertDocumentFileSourcesRequest): for file_source in task.request.file_sources: sources.append(file_source.to_document_stream()) diff --git a/docling_serve/helper_functions.py b/docling_serve/helper_functions.py index 80bedb3..c42a391 100644 --- a/docling_serve/helper_functions.py +++ b/docling_serve/helper_functions.py @@ -1,6 +1,6 @@ import inspect import re -from typing import List, Type, Union +from typing import Union from fastapi import Depends, Form from pydantic import BaseModel @@ -8,7 +8,7 @@ from pydantic import BaseModel # Adapted from # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972 -def FormDepends(cls: Type[BaseModel]): +def FormDepends(cls: type[BaseModel]): new_parameters = [] for field_name, model_field in cls.model_fields.items(): @@ -34,8 +34,8 @@ def FormDepends(cls: Type[BaseModel]): return Depends(as_form_func) -def _to_list_of_strings(input_value: Union[str, List[str]]) -> List[str]: - def split_and_strip(value: str) -> List[str]: +def _to_list_of_strings(input_value: Union[str, list[str]]) -> list[str]: + def split_and_strip(value: str) -> list[str]: if re.search(r"[;,]", value): return [item.strip() for item in re.split(r"[;,]", value)] else: diff --git a/docling_serve/response_preparation.py b/docling_serve/response_preparation.py index ecf334f..df704d8 100644 --- a/docling_serve/response_preparation.py +++ b/docling_serve/response_preparation.py @@ -3,8 +3,9 @@ import os import shutil import tempfile import time +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Union +from typing import Union from fastapi import BackgroundTasks, HTTPException from fastapi.responses import FileResponse diff --git a/pyproject.toml b/pyproject.toml index e43455b..d08a207 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,7 +145,8 @@ select = [ "S307", # eval # "T20", # (disallow print statements) keep debugging statements out of the codebase "W", # pycodestyle warnings - "ASYNC" # async + "ASYNC", # async + "UP", # pyupgrade ] ignore = [ @@ -154,6 +155,7 @@ ignore = [ "F811", # "redefinition of the same function" "PL", # Pylint "RUF012", # Mutable Class Attributes + "UP007", # Option and Union ] #extend-select = []