import re from typing import Annotated, Any, Union, get_args, get_origin from pydantic import BaseModel from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions DOCS_FILE = "docs/usage.md" VARIABLE_WORDS: list[str] = [ "picture_description_local", "vlm_pipeline_model", "vlm", "vlm_pipeline_model_api", "ocr_engines_enum", "easyocr", "dlparse_v4", "fast", "picture_description_api", "vlm_pipeline_model_local", ] def format_variable_names(text: str) -> str: """Format specific words in description to be code-formatted.""" sorted_words = sorted(VARIABLE_WORDS, key=len, reverse=True) escaped_words = [re.escape(word) for word in sorted_words] for word in escaped_words: pattern = rf"(? str: """Format description to code-format allowed values.""" # Regex pattern to find text after "Allowed values:" match = re.search(r"Allowed values:(.+?)(?:\.|$)", description, re.DOTALL) if match: # Extract the allowed values values_str = match.group(1).strip() # Split values, handling both comma and 'and' separators values = re.split(r"\s*(?:,\s*|\s+and\s+)", values_str) # Remove any remaining punctuation and whitespace values = [value.strip("., ") for value in values] # Create code-formatted values formatted_values = ", ".join(f"`{value}`" for value in values) # Replace the original allowed values with formatted version formatted_description = re.sub( r"(Allowed values:)(.+?)(?:\.|$)", f"\\1 {formatted_values}.", description, flags=re.DOTALL, ) return formatted_description return description def _format_type(type_hint: Any) -> str: """Format type ccrrectly, like Annotation or Union.""" if get_origin(type_hint) is Annotated: base_type = get_args(type_hint)[0] return _format_type(base_type) if hasattr(type_hint, "__origin__"): origin = type_hint.__origin__ args = get_args(type_hint) if origin is list: return f"List[{_format_type(args[0])}]" elif origin is dict: return f"Dict[{_format_type(args[0])}, {_format_type(args[1])}]" elif str(origin).__contains__("Union") or str(origin).__contains__("Optional"): return " or ".join(_format_type(arg) for arg in args) elif origin is None: return "null" if hasattr(type_hint, "__name__"): return type_hint.__name__ return str(type_hint) def _unroll_types(tp) -> list[type]: """ Unrolls typing.Union and typing.Optional types into a flat list of types. """ origin = get_origin(tp) if origin is Union: # Recursively unroll each type inside the Union types = [] for arg in get_args(tp): types.extend(_unroll_types(arg)) # Remove duplicates while preserving order return list(dict.fromkeys(types)) else: # If it's not a Union, just return it as a single-element list return [tp] def generate_model_doc(model: type[BaseModel]) -> str: """Generate documentation for a Pydantic model.""" models_stack = [model] doc = "" while models_stack: current_model = models_stack.pop() doc += f"