diff --git a/.github/styles/config/vocabularies/Docling/accept.txt b/.github/styles/config/vocabularies/Docling/accept.txt
index a4f1a66..1f47d31 100644
--- a/.github/styles/config/vocabularies/Docling/accept.txt
+++ b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -4,6 +4,7 @@ asgi
async
(?i)urls
uvicorn
+Config
[Ww]ebserver
RQ
(?i)url
diff --git a/docs/usage.md b/docs/usage.md
index b7c8f5c..2be6f51 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -7,6 +7,7 @@ The API provides two endpoints: one for urls, one for files. This is necessary t
On top of the source of file (see below), both endpoints support the same parameters.
+
ConvertDocumentsRequestOptions
| Field Name | Type | Description |
|------------|------|-------------|
@@ -39,6 +40,52 @@ On top of the source of file (see below), both endpoints support the same parame
| `vlm_pipeline_model_local` | VlmModelLocal or NoneType | Options for running a local vision-language model for the `vlm` pipeline. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `vlm_pipeline_model_api` and `vlm_pipeline_model`. |
| `vlm_pipeline_model_api` | VlmModelApi or NoneType | API details for using a vision-language model for the `vlm` pipeline. This parameter is mutually exclusive with `vlm_pipeline_model_local` and `vlm_pipeline_model`. |
+VlmModelApi
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `url` | AnyUrl | Endpoint that accepts OpenAI API-compatible requests. |
+| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
+| `params` | Dict[str, Any] | Model parameters. |
+| `timeout` | float | Timeout for the API request. |
+| `concurrency` | int | Maximum number of concurrent requests to the API. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `scale` | float | Scale factor of the images used. |
+| `response_format` | ResponseFormat | Type of response generated by the model. |
+| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
+
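+For orientation, the sketch below shows how `vlm_pipeline_model_api` might be filled in. The nested field names come from the table above; the endpoint path, the surrounding payload shape, the `pipeline` key, and the `response_format` literal are illustrative assumptions, not guaranteed API details.
+
+```python
+import requests
+
+# Options routing the `vlm` pipeline to a remote, OpenAI API-compatible model.
+# Only the keys inside `vlm_pipeline_model_api` are taken from the table above;
+# the endpoint path, payload shape, and `pipeline` key are assumptions.
+options = {
+    "pipeline": "vlm",
+    "vlm_pipeline_model_api": {
+        "url": "http://localhost:8000/v1/chat/completions",
+        "headers": {"Authorization": "Bearer <token>"},
+        "params": {"model": "my-vlm"},
+        "timeout": 120.0,
+        "concurrency": 4,
+        "prompt": "Convert this page to markdown.",
+        "scale": 2.0,
+        "response_format": "markdown",
+        "temperature": 0.0,
+    },
+}
+
+response = requests.post(
+    "http://localhost:5001/v1alpha/convert/source",  # assumed docling-serve URL
+    json={"options": options, "http_sources": [{"url": "https://arxiv.org/pdf/2408.09869"}]},
+)
+print(response.json())
+```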
+VlmModelLocal
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `repo_id` | str | Repository id from the Hugging Face Hub. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `scale` | float | Scale factor of the images used. |
+| `response_format` | ResponseFormat | Type of response generated by the model. |
+| `inference_framework` | InferenceFramework | Inference framework to use. |
+| `transformers_model_type` | TransformersModelType | Type of transformers auto-model to use. |
+| `extra_generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
+| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
+
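+Similarly, a locally executed model can be configured through `vlm_pipeline_model_local`, as sketched below. The keys mirror the table above; the `repo_id` and the literals used for `response_format`, `inference_framework`, and `transformers_model_type` are illustrative assumptions.
+
+```python
+# Illustrative options for running the `vlm` pipeline with a local Hugging Face model.
+# The nested keys come from the VlmModelLocal table; the values are assumptions.
+options = {
+    "pipeline": "vlm",  # assumed key selecting the vlm pipeline
+    "vlm_pipeline_model_local": {
+        "repo_id": "ds4sd/SmolDocling-256M-preview",  # example Hugging Face repo id
+        "prompt": "Convert this page to docling.",
+        "scale": 2.0,
+        "response_format": "doctags",
+        "inference_framework": "transformers",
+        "transformers_model_type": "automodel-vision2seq",
+        "extra_generation_config": {"max_new_tokens": 4096},
+        "temperature": 0.0,
+    },
+}
+```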
+PictureDescriptionApi
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `url` | AnyUrl | Endpoint that accepts OpenAI API-compatible requests. |
+| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
+| `params` | Dict[str, Any] | Model parameters. |
+| `timeout` | float | Timeout for the API request. |
+| `concurrency` | int | Maximum number of concurrent requests to the API. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+
+PictureDescriptionLocal
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `repo_id` | str | Repository id from the Hugging Face Hub. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
+
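+The picture-description options follow the same pattern. The request fields that carry these objects (for example `do_picture_description`, `picture_description_api`, or `picture_description_local`) are not shown in the tables above and are assumed here for illustration only.
+
+```python
+# Illustrative options enabling picture descriptions via a remote model.
+# `do_picture_description` and `picture_description_api` are assumed field names;
+# the nested keys come from the PictureDescriptionApi table above.
+options = {
+    "do_picture_description": True,
+    "picture_description_api": {
+        "url": "http://localhost:8000/v1/chat/completions",
+        "headers": {"Authorization": "Bearer <token>"},
+        "params": {"model": "my-vlm"},
+        "timeout": 60.0,
+        "concurrency": 4,
+        "prompt": "Describe this image in a few sentences.",
+    },
+}
+```
+
+A local alternative would instead populate `picture_description_local` with a `repo_id`, a `prompt`, and an optional `generation_config`, following the PictureDescriptionLocal table.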
### Authentication
diff --git a/scripts/update_doc_usage.py b/scripts/update_doc_usage.py
index fcd6039..763aa3b 100644
--- a/scripts/update_doc_usage.py
+++ b/scripts/update_doc_usage.py
@@ -1,5 +1,5 @@
import re
-from typing import Annotated, Any, get_args, get_origin
+from typing import Annotated, Any, Union, get_args, get_origin
from pydantic import BaseModel
@@ -90,39 +90,75 @@ def _format_type(type_hint: Any) -> str:
return str(type_hint)
+def _unroll_types(tp: Any) -> list[Any]:
+    """
+    Unrolls typing.Union and typing.Optional types into a flat list of types.
+
+    For example, Optional[int] becomes [int, NoneType].
+    """
+ origin = get_origin(tp)
+ if origin is Union:
+ # Recursively unroll each type inside the Union
+ types = []
+ for arg in get_args(tp):
+ types.extend(_unroll_types(arg))
+ # Remove duplicates while preserving order
+ return list(dict.fromkeys(types))
+ else:
+ # If it's not a Union, just return it as a single-element list
+ return [tp]
+
+
def generate_model_doc(model: type[BaseModel]) -> str:
"""Generate documentation for a Pydantic model."""
- doc = "\n| Field Name | Type | Description |\n"
- doc += "|------------|------|-------------|\n"
- for base_model in model.__mro__:
-        # Check if this is a Pydantic model
-        if hasattr(base_model, "model_fields"):
-            # Iterate through fields of this model
-            for field_name, field in base_model.model_fields.items():
-                # Extract description from Annotated field if possible
-                description = field.description or "No description provided."
-                description = format_allowed_values_description(description)
-                description = format_variable_names(description)
-                # Handle Annotated types
-                original_type = field.annotation
-                if get_origin(original_type) is Annotated:
-                    # Extract base type and additional metadata
-                    type_args = get_args(original_type)
-                    base_type = type_args[0]
-                else:
-                    base_type = original_type
-                field_type = _format_type(base_type)
-                field_type = format_variable_names(field_type)
-                doc += f"| `{field_name}` | {field_type} | {description} |\n"
-            # stop iterating the base classes
-            break
-    doc += "\n"
+    models_stack = [model]
+
+    doc = ""
+    while models_stack:
+        current_model = models_stack.pop()
+
+        doc += f"{current_model.__name__}\n"
+        doc += "\n| Field Name | Type | Description |\n"
+        doc += "|------------|------|-------------|\n"
+
+        if hasattr(current_model, "__mro__"):
+            base_models = current_model.__mro__
+        else:
+            base_models = [current_model]
+
+        for base_model in base_models:
+            # Check if this is a Pydantic model
+            if hasattr(base_model, "model_fields"):
+                # Iterate through fields of this model
+                for field_name, field in base_model.model_fields.items():
+                    # Extract description from Annotated field if possible
+                    description = field.description or "No description provided."
+                    description = format_allowed_values_description(description)
+                    description = format_variable_names(description)
+
+                    # Handle Annotated types
+                    original_type = field.annotation
+                    if get_origin(original_type) is Annotated:
+                        # Extract base type and additional metadata
+                        type_args = get_args(original_type)
+                        base_type = type_args[0]
+                    else:
+                        base_type = original_type
+
+                    field_type = _format_type(base_type)
+                    field_type = format_variable_names(field_type)
+
+                    doc += f"| `{field_name}` | {field_type} | {description} |\n"
+
+                    # Queue nested Pydantic models so each gets its own table.
+                    # Typing constructs such as Dict[str, Any] are not classes,
+                    # so guard issubclass() with an isinstance(..., type) check.
+                    for nested_type in _unroll_types(base_type):
+                        if isinstance(nested_type, type) and issubclass(
+                            nested_type, BaseModel
+                        ):
+                            models_stack.append(nested_type)
+
+                # stop iterating the base classes
+                break
+
+        doc += "\n"
return doc