feat: process PDFs synthetically if model does not support file natively (#2263)

* feat: process PDFs synthetically if model does not support file natively
* fix: small code optimisations
@@ -52,6 +52,7 @@ RUN apt-get update && \
     python3.12 \
     libgl1 \
     libglib2.0-0 \
+    poppler-utils \
     && \
     ln -s /usr/bin/python3.12 /usr/bin/python && \
     rm -rf /var/lib/apt/lists/*
@@ -8,8 +8,8 @@ from application.core.model_settings import (
     ModelProvider,
 )
 
-OPENAI_ATTACHMENTS = [
-    "application/pdf",
+# Base image attachment types supported by most vision-capable LLMs
+IMAGE_ATTACHMENTS = [
     "image/png",
     "image/jpeg",
     "image/jpg",
@@ -17,14 +17,15 @@ OPENAI_ATTACHMENTS = [
     "image/gif",
 ]
 
-GOOGLE_ATTACHMENTS = [
-    "application/pdf",
-    "image/png",
-    "image/jpeg",
-    "image/jpg",
-    "image/webp",
-    "image/gif",
-]
+# PDF excluded: most OpenAI-compatible endpoints don't support native PDF uploads.
+# When excluded, PDFs are synthetically processed by converting pages to images.
+OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS
+
+GOOGLE_ATTACHMENTS = ["application/pdf"] + IMAGE_ATTACHMENTS
+
+ANTHROPIC_ATTACHMENTS = IMAGE_ATTACHMENTS
+
+OPENROUTER_ATTACHMENTS = IMAGE_ATTACHMENTS
 
 
 OPENAI_MODELS = [
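As a hedged illustration of how these constants are meant to be consumed (needs_synthetic_pdf is a hypothetical helper, not part of this diff): a provider whose list lacks "application/pdf" triggers the PDF-to-image fallback introduced in the handler changes below.

    # Sketch, assuming only the constants defined in this hunk.
    IMAGE_ATTACHMENTS = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/gif"]
    OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS
    GOOGLE_ATTACHMENTS = ["application/pdf"] + IMAGE_ATTACHMENTS

    def needs_synthetic_pdf(supported_types):
        # True when the model can see images but cannot ingest PDFs natively
        supports_images = any(t.startswith("image/") for t in supported_types)
        return supports_images and "application/pdf" not in supported_types

    print(needs_synthetic_pdf(OPENAI_ATTACHMENTS))   # True: convert PDF pages to images
    print(needs_synthetic_pdf(GOOGLE_ATTACHMENTS))   # False: send the PDF natively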
@@ -63,6 +64,7 @@ ANTHROPIC_MODELS = [
         description="Latest Claude 3.5 Sonnet with enhanced capabilities",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -73,6 +75,7 @@ ANTHROPIC_MODELS = [
         description="Balanced performance and capability",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -83,6 +86,7 @@ ANTHROPIC_MODELS = [
         description="Most capable Claude model",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -93,6 +97,7 @@ ANTHROPIC_MODELS = [
         description="Fastest Claude model",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -151,28 +156,43 @@ GROQ_MODELS = [
         ),
     ),
     AvailableModel(
-        id="llama-3.1-8b-instant",
+        id="openai/gpt-oss-120b",
         provider=ModelProvider.GROQ,
-        display_name="Llama 3.1 8B",
-        description="Ultra-fast inference",
+        display_name="GPT-OSS 120B",
+        description="Open-source GPT model optimized for speed",
         capabilities=ModelCapabilities(
             supports_tools=True,
             context_window=128000,
         ),
     ),
+]
+
+
+OPENROUTER_MODELS = [
     AvailableModel(
-        id="mixtral-8x7b-32768",
-        provider=ModelProvider.GROQ,
-        display_name="Mixtral 8x7B",
-        description="High-speed inference with tools",
+        id="qwen/qwen3-coder:free",
+        provider=ModelProvider.OPENROUTER,
+        display_name="Qwen 3 Coder",
+        description="Latest Qwen model with high-speed inference",
         capabilities=ModelCapabilities(
             supports_tools=True,
-            context_window=32768,
+            context_window=128000,
+            supported_attachment_types=OPENROUTER_ATTACHMENTS
         ),
     ),
+    AvailableModel(
+        id="google/gemma-3-27b-it:free",
+        provider=ModelProvider.OPENROUTER,
+        display_name="Gemma 3 27B",
+        description="Latest Gemma model with high-speed inference",
+        capabilities=ModelCapabilities(
+            supports_tools=True,
+            context_window=128000,
+            supported_attachment_types=OPENROUTER_ATTACHMENTS
+        ),
+    ),
 ]
 
 
 AZURE_OPENAI_MODELS = [
     AvailableModel(
         id="azure-gpt-4",
@@ -8,6 +8,7 @@ logger = logging.getLogger(__name__)
 
 class ModelProvider(str, Enum):
     OPENAI = "openai"
+    OPENROUTER = "openrouter"
     AZURE_OPENAI = "azure_openai"
     ANTHROPIC = "anthropic"
     GROQ = "groq"
@@ -107,6 +108,10 @@ class ModelRegistry:
             settings.LLM_PROVIDER == "groq" and settings.API_KEY
         ):
             self._add_groq_models(settings)
+        if settings.OPEN_ROUTER_API_KEY or (
+            settings.LLM_PROVIDER == "openrouter" and settings.API_KEY
+        ):
+            self._add_openrouter_models(settings)
         if settings.HUGGINGFACE_API_KEY or (
             settings.LLM_PROVIDER == "huggingface" and settings.API_KEY
         ):
@@ -211,6 +216,21 @@ class ModelRegistry:
             return
         for model in GROQ_MODELS:
             self.models[model.id] = model
 
+    def _add_openrouter_models(self, settings):
+        from application.core.model_configs import OPENROUTER_MODELS
+
+        if settings.OPEN_ROUTER_API_KEY:
+            for model in OPENROUTER_MODELS:
+                self.models[model.id] = model
+            return
+        if settings.LLM_PROVIDER == "openrouter" and settings.LLM_NAME:
+            for model in OPENROUTER_MODELS:
+                if model.id == settings.LLM_NAME:
+                    self.models[model.id] = model
+                    return
+        for model in OPENROUTER_MODELS:
+            self.models[model.id] = model
+
     def _add_docsgpt_models(self, settings):
         model_id = "docsgpt-local"
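The key-resolution order above means a dedicated OPEN_ROUTER_API_KEY exposes the full catalog, while provider-level configuration narrows registration to the configured LLM_NAME when one matches. A standalone sketch of that precedence (select_models and its inputs are illustrative stand-ins, not the registry's API):

    # Sketch of the _add_openrouter_models precedence with plain stand-ins.
    OPENROUTER_MODELS = ["qwen/qwen3-coder:free", "google/gemma-3-27b-it:free"]

    def select_models(open_router_api_key, llm_provider, llm_name):
        if open_router_api_key:
            return list(OPENROUTER_MODELS)          # dedicated key: register everything
        if llm_provider == "openrouter" and llm_name:
            matched = [m for m in OPENROUTER_MODELS if m == llm_name]
            if matched:
                return matched                      # register only the configured model
        return list(OPENROUTER_MODELS)              # fallback: register everything

    assert select_models(None, "openrouter", "qwen/qwen3-coder:free") == ["qwen/qwen3-coder:free"]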
@@ -9,6 +9,7 @@ def get_api_key_for_provider(provider: str) -> Optional[str]:
 
     provider_key_map = {
         "openai": settings.OPENAI_API_KEY,
+        "openrouter": settings.OPEN_ROUTER_API_KEY,
         "anthropic": settings.ANTHROPIC_API_KEY,
         "google": settings.GOOGLE_API_KEY,
         "groq": settings.GROQ_API_KEY,
@@ -81,6 +81,7 @@ class Settings(BaseSettings):
     GOOGLE_API_KEY: Optional[str] = None
     GROQ_API_KEY: Optional[str] = None
     HUGGINGFACE_API_KEY: Optional[str] = None
+    OPEN_ROUTER_API_KEY: Optional[str] = None
 
     OPENAI_API_BASE: Optional[str] = None  # azure openai api base url
     OPENAI_API_VERSION: Optional[str] = None  # azure openai api version
@@ -1,7 +1,13 @@
+import base64
+import logging
+
 from anthropic import AI_PROMPT, Anthropic, HUMAN_PROMPT
 
 from application.core.settings import settings
 from application.llm.base import BaseLLM
+from application.storage.storage_creator import StorageCreator
+
+logger = logging.getLogger(__name__)
 
 
 class AnthropicLLM(BaseLLM):
@@ -20,6 +26,7 @@ class AnthropicLLM(BaseLLM):
 
         self.HUMAN_PROMPT = HUMAN_PROMPT
         self.AI_PROMPT = AI_PROMPT
+        self.storage = StorageCreator.get_storage()
 
     def _raw_gen(
         self,
@@ -70,3 +77,115 @@ class AnthropicLLM(BaseLLM):
         finally:
             if hasattr(stream_response, "close"):
                 stream_response.close()
+
+    def get_supported_attachment_types(self):
+        """
+        Return a list of MIME types supported by Anthropic Claude for file uploads.
+        Claude supports images but not PDFs natively.
+        PDFs are synthetically supported via PDF-to-image conversion in the handler.
+
+        Returns:
+            list: List of supported MIME types
+        """
+        return [
+            "image/png",
+            "image/jpeg",
+            "image/jpg",
+            "image/webp",
+            "image/gif",
+        ]
+
+    def prepare_messages_with_attachments(self, messages, attachments=None):
+        """
+        Process attachments for Anthropic Claude API.
+        Formats images using Claude's vision message format.
+
+        Args:
+            messages (list): List of message dictionaries.
+            attachments (list): List of attachment dictionaries with content and metadata.
+
+        Returns:
+            list: Messages formatted with image content for Claude API.
+        """
+        if not attachments:
+            return messages
+
+        prepared_messages = messages.copy()
+
+        # Find the last user message to attach images to
+        user_message_index = None
+        for i in range(len(prepared_messages) - 1, -1, -1):
+            if prepared_messages[i].get("role") == "user":
+                user_message_index = i
+                break
+
+        if user_message_index is None:
+            user_message = {"role": "user", "content": []}
+            prepared_messages.append(user_message)
+            user_message_index = len(prepared_messages) - 1
+
+        # Convert content to list format if it's a string
+        if isinstance(prepared_messages[user_message_index].get("content"), str):
+            text_content = prepared_messages[user_message_index]["content"]
+            prepared_messages[user_message_index]["content"] = [
+                {"type": "text", "text": text_content}
+            ]
+        elif not isinstance(prepared_messages[user_message_index].get("content"), list):
+            prepared_messages[user_message_index]["content"] = []
+
+        for attachment in attachments:
+            mime_type = attachment.get("mime_type")
+
+            if mime_type and mime_type.startswith("image/"):
+                try:
+                    # Check if this is a pre-converted image (from PDF-to-image conversion)
+                    # These have a 'data' key with base64 already
+                    if "data" in attachment:
+                        base64_image = attachment["data"]
+                    else:
+                        base64_image = self._get_base64_image(attachment)
+
+                    # Claude uses a specific format for images
+                    prepared_messages[user_message_index]["content"].append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": mime_type,
+                                "data": base64_image,
+                            },
+                        }
+                    )
+
+                except Exception as e:
+                    logger.error(
+                        f"Error processing image attachment: {e}", exc_info=True
+                    )
+                    if "content" in attachment:
+                        prepared_messages[user_message_index]["content"].append(
+                            {
+                                "type": "text",
+                                "text": f"[Image could not be processed: {attachment.get('path', 'unknown')}]",
+                            }
+                        )
+
+        return prepared_messages
+
+    def _get_base64_image(self, attachment):
+        """
+        Convert an image file to base64 encoding.
+
+        Args:
+            attachment (dict): Attachment dictionary with path and metadata.
+
+        Returns:
+            str: Base64-encoded image data.
+        """
+        file_path = attachment.get("path")
+        if not file_path:
+            raise ValueError("No file path provided in attachment")
+        try:
+            with self.storage.get_file(file_path) as image_file:
+                return base64.b64encode(image_file.read()).decode("utf-8")
+        except FileNotFoundError:
+            raise FileNotFoundError(f"File not found: {file_path}")
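For orientation, a pre-converted PDF page reaches this method as a dict that already carries base64 under 'data', so the storage read in _get_base64_image is skipped. A minimal sketch of the Claude-style message the loop builds (values abbreviated, prompt text illustrative):

    attachment = {"mime_type": "image/png", "data": "iVBORw0KGgo..."}  # pre-converted page
    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize the attached page."},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": attachment["mime_type"],
                    "data": attachment["data"],
                },
            },
        ],
    }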
@@ -105,6 +105,7 @@ class LLMHandler(ABC):
         """
         Prepare messages with attachments and provider-specific formatting.
 
+
         Args:
             agent: The agent instance
             messages: Original messages
@@ -118,11 +119,40 @@ class LLMHandler(ABC):
         logger.info(f"Preparing messages with {len(attachments)} attachments")
         supported_types = agent.llm.get_supported_attachment_types()
 
+        # Check if provider supports images but not PDF (synthetic PDF support)
+        supports_images = any(t.startswith("image/") for t in supported_types)
+        supports_pdf = "application/pdf" in supported_types
+
+        # Process attachments, converting PDFs to images if needed
+        processed_attachments = []
+        for attachment in attachments:
+            mime_type = attachment.get("mime_type")
+
+            # Synthetic PDF support: convert PDF to images if LLM supports images but not PDF
+            if mime_type == "application/pdf" and supports_images and not supports_pdf:
+                logger.info(
+                    f"Converting PDF to images for synthetic PDF support: {attachment.get('path', 'unknown')}"
+                )
+                try:
+                    converted_images = self._convert_pdf_to_images(attachment)
+                    processed_attachments.extend(converted_images)
+                    logger.info(
+                        f"Converted PDF to {len(converted_images)} images"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to convert PDF to images, falling back to text: {e}"
+                    )
+                    # Fall back to treating as unsupported (text extraction)
+                    processed_attachments.append(attachment)
+            else:
+                processed_attachments.append(attachment)
+
         supported_attachments = [
-            a for a in attachments if a.get("mime_type") in supported_types
+            a for a in processed_attachments if a.get("mime_type") in supported_types
         ]
         unsupported_attachments = [
-            a for a in attachments if a.get("mime_type") not in supported_types
+            a for a in processed_attachments if a.get("mime_type") not in supported_types
         ]
 
         # Process supported attachments with the LLM's custom method
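Note that the partition above reuses the same mime_type test, so a PDF whose conversion failed stays "application/pdf" and lands in the unsupported bucket for text extraction. A small self-contained sketch of the split (attachment values are illustrative):

    supported_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/gif"]
    processed_attachments = [
        {"mime_type": "image/png", "data": "...", "page": 1},  # converted PDF page
        {"mime_type": "text/csv", "content": "a,b\n1,2"},      # no image form: text path
    ]
    supported = [a for a in processed_attachments if a.get("mime_type") in supported_types]
    unsupported = [a for a in processed_attachments if a.get("mime_type") not in supported_types]
    assert len(supported) == 1 and len(unsupported) == 1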
@@ -145,6 +175,37 @@ class LLMHandler(ABC):
             )
         return messages
 
+    def _convert_pdf_to_images(self, attachment: Dict) -> List[Dict]:
+        """
+        Convert a PDF attachment to a list of image attachments.
+
+        This enables synthetic PDF support for LLMs that support images but not PDFs.
+
+        Args:
+            attachment: PDF attachment dictionary with 'path' and optional 'content'
+
+        Returns:
+            List of image attachment dictionaries with 'data', 'mime_type', and 'page'
+        """
+        from application.utils import convert_pdf_to_images
+        from application.storage.storage_creator import StorageCreator
+
+        file_path = attachment.get("path")
+        if not file_path:
+            raise ValueError("No file path provided in PDF attachment")
+
+        storage = StorageCreator.get_storage()
+
+        # Convert PDF to images
+        images_data = convert_pdf_to_images(
+            file_path=file_path,
+            storage=storage,
+            max_pages=20,
+            dpi=150,
+        )
+
+        return images_data
+
     def _append_unsupported_attachments(
         self, messages: List[Dict], attachments: List[Dict]
     ) -> List[Dict]:
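Each converted page comes back shaped like an ordinary image attachment, which is why the caller can simply extend processed_attachments with the result. A hedged sketch of a direct call, mirroring the defaults above (the path is illustrative):

    images_data = convert_pdf_to_images(
        file_path="user_uploads/report.pdf",  # illustrative storage path
        storage=storage,                      # any object exposing get_file()
        max_pages=20,                         # caps context growth on long PDFs
        dpi=150,                              # legibility vs. payload-size trade-off
    )
    # -> [{"data": "<base64>", "mime_type": "image/png", "page": 1}, ...]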
@@ -9,6 +9,7 @@ from application.llm.novita import NovitaLLM
 from application.llm.openai import AzureOpenAILLM, OpenAILLM
 from application.llm.premai import PremAILLM
 from application.llm.sagemaker import SagemakerAPILLM
+from application.llm.open_router import OpenRouterLLM
 
 logger = logging.getLogger(__name__)
@@ -25,6 +26,7 @@ class LLMCreator:
         "groq": GroqLLM,
         "google": GoogleLLM,
         "novita": NovitaLLM,
+        "openrouter": OpenRouterLLM,
     }
 
     @classmethod
application/llm/open_router.py (new file, 15 lines)
@@ -0,0 +1,15 @@
+from application.core.settings import settings
+from application.llm.openai import OpenAILLM
+
+OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+
+
+class OpenRouterLLM(OpenAILLM):
+    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
+        super().__init__(
+            api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,
+            user_api_key=user_api_key,
+            base_url=base_url or OPEN_ROUTER_BASE_URL,
+            *args,
+            **kwargs,
+        )
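Because the subclass only swaps the base URL and the key-resolution order, it behaves like any OpenAILLM pointed at OpenRouter's OpenAI-compatible endpoint. A brief usage sketch (the key is a placeholder):

    llm = OpenRouterLLM(api_key="sk-or-...")  # placeholder key
    # equivalent to OpenAILLM(api_key=..., base_url="https://openrouter.ai/api/v1")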
@@ -9,6 +9,57 @@ from application.llm.base import BaseLLM
 from application.storage.storage_creator import StorageCreator
 
 
+def _truncate_base64_for_logging(messages):
+    """
+    Create a copy of messages with base64 data truncated for readable logging.
+
+    Args:
+        messages: List of message dicts
+
+    Returns:
+        Copy of messages with truncated base64 content
+    """
+    import copy
+
+    def truncate_content(content):
+        if isinstance(content, str):
+            # Check if it looks like a data URL with base64
+            if content.startswith("data:") and ";base64," in content:
+                prefix_end = content.index(";base64,") + len(";base64,")
+                prefix = content[:prefix_end]
+                return f"{prefix}[BASE64_DATA_TRUNCATED, length={len(content) - prefix_end}]"
+            return content
+        elif isinstance(content, list):
+            return [truncate_item(item) for item in content]
+        elif isinstance(content, dict):
+            return {k: truncate_content(v) for k, v in content.items()}
+        return content
+
+    def truncate_item(item):
+        if isinstance(item, dict):
+            result = {}
+            for k, v in item.items():
+                if k == "url" and isinstance(v, str) and ";base64," in v:
+                    prefix_end = v.index(";base64,") + len(";base64,")
+                    prefix = v[:prefix_end]
+                    result[k] = f"{prefix}[BASE64_DATA_TRUNCATED, length={len(v) - prefix_end}]"
+                elif k == "data" and isinstance(v, str) and len(v) > 100:
+                    result[k] = f"[BASE64_DATA_TRUNCATED, length={len(v)}]"
+                else:
+                    result[k] = truncate_content(v)
+            return result
+        return truncate_content(item)
+
+    truncated = []
+    for msg in messages:
+        msg_copy = copy.copy(msg)
+        if "content" in msg_copy:
+            msg_copy["content"] = truncate_content(msg_copy["content"])
+        truncated.append(msg_copy)
+
+    return truncated
+
+
 class OpenAILLM(BaseLLM):
 
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
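A quick before/after of the helper on the two shapes it targets, data URLs under a 'url' key and raw base64 under a 'data' key (payloads shortened to repeated characters):

    messages = [{
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64," + "A" * 5000}},
            {"type": "image", "data": "B" * 5000},
        ],
    }]
    safe = _truncate_base64_for_logging(messages)
    # url  -> "data:image/png;base64,[BASE64_DATA_TRUNCATED, length=5000]"
    # data -> "[BASE64_DATA_TRUNCATED, length=5000]"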
@@ -44,12 +95,12 @@ class OpenAILLM(BaseLLM):
             if isinstance(content, str):
                 cleaned_messages.append({"role": role, "content": content})
             elif isinstance(content, list):
+                # Collect all content parts into a single message
+                content_parts = []
+
                 for item in content:
-                    if "text" in item:
-                        cleaned_messages.append(
-                            {"role": role, "content": item["text"]}
-                        )
-                    if "function_call" in item:
+                    if "function_call" in item:
                         # Function calls need their own message
                         cleaned_args = self._remove_null_values(
                             item["function_call"]["args"]
                         )
@@ -69,6 +120,7 @@ class OpenAILLM(BaseLLM):
                         }
                     )
                 elif "function_response" in item:
+                    # Function responses need their own message
                     cleaned_messages.append(
                         {
                             "role": "tool",
@@ -81,36 +133,20 @@ class OpenAILLM(BaseLLM):
                         }
                     )
                 elif isinstance(item, dict):
-                    content_parts = []
-                    if "text" in item:
-                        content_parts.append(
-                            {"type": "text", "text": item["text"]}
-                        )
-                    elif (
-                        "type" in item
-                        and item["type"] == "text"
-                        and "text" in item
-                    ):
+                    # Collect content parts (text, images, files) into a single message
+                    if "type" in item and item["type"] == "text" and "text" in item:
                         content_parts.append(item)
-                    elif (
-                        "type" in item
-                        and item["type"] == "file"
-                        and "file" in item
-                    ):
+                    elif "type" in item and item["type"] == "file" and "file" in item:
                         content_parts.append(item)
-                    elif (
-                        "type" in item
-                        and item["type"] == "image_url"
-                        and "image_url" in item
-                    ):
+                    elif "type" in item and item["type"] == "image_url" and "image_url" in item:
                         content_parts.append(item)
-                        cleaned_messages.append(
-                            {"role": role, "content": content_parts}
-                        )
                     else:
                         raise ValueError(
                             f"Unexpected content dictionary format: {item}"
                         )
+                elif "text" in item and "type" not in item:
+                    # Legacy format: {"text": "..."} without type
+                    content_parts.append({"type": "text", "text": item["text"]})
+
+                # Add the collected content parts as a single message
+                if content_parts:
+                    cleaned_messages.append({"role": role, "content": content_parts})
             else:
                 raise ValueError(f"Unexpected content type: {type(content)}")
         return cleaned_messages
@@ -127,7 +163,7 @@ class OpenAILLM(BaseLLM):
         **kwargs,
     ):
         messages = self._clean_messages_openai(messages)
-        logging.info(f"Cleaned messages: {messages}")
+        logging.info(f"Cleaned messages: {_truncate_base64_for_logging(messages)}")
 
         # Convert max_tokens to max_completion_tokens for newer models
         if "max_tokens" in kwargs:
@@ -163,7 +199,7 @@ class OpenAILLM(BaseLLM):
         **kwargs,
     ):
         messages = self._clean_messages_openai(messages)
-        logging.info(f"Cleaned messages: {messages}")
+        logging.info(f"Cleaned messages: {_truncate_base64_for_logging(messages)}")
 
         # Convert max_tokens to max_completion_tokens for newer models
         if "max_tokens" in kwargs:
@@ -261,17 +297,14 @@ class OpenAILLM(BaseLLM):
         """
         Return a list of MIME types supported by OpenAI for file uploads.
 
+        This reads from the model config to ensure consistency.
+        If no model config is found, falls back to images only (safest default).
+
         Returns:
             list: List of supported MIME types
         """
-        return [
-            "application/pdf",
-            "image/png",
-            "image/jpeg",
-            "image/jpg",
-            "image/webp",
-            "image/gif",
-        ]
+        from application.core.model_configs import OPENAI_ATTACHMENTS
+
+        return OPENAI_ATTACHMENTS
 
     def prepare_messages_with_attachments(self, messages, attachments=None):
         """
@@ -308,10 +341,16 @@ class OpenAILLM(BaseLLM):
             prepared_messages[user_message_index]["content"] = []
         for attachment in attachments:
             mime_type = attachment.get("mime_type")
+            logging.info(f"Processing attachment with mime_type: {mime_type}, has_data: {'data' in attachment}, has_path: {'path' in attachment}")
 
             if mime_type and mime_type.startswith("image/"):
                 try:
-                    base64_image = self._get_base64_image(attachment)
+                    # Check if this is a pre-converted image (from PDF-to-image conversion)
+                    if "data" in attachment:
+                        base64_image = attachment["data"]
+                    else:
+                        base64_image = self._get_base64_image(attachment)
 
                     prepared_messages[user_message_index]["content"].append(
                         {
                             "type": "image_url",
@@ -320,6 +359,7 @@ class OpenAILLM(BaseLLM):
                         },
                     }
                 )
+
             except Exception as e:
                 logging.error(
                     f"Error processing image attachment: {e}", exc_info=True
@@ -334,6 +374,7 @@ class OpenAILLM(BaseLLM):
             # Handle PDFs using the file API
             elif mime_type == "application/pdf":
+                logging.info(f"Attempting to upload PDF to OpenAI: {attachment.get('path', 'unknown')}")
                 try:
                     file_id = self._upload_file_to_openai(attachment)
                     prepared_messages[user_message_index]["content"].append(
@@ -348,6 +389,8 @@ class OpenAILLM(BaseLLM):
                             "text": f"File content:\n\n{attachment['content']}",
                         }
                     )
+            else:
+                logging.warning(f"Unsupported attachment type in OpenAI provider: {mime_type}")
         return prepared_messages
 
     def _get_base64_image(self, attachment):
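For comparison with the Anthropic format earlier in this diff, the OpenAI path wraps the same base64 payload in an image_url data URL. A minimal sketch of the content part appended for a converted page (values abbreviated):

    attachment = {"mime_type": "image/png", "data": "iVBORw0KGgo...", "page": 1}
    content_part = {
        "type": "image_url",
        "image_url": {"url": f"data:{attachment['mime_type']};base64,{attachment['data']}"},
    }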
@@ -60,14 +60,14 @@ def get_default_file_extractor(
         ".rst": RstParser(),
         ".adoc": DoclingAsciiDocParser(),
         ".asciidoc": DoclingAsciiDocParser(),
-        # Images (with OCR)
-        ".png": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".jpg": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".jpeg": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".tiff": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".tif": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".bmp": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".webp": DoclingImageParser(ocr_enabled=ocr_enabled),
+        # Images (with OCR) - only use Docling when OCR is enabled
+        ".png": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".jpg": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".jpeg": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".tiff": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".tif": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".bmp": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".webp": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
         # Media/subtitles
         ".vtt": DoclingVTTParser(),
         # Specialized XML formats
@@ -56,6 +56,7 @@ packaging==24.2
 pandas==2.3.3
 openpyxl==3.1.5
 pathable==0.4.4
+pdf2image>=1.17.0
 pillow
 portalocker>=2.7.0,<3.0.0
 prance==25.4.8.0
@@ -1,7 +1,11 @@
+import base64
 import hashlib
+import io
+import logging
 import os
 import re
 import uuid
+from typing import List
 
 import tiktoken
 from flask import jsonify, make_response
@@ -11,6 +15,8 @@ from application.core.model_utils import get_token_limit
 
 from application.core.settings import settings
 
+logger = logging.getLogger(__name__)
+
 
 _encoding = None
@@ -215,6 +221,93 @@ def calculate_compression_threshold(
     return threshold
 
 
+def convert_pdf_to_images(
+    file_path: str,
+    storage=None,
+    max_pages: int = 20,
+    dpi: int = 150,
+    image_format: str = "PNG",
+) -> List[dict]:
+    """
+    Convert PDF pages to images for LLMs that support images but not PDFs.
+
+    This enables "synthetic PDF support" by converting each PDF page to an image
+    that can be sent to vision-capable LLMs like Claude.
+
+    Args:
+        file_path: Path to the PDF file (can be storage path)
+        storage: Optional storage instance for retrieving files
+        max_pages: Maximum number of pages to convert (default 20 to avoid context overflow)
+        dpi: Resolution for rendering (default 150 for balance of quality/size)
+        image_format: Output format (PNG recommended for quality)
+
+    Returns:
+        List of dicts with keys:
+            - 'data': base64-encoded image data
+            - 'mime_type': MIME type (e.g., 'image/png')
+            - 'page': Page number (1-indexed)
+
+    Raises:
+        ImportError: If pdf2image is not installed
+        FileNotFoundError: If file doesn't exist
+        Exception: If conversion fails
+    """
+    try:
+        from pdf2image import convert_from_path, convert_from_bytes
+    except ImportError:
+        raise ImportError(
+            "pdf2image is required for PDF-to-image conversion. "
+            "Install it with: pip install pdf2image\n"
+            "Also ensure poppler-utils is installed on your system."
+        )
+
+    images_data = []
+    mime_type = f"image/{image_format.lower()}"
+
+    try:
+        # Get PDF content either from storage or direct file path
+        if storage and hasattr(storage, "get_file"):
+            with storage.get_file(file_path) as pdf_file:
+                pdf_bytes = pdf_file.read()
+                pil_images = convert_from_bytes(
+                    pdf_bytes,
+                    dpi=dpi,
+                    fmt=image_format.lower(),
+                    first_page=1,
+                    last_page=max_pages,
+                )
+        else:
+            pil_images = convert_from_path(
+                file_path,
+                dpi=dpi,
+                fmt=image_format.lower(),
+                first_page=1,
+                last_page=max_pages,
+            )
+
+        for page_num, pil_image in enumerate(pil_images, start=1):
+            # Convert PIL image to base64
+            buffer = io.BytesIO()
+            pil_image.save(buffer, format=image_format)
+            buffer.seek(0)
+            base64_data = base64.b64encode(buffer.read()).decode("utf-8")
+
+            images_data.append({
+                "data": base64_data,
+                "mime_type": mime_type,
+                "page": page_num,
+            })
+
+        return images_data
+
+    except FileNotFoundError:
+        logger.error(f"PDF file not found: {file_path}")
+        raise
+    except Exception as e:
+        logger.error(f"Error converting PDF to images: {e}", exc_info=True)
+        raise
+
+
 def clean_text_for_tts(text: str) -> str:
     """
     clean text for Text-to-Speech processing.
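A self-contained smoke test for the conversion path above, assuming pdf2image and poppler-utils are installed as the Dockerfile hunk arranges: render a one-page PDF with Pillow, then convert it back with the same pdf2image parameters used here.

    import io
    from PIL import Image
    from pdf2image import convert_from_bytes

    buffer = io.BytesIO()
    Image.new("RGB", (200, 100), "white").save(buffer, format="PDF")
    pages = convert_from_bytes(buffer.getvalue(), dpi=150, fmt="png", first_page=1, last_page=20)
    assert len(pages) == 1 and pages[0].width > 0  # one rendered page comes back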