diff --git a/application/Dockerfile b/application/Dockerfile
index efdf6908..48d29e57 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -52,6 +52,7 @@ RUN apt-get update && \
     python3.12 \
     libgl1 \
     libglib2.0-0 \
+    poppler-utils \
     && \
     ln -s /usr/bin/python3.12 /usr/bin/python && \
     rm -rf /var/lib/apt/lists/*
diff --git a/application/core/model_configs.py b/application/core/model_configs.py
index a25c4440..841be925 100644
--- a/application/core/model_configs.py
+++ b/application/core/model_configs.py
@@ -8,8 +8,8 @@ from application.core.model_settings import (
     ModelProvider,
 )
 
-OPENAI_ATTACHMENTS = [
-    "application/pdf",
+# Base image attachment types supported by most vision-capable LLMs
+IMAGE_ATTACHMENTS = [
     "image/png",
     "image/jpeg",
     "image/jpg",
@@ -17,14 +17,15 @@ OPENAI_ATTACHMENTS = [
     "image/gif",
 ]
 
-GOOGLE_ATTACHMENTS = [
-    "application/pdf",
-    "image/png",
-    "image/jpeg",
-    "image/jpg",
-    "image/webp",
-    "image/gif",
-]
+# PDF excluded: most OpenAI-compatible endpoints don't support native PDF uploads.
+# When excluded, PDFs are synthetically processed by converting pages to images.
+OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS
+
+GOOGLE_ATTACHMENTS = ["application/pdf"] + IMAGE_ATTACHMENTS
+
+ANTHROPIC_ATTACHMENTS = IMAGE_ATTACHMENTS
+
+OPENROUTER_ATTACHMENTS = IMAGE_ATTACHMENTS
 
 
 OPENAI_MODELS = [
@@ -63,6 +64,7 @@ ANTHROPIC_MODELS = [
         description="Latest Claude 3.5 Sonnet with enhanced capabilities",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -73,6 +75,7 @@ ANTHROPIC_MODELS = [
         description="Balanced performance and capability",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -83,6 +86,7 @@ ANTHROPIC_MODELS = [
         description="Most capable Claude model",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -93,6 +97,7 @@ ANTHROPIC_MODELS = [
         description="Fastest Claude model",
         capabilities=ModelCapabilities(
             supports_tools=True,
+            supported_attachment_types=ANTHROPIC_ATTACHMENTS,
             context_window=200000,
         ),
     ),
@@ -151,28 +156,43 @@ GROQ_MODELS = [
         ),
     ),
     AvailableModel(
-        id="llama-3.1-8b-instant",
+        id="openai/gpt-oss-120b",
         provider=ModelProvider.GROQ,
-        display_name="Llama 3.1 8B",
-        description="Ultra-fast inference",
+        display_name="GPT-OSS 120B",
+        description="Open-source GPT model optimized for speed",
         capabilities=ModelCapabilities(
             supports_tools=True,
             context_window=128000,
         ),
     ),
+]
+
+
+OPENROUTER_MODELS = [
     AvailableModel(
-        id="mixtral-8x7b-32768",
-        provider=ModelProvider.GROQ,
-        display_name="Mixtral 8x7B",
-        description="High-speed inference with tools",
+        id="qwen/qwen3-coder:free",
+        provider=ModelProvider.OPENROUTER,
+        display_name="Qwen 3 Coder",
+        description="Latest Qwen model with high-speed inference",
         capabilities=ModelCapabilities(
             supports_tools=True,
-            context_window=32768,
+            context_window=128000,
+            supported_attachment_types=OPENROUTER_ATTACHMENTS,
+        ),
+    ),
+    AvailableModel(
+        id="google/gemma-3-27b-it:free",
+        provider=ModelProvider.OPENROUTER,
+        display_name="Gemma 3 27B",
+        description="Latest Gemma model with high-speed inference",
+        capabilities=ModelCapabilities(
+            supports_tools=True,
+            context_window=128000,
+            supported_attachment_types=OPENROUTER_ATTACHMENTS,
         ),
     ),
 ]
 
-
 AZURE_OPENAI_MODELS = [
     AvailableModel(
         id="azure-gpt-4",
diff --git a/application/core/model_settings.py b/application/core/model_settings.py
index bc38239a..206589f6 100644
--- a/application/core/model_settings.py
+++ b/application/core/model_settings.py
@@ -8,6 +8,7 @@ logger = logging.getLogger(__name__)
 
 class ModelProvider(str, Enum):
     OPENAI = "openai"
+    OPENROUTER = "openrouter"
     AZURE_OPENAI = "azure_openai"
     ANTHROPIC = "anthropic"
     GROQ = "groq"
@@ -107,6 +108,10 @@ class ModelRegistry:
             settings.LLM_PROVIDER == "groq" and settings.API_KEY
         ):
             self._add_groq_models(settings)
+        if settings.OPEN_ROUTER_API_KEY or (
+            settings.LLM_PROVIDER == "openrouter" and settings.API_KEY
+        ):
+            self._add_openrouter_models(settings)
         if settings.HUGGINGFACE_API_KEY or (
             settings.LLM_PROVIDER == "huggingface" and settings.API_KEY
         ):
@@ -211,6 +216,21 @@ class ModelRegistry:
             return
         for model in GROQ_MODELS:
             self.models[model.id] = model
+
+    def _add_openrouter_models(self, settings):
+        from application.core.model_configs import OPENROUTER_MODELS
+
+        if settings.OPEN_ROUTER_API_KEY:
+            for model in OPENROUTER_MODELS:
+                self.models[model.id] = model
+            return
+        if settings.LLM_PROVIDER == "openrouter" and settings.LLM_NAME:
+            for model in OPENROUTER_MODELS:
+                if model.id == settings.LLM_NAME:
+                    self.models[model.id] = model
+            return
+        for model in OPENROUTER_MODELS:
+            self.models[model.id] = model
 
     def _add_docsgpt_models(self, settings):
         model_id = "docsgpt-local"
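
Note on the attachment lists above: every provider list is now derived from a single `IMAGE_ATTACHMENTS` base, and only Google keeps native `"application/pdf"`. A minimal sketch of the resulting behavior; the `supports` helper is hypothetical, added here only to illustrate the lookup:

```python
IMAGE_ATTACHMENTS = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/gif"]

# Mirrors the diff: only Google keeps native PDF support.
OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS
GOOGLE_ATTACHMENTS = ["application/pdf"] + IMAGE_ATTACHMENTS


def supports(mime_type: str, provider_types: list) -> bool:
    # Hypothetical helper; the real check happens in LLMHandler below.
    return mime_type in provider_types


assert not supports("application/pdf", OPENAI_ATTACHMENTS)  # triggers PDF-to-image path
assert supports("application/pdf", GOOGLE_ATTACHMENTS)      # sent natively
```

One caveat worth flagging: `OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS` aliases the same list object, so an in-place mutation of any one alias would leak into all providers; copies via `list(IMAGE_ATTACHMENTS)` would be safer.
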
diff --git a/application/core/model_utils.py b/application/core/model_utils.py
index f24dbf47..94dc8973 100644
--- a/application/core/model_utils.py
+++ b/application/core/model_utils.py
@@ -9,6 +9,7 @@ def get_api_key_for_provider(provider: str) -> Optional[str]:
 
     provider_key_map = {
         "openai": settings.OPENAI_API_KEY,
+        "openrouter": settings.OPEN_ROUTER_API_KEY,
         "anthropic": settings.ANTHROPIC_API_KEY,
         "google": settings.GOOGLE_API_KEY,
         "groq": settings.GROQ_API_KEY,
diff --git a/application/core/settings.py b/application/core/settings.py
index 1cc36b18..66018b6f 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -81,6 +81,7 @@ class Settings(BaseSettings):
     GOOGLE_API_KEY: Optional[str] = None
     GROQ_API_KEY: Optional[str] = None
     HUGGINGFACE_API_KEY: Optional[str] = None
+    OPEN_ROUTER_API_KEY: Optional[str] = None
     OPENAI_API_BASE: Optional[str] = None  # azure openai api base url
     OPENAI_API_VERSION: Optional[str] = None  # azure openai api version
diff --git a/application/llm/anthropic.py b/application/llm/anthropic.py
index 4d26f925..e8b3ba2f 100644
--- a/application/llm/anthropic.py
+++ b/application/llm/anthropic.py
@@ -1,7 +1,13 @@
+import base64
+import logging
+
 from anthropic import AI_PROMPT, Anthropic, HUMAN_PROMPT
 
 from application.core.settings import settings
 from application.llm.base import BaseLLM
+from application.storage.storage_creator import StorageCreator
+
+logger = logging.getLogger(__name__)
 
 
 class AnthropicLLM(BaseLLM):
@@ -20,6 +26,7 @@ class AnthropicLLM(BaseLLM):
 
         self.HUMAN_PROMPT = HUMAN_PROMPT
         self.AI_PROMPT = AI_PROMPT
+        self.storage = StorageCreator.get_storage()
 
     def _raw_gen(
         self,
@@ -70,3 +77,115 @@ class AnthropicLLM(BaseLLM):
         finally:
             if hasattr(stream_response, "close"):
                 stream_response.close()
+
+    def get_supported_attachment_types(self):
+        """
+        Return a list of MIME types supported by Anthropic Claude for file uploads.
+        Claude supports images but not PDFs natively.
+        PDFs are synthetically supported via PDF-to-image conversion in the handler.
+
+        Returns:
+            list: List of supported MIME types
+        """
+        return [
+            "image/png",
+            "image/jpeg",
+            "image/jpg",
+            "image/webp",
+            "image/gif",
+        ]
+
+    def prepare_messages_with_attachments(self, messages, attachments=None):
+        """
+        Process attachments for Anthropic Claude API.
+        Formats images using Claude's vision message format.
+
+        Args:
+            messages (list): List of message dictionaries.
+            attachments (list): List of attachment dictionaries with content and metadata.
+
+        Returns:
+            list: Messages formatted with image content for Claude API.
+        """
+        if not attachments:
+            return messages
+
+        prepared_messages = messages.copy()
+
+        # Find the last user message to attach images to
+        user_message_index = None
+        for i in range(len(prepared_messages) - 1, -1, -1):
+            if prepared_messages[i].get("role") == "user":
+                user_message_index = i
+                break
+
+        if user_message_index is None:
+            user_message = {"role": "user", "content": []}
+            prepared_messages.append(user_message)
+            user_message_index = len(prepared_messages) - 1
+
+        # Convert content to list format if it's a string
+        if isinstance(prepared_messages[user_message_index].get("content"), str):
+            text_content = prepared_messages[user_message_index]["content"]
+            prepared_messages[user_message_index]["content"] = [
+                {"type": "text", "text": text_content}
+            ]
+        elif not isinstance(prepared_messages[user_message_index].get("content"), list):
+            prepared_messages[user_message_index]["content"] = []
+
+        for attachment in attachments:
+            mime_type = attachment.get("mime_type")
+
+            if mime_type and mime_type.startswith("image/"):
+                try:
+                    # Check if this is a pre-converted image (from PDF-to-image conversion)
+                    # These have 'data' key with base64 already
+                    if "data" in attachment:
+                        base64_image = attachment["data"]
+                    else:
+                        base64_image = self._get_base64_image(attachment)
+
+                    # Claude uses a specific format for images
+                    prepared_messages[user_message_index]["content"].append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": mime_type,
+                                "data": base64_image,
+                            },
+                        }
+                    )
+
+                except Exception as e:
+                    logger.error(
+                        f"Error processing image attachment: {e}", exc_info=True
+                    )
+                    if "content" in attachment:
+                        prepared_messages[user_message_index]["content"].append(
+                            {
+                                "type": "text",
+                                "text": f"[Image could not be processed: {attachment.get('path', 'unknown')}]",
+                            }
+                        )
+
+        return prepared_messages
+
+    def _get_base64_image(self, attachment):
+        """
+        Convert an image file to base64 encoding.
+
+        Args:
+            attachment (dict): Attachment dictionary with path and metadata.
+
+        Returns:
+            str: Base64-encoded image data.
+        """
+        file_path = attachment.get("path")
+        if not file_path:
+            raise ValueError("No file path provided in attachment")
+        try:
+            with self.storage.get_file(file_path) as image_file:
+                return base64.b64encode(image_file.read()).decode("utf-8")
+        except FileNotFoundError:
+            raise FileNotFoundError(f"File not found: {file_path}")
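
For quick reference, this is the message shape `prepare_messages_with_attachments` produces for a single PNG attachment. A sketch only; the text and base64 payload are placeholders, and the block layout mirrors the dict built in the method above:

```python
# Last user message after attachment processing, in Claude's vision format.
message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What does this chart show?"},  # original user text
        {
            "type": "image",  # Claude's block type, unlike OpenAI's "image_url"
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": "<base64-encoded image bytes>",  # placeholder
            },
        },
    ],
}
```
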
diff --git a/application/llm/handlers/base.py b/application/llm/handlers/base.py
index dbc5a879..b673a604 100644
--- a/application/llm/handlers/base.py
+++ b/application/llm/handlers/base.py
@@ -105,6 +105,7 @@ class LLMHandler(ABC):
         """
         Prepare messages with attachments and provider-specific formatting.
+
         Args:
             agent: The agent instance
             messages: Original messages
@@ -118,11 +119,40 @@ class LLMHandler(ABC):
         logger.info(f"Preparing messages with {len(attachments)} attachments")
 
         supported_types = agent.llm.get_supported_attachment_types()
+
+        # Check if provider supports images but not PDF (synthetic PDF support)
+        supports_images = any(t.startswith("image/") for t in supported_types)
+        supports_pdf = "application/pdf" in supported_types
+
+        # Process attachments, converting PDFs to images if needed
+        processed_attachments = []
+        for attachment in attachments:
+            mime_type = attachment.get("mime_type")
+
+            # Synthetic PDF support: convert PDF to images if LLM supports images but not PDF
+            if mime_type == "application/pdf" and supports_images and not supports_pdf:
+                logger.info(
+                    f"Converting PDF to images for synthetic PDF support: {attachment.get('path', 'unknown')}"
+                )
+                try:
+                    converted_images = self._convert_pdf_to_images(attachment)
+                    processed_attachments.extend(converted_images)
+                    logger.info(
+                        f"Converted PDF to {len(converted_images)} images"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to convert PDF to images, falling back to text: {e}"
+                    )
+                    # Fall back to treating as unsupported (text extraction)
+                    processed_attachments.append(attachment)
+            else:
+                processed_attachments.append(attachment)
+
         supported_attachments = [
-            a for a in attachments if a.get("mime_type") in supported_types
+            a for a in processed_attachments if a.get("mime_type") in supported_types
         ]
         unsupported_attachments = [
-            a for a in attachments if a.get("mime_type") not in supported_types
+            a for a in processed_attachments if a.get("mime_type") not in supported_types
         ]
 
         # Process supported attachments with the LLM's custom method
@@ -145,6 +175,37 @@ class LLMHandler(ABC):
             )
         return messages
 
+    def _convert_pdf_to_images(self, attachment: Dict) -> List[Dict]:
+        """
+        Convert a PDF attachment to a list of image attachments.
+
+        This enables synthetic PDF support for LLMs that support images but not PDFs.
+
+        Args:
+            attachment: PDF attachment dictionary with 'path' and optional 'content'
+
+        Returns:
+            List of image attachment dictionaries with 'data', 'mime_type', and 'page'
+        """
+        from application.utils import convert_pdf_to_images
+        from application.storage.storage_creator import StorageCreator
+
+        file_path = attachment.get("path")
+        if not file_path:
+            raise ValueError("No file path provided in PDF attachment")
+
+        storage = StorageCreator.get_storage()
+
+        # Convert PDF to images
+        images_data = convert_pdf_to_images(
+            file_path=file_path,
+            storage=storage,
+            max_pages=20,
+            dpi=150,
+        )
+
+        return images_data
+
     def _append_unsupported_attachments(
         self, messages: List[Dict], attachments: List[Dict]
     ) -> List[Dict]:
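
The heart of this hunk is the gate that decides when a PDF is converted. Traced in isolation with Claude's supported types from `anthropic.py` above (the values shown follow directly from the diff, nothing new):

```python
supported_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/gif"]

supports_images = any(t.startswith("image/") for t in supported_types)  # True
supports_pdf = "application/pdf" in supported_types                     # False

# A PDF attachment therefore takes the conversion path; each page comes back as
# {"data": <base64>, "mime_type": "image/png", "page": n} and then passes the
# supported-types filter downstream as an ordinary image.
should_convert = supports_images and not supports_pdf
assert should_convert
```
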
diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py
index ca39194c..96653831 100644
--- a/application/llm/llm_creator.py
+++ b/application/llm/llm_creator.py
@@ -9,6 +9,7 @@ from application.llm.novita import NovitaLLM
 from application.llm.openai import AzureOpenAILLM, OpenAILLM
 from application.llm.premai import PremAILLM
 from application.llm.sagemaker import SagemakerAPILLM
+from application.llm.open_router import OpenRouterLLM
 
 logger = logging.getLogger(__name__)
 
@@ -25,6 +26,7 @@ class LLMCreator:
         "groq": GroqLLM,
         "google": GoogleLLM,
         "novita": NovitaLLM,
+        "openrouter": OpenRouterLLM,
     }
 
     @classmethod
diff --git a/application/llm/open_router.py b/application/llm/open_router.py
new file mode 100644
index 00000000..39654572
--- /dev/null
+++ b/application/llm/open_router.py
@@ -0,0 +1,15 @@
+from application.core.settings import settings
+from application.llm.openai import OpenAILLM
+
+OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+
+
+class OpenRouterLLM(OpenAILLM):
+    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
+        super().__init__(
+            api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,
+            user_api_key=user_api_key,
+            base_url=base_url or OPEN_ROUTER_BASE_URL,
+            *args,
+            **kwargs,
+        )
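
Since `OpenRouterLLM` only overrides the constructor, everything else (chat calls, attachment prep, base64 handling) is inherited from `OpenAILLM`. A minimal usage sketch, assuming `OPEN_ROUTER_API_KEY` is configured in settings:

```python
from application.llm.open_router import OpenRouterLLM

# Key resolution order per __init__ above: explicit api_key, then
# settings.OPEN_ROUTER_API_KEY, then settings.API_KEY.
llm = OpenRouterLLM(user_api_key=None)
# Requests now go to https://openrouter.ai/api/v1 with OpenAI-compatible payloads.
```
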
diff --git a/application/llm/openai.py b/application/llm/openai.py
index e851f078..263b4b5a 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -9,6 +9,57 @@ from application.llm.base import BaseLLM
 from application.storage.storage_creator import StorageCreator
 
 
+def _truncate_base64_for_logging(messages):
+    """
+    Create a copy of messages with base64 data truncated for readable logging.
+
+    Args:
+        messages: List of message dicts
+
+    Returns:
+        Copy of messages with truncated base64 content
+    """
+    import copy
+
+    def truncate_content(content):
+        if isinstance(content, str):
+            # Check if it looks like a data URL with base64
+            if content.startswith("data:") and ";base64," in content:
+                prefix_end = content.index(";base64,") + len(";base64,")
+                prefix = content[:prefix_end]
+                return f"{prefix}[BASE64_DATA_TRUNCATED, length={len(content) - prefix_end}]"
+            return content
+        elif isinstance(content, list):
+            return [truncate_item(item) for item in content]
+        elif isinstance(content, dict):
+            return {k: truncate_content(v) for k, v in content.items()}
+        return content
+
+    def truncate_item(item):
+        if isinstance(item, dict):
+            result = {}
+            for k, v in item.items():
+                if k == "url" and isinstance(v, str) and ";base64," in v:
+                    prefix_end = v.index(";base64,") + len(";base64,")
+                    prefix = v[:prefix_end]
+                    result[k] = f"{prefix}[BASE64_DATA_TRUNCATED, length={len(v) - prefix_end}]"
+                elif k == "data" and isinstance(v, str) and len(v) > 100:
+                    result[k] = f"[BASE64_DATA_TRUNCATED, length={len(v)}]"
+                else:
+                    result[k] = truncate_content(v)
+            return result
+        return truncate_content(item)
+
+    truncated = []
+    for msg in messages:
+        msg_copy = copy.copy(msg)
+        if "content" in msg_copy:
+            msg_copy["content"] = truncate_content(msg_copy["content"])
+        truncated.append(msg_copy)
+
+    return truncated
+
+
 class OpenAILLM(BaseLLM):
 
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
@@ -44,12 +95,12 @@ class OpenAILLM(BaseLLM):
             if isinstance(content, str):
                 cleaned_messages.append({"role": role, "content": content})
             elif isinstance(content, list):
+                # Collect all content parts into a single message
+                content_parts = []
+
                 for item in content:
-                    if "text" in item:
-                        cleaned_messages.append(
-                            {"role": role, "content": item["text"]}
-                        )
-                    elif "function_call" in item:
+                    if "function_call" in item:
+                        # Function calls need their own message
                         cleaned_args = self._remove_null_values(
                             item["function_call"]["args"]
                         )
@@ -69,6 +120,7 @@ class OpenAILLM(BaseLLM):
                             }
                         )
                     elif "function_response" in item:
+                        # Function responses need their own message
                         cleaned_messages.append(
                             {
                                 "role": "tool",
@@ -81,36 +133,20 @@ class OpenAILLM(BaseLLM):
                             }
                         )
                     elif isinstance(item, dict):
-                        content_parts = []
-                        if "text" in item:
-                            content_parts.append(
-                                {"type": "text", "text": item["text"]}
-                            )
-                        elif (
-                            "type" in item
-                            and item["type"] == "text"
-                            and "text" in item
-                        ):
+                        # Collect content parts (text, images, files) into a single message
+                        if "type" in item and item["type"] == "text" and "text" in item:
                             content_parts.append(item)
-                        elif (
-                            "type" in item
-                            and item["type"] == "file"
-                            and "file" in item
-                        ):
+                        elif "type" in item and item["type"] == "file" and "file" in item:
                             content_parts.append(item)
-                        elif (
-                            "type" in item
-                            and item["type"] == "image_url"
-                            and "image_url" in item
-                        ):
+                        elif "type" in item and item["type"] == "image_url" and "image_url" in item:
                             content_parts.append(item)
-                        cleaned_messages.append(
-                            {"role": role, "content": content_parts}
-                        )
-                    else:
-                        raise ValueError(
-                            f"Unexpected content dictionary format: {item}"
-                        )
+                        elif "text" in item and "type" not in item:
+                            # Legacy format: {"text": "..."} without type
+                            content_parts.append({"type": "text", "text": item["text"]})
+
+                # Add the collected content parts as a single message
+                if content_parts:
+                    cleaned_messages.append({"role": role, "content": content_parts})
             else:
                 raise ValueError(f"Unexpected content type: {type(content)}")
         return cleaned_messages
@@ -127,7 +163,7 @@ class OpenAILLM(BaseLLM):
         **kwargs,
     ):
         messages = self._clean_messages_openai(messages)
-        logging.info(f"Cleaned messages: {messages}")
+        logging.info(f"Cleaned messages: {_truncate_base64_for_logging(messages)}")
 
         # Convert max_tokens to max_completion_tokens for newer models
         if "max_tokens" in kwargs:
@@ -163,7 +199,7 @@ class OpenAILLM(BaseLLM):
         **kwargs,
     ):
         messages = self._clean_messages_openai(messages)
-        logging.info(f"Cleaned messages: {_truncate_base64_for_logging(messages)}")
+        logging.info(f"Cleaned messages: {_truncate_base64_for_logging(messages)}")
 
         # Convert max_tokens to max_completion_tokens for newer models
         if "max_tokens" in kwargs:
@@ -261,17 +297,14 @@ class OpenAILLM(BaseLLM):
         """
         Return a list of MIME types supported by OpenAI for file uploads.
 
+        Reads from the shared model config so the provider's supported
+        types stay consistent with the model registry.
+
         Returns:
             list: List of supported MIME types
         """
-        return [
-            "application/pdf",
-            "image/png",
-            "image/jpeg",
-            "image/jpg",
-            "image/webp",
-            "image/gif",
-        ]
+        from application.core.model_configs import OPENAI_ATTACHMENTS
+        return OPENAI_ATTACHMENTS
 
     def prepare_messages_with_attachments(self, messages, attachments=None):
         """
@@ -308,10 +341,16 @@ class OpenAILLM(BaseLLM):
             prepared_messages[user_message_index]["content"] = []
 
         for attachment in attachments:
             mime_type = attachment.get("mime_type")
+            logging.info(f"Processing attachment with mime_type: {mime_type}, has_data: {'data' in attachment}, has_path: {'path' in attachment}")
 
             if mime_type and mime_type.startswith("image/"):
                 try:
-                    base64_image = self._get_base64_image(attachment)
+                    # Check if this is a pre-converted image (from PDF-to-image conversion)
+                    if "data" in attachment:
+                        base64_image = attachment["data"]
+                    else:
+                        base64_image = self._get_base64_image(attachment)
+
                     prepared_messages[user_message_index]["content"].append(
                         {
                             "type": "image_url",
@@ -320,6 +359,7 @@ class OpenAILLM(BaseLLM):
                             },
                         }
                     )
+
                 except Exception as e:
                     logging.error(
                         f"Error processing image attachment: {e}", exc_info=True
@@ -334,6 +374,7 @@ class OpenAILLM(BaseLLM):
 
             # Handle PDFs using the file API
             elif mime_type == "application/pdf":
+                logging.info(f"Attempting to upload PDF to OpenAI: {attachment.get('path', 'unknown')}")
                 try:
                     file_id = self._upload_file_to_openai(attachment)
                     prepared_messages[user_message_index]["content"].append(
@@ -348,6 +389,8 @@ class OpenAILLM(BaseLLM):
                             "text": f"File content:\n\n{attachment['content']}",
                         }
                     )
+            else:
+                logging.warning(f"Unsupported attachment type in OpenAI provider: {mime_type}")
 
         return prepared_messages
 
     def _get_base64_image(self, attachment):
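
To sanity-check the log-truncation helper: a data-URL image keeps its prefix while the payload collapses to a length marker, and the input is left untouched (shallow copy plus rebuilt content). Expected values below are derived by tracing the code above:

```python
from application.llm.openai import _truncate_base64_for_logging

messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this"},
        {"type": "image_url",
         "image_url": {"url": "data:image/png;base64," + "A" * 200000}},
    ],
}]

safe = _truncate_base64_for_logging(messages)
assert safe[0]["content"][1]["image_url"]["url"] == (
    "data:image/png;base64,[BASE64_DATA_TRUNCATED, length=200000]"
)
assert messages[0]["content"][1]["image_url"]["url"].endswith("A")  # original intact
```
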
diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py
index 64860c0c..bee0dd34 100644
--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -60,14 +60,14 @@ def get_default_file_extractor(
         ".rst": RstParser(),
         ".adoc": DoclingAsciiDocParser(),
         ".asciidoc": DoclingAsciiDocParser(),
-        # Images (with OCR)
-        ".png": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".jpg": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".jpeg": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".tiff": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".tif": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".bmp": DoclingImageParser(ocr_enabled=ocr_enabled),
-        ".webp": DoclingImageParser(ocr_enabled=ocr_enabled),
+        # Images (with OCR) - only use Docling when OCR is enabled
+        ".png": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".jpg": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".jpeg": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".tiff": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".tif": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".bmp": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
+        ".webp": DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser(),
         # Media/subtitles
         ".vtt": DoclingVTTParser(),
         # Specialized XML formats
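
The seven dictionary entries repeat the same conditional; if the duplication bothers reviewers, it hoists cleanly into a local helper. Sketch only, not part of the PR, reusing the parser names imported in this module:

```python
def _image_parser(ocr_enabled: bool):
    # Docling (heavier, OCR-capable) only when OCR is requested;
    # the lightweight ImageParser otherwise.
    return DoclingImageParser(ocr_enabled=ocr_enabled) if ocr_enabled else ImageParser()

image_entries = {ext: _image_parser(ocr_enabled)
                 for ext in (".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp")}
```
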
diff --git a/application/requirements.txt b/application/requirements.txt
index 5a17a7c7..85d1e391 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -56,6 +56,7 @@ packaging==24.2
 pandas==2.3.3
 openpyxl==3.1.5
 pathable==0.4.4
+pdf2image>=1.17.0
 pillow
 portalocker>=2.7.0,<3.0.0
 prance==25.4.8.0
diff --git a/application/utils.py b/application/utils.py
index 35b61036..5a23517d 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -1,7 +1,11 @@
+import base64
 import hashlib
+import io
+import logging
 import os
 import re
 import uuid
+from typing import List
 
 import tiktoken
 from flask import jsonify, make_response
@@ -11,6 +15,8 @@ from application.core.model_utils import get_token_limit
 from application.core.settings import settings
 
+logger = logging.getLogger(__name__)
+
 _encoding = None
 
@@ -215,6 +221,93 @@ def calculate_compression_threshold(
     return threshold
 
 
+def convert_pdf_to_images(
+    file_path: str,
+    storage=None,
+    max_pages: int = 20,
+    dpi: int = 150,
+    image_format: str = "PNG",
+) -> List[dict]:
+    """
+    Convert PDF pages to images for LLMs that support images but not PDFs.
+
+    This enables "synthetic PDF support" by converting each PDF page to an image
+    that can be sent to vision-capable LLMs like Claude.
+
+    Args:
+        file_path: Path to the PDF file (can be storage path)
+        storage: Optional storage instance for retrieving files
+        max_pages: Maximum number of pages to convert (default 20 to avoid context overflow)
+        dpi: Resolution for rendering (default 150 for balance of quality/size)
+        image_format: Output format (PNG recommended for quality)
+
+    Returns:
+        List of dicts with keys:
+            - 'data': base64-encoded image data
+            - 'mime_type': MIME type (e.g., 'image/png')
+            - 'page': Page number (1-indexed)
+
+    Raises:
+        ImportError: If pdf2image is not installed
+        FileNotFoundError: If file doesn't exist
+        Exception: If conversion fails
+    """
+    try:
+        from pdf2image import convert_from_path, convert_from_bytes
+    except ImportError:
+        raise ImportError(
+            "pdf2image is required for PDF-to-image conversion. "
+            "Install it with: pip install pdf2image\n"
+            "Also ensure poppler-utils is installed on your system."
+        )
+
+    images_data = []
+    mime_type = f"image/{image_format.lower()}"
+
+    try:
+        # Get PDF content either from storage or direct file path
+        if storage and hasattr(storage, "get_file"):
+            with storage.get_file(file_path) as pdf_file:
+                pdf_bytes = pdf_file.read()
+            pil_images = convert_from_bytes(
+                pdf_bytes,
+                dpi=dpi,
+                fmt=image_format.lower(),
+                first_page=1,
+                last_page=max_pages,
+            )
+        else:
+            pil_images = convert_from_path(
+                file_path,
+                dpi=dpi,
+                fmt=image_format.lower(),
+                first_page=1,
+                last_page=max_pages,
+            )
+
+        for page_num, pil_image in enumerate(pil_images, start=1):
+            # Convert PIL image to base64
+            buffer = io.BytesIO()
+            pil_image.save(buffer, format=image_format)
+            buffer.seek(0)
+            base64_data = base64.b64encode(buffer.read()).decode("utf-8")
+
+            images_data.append({
+                "data": base64_data,
+                "mime_type": mime_type,
+                "page": page_num,
+            })
+
+        return images_data
+
+    except FileNotFoundError:
+        logger.error(f"PDF file not found: {file_path}")
+        raise
+    except Exception as e:
+        logger.error(f"Error converting PDF to images: {e}", exc_info=True)
+        raise
+
+
 def clean_text_for_tts(text: str) -> str:
     """
     clean text for Text-to-Speech processing.
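
A tail-end usage sketch for `convert_pdf_to_images`, matching the signature above. The path is hypothetical, the printed length is illustrative, and `poppler-utils` must be installed (hence the Dockerfile change at the top of this diff):

```python
from application.utils import convert_pdf_to_images

# Plain filesystem path, no storage backend.
pages = convert_pdf_to_images("/tmp/report.pdf", max_pages=5)
for page in pages:
    # e.g. "page 1: image/png, 84210 base64 chars"
    print(f"page {page['page']}: {page['mime_type']}, {len(page['data'])} base64 chars")
```
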