feat: add support for structured output and JSON schema validation

2026-05-04 23:52:00 +00:00 · 2025-08-13 13:29:51 +05:30
parent 56831fbcf2
commit 896dcf1f9e
13 changed files with 660 additions and 153 deletions
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -1,11 +1,13 @@
+import json
+import logging
+
 from google import genai
 from google.genai import types
-import logging
-import json
+
+from application.core.settings import settings

 from application.llm.base import BaseLLM
 from application.storage.storage_creator import StorageCreator
-from application.core.settings import settings


 class GoogleLLM(BaseLLM):
@@ -24,12 +26,12 @@ class GoogleLLM(BaseLLM):
            list: List of supported MIME types
        """
        return [
-            'application/pdf',
-            'image/png',
-            'image/jpeg',
-            'image/jpg',
-            'image/webp',
-            'image/gif'
+            "application/pdf",
+            "image/png",
+            "image/jpeg",
+            "image/jpg",
+            "image/webp",
+            "image/gif",
        ]

    def prepare_messages_with_attachments(self, messages, attachments=None):
@@ -70,26 +72,30 @@ class GoogleLLM(BaseLLM):

        files = []
        for attachment in attachments:
-            mime_type = attachment.get('mime_type')
+            mime_type = attachment.get("mime_type")

            if mime_type in self.get_supported_attachment_types():
                try:
                    file_uri = self._upload_file_to_google(attachment)
-                    logging.info(f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}")
+                    logging.info(
+                        f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}"
+                    )
                    files.append({"file_uri": file_uri, "mime_type": mime_type})
                except Exception as e:
-                    logging.error(f"GoogleLLM: Error uploading file: {e}", exc_info=True)
-                    if 'content' in attachment:
-                        prepared_messages[user_message_index]["content"].append({
-                            "type": "text",
-                            "text": f"[File could not be processed: {attachment.get('path', 'unknown')}]"
-                        })
+                    logging.error(
+                        f"GoogleLLM: Error uploading file: {e}", exc_info=True
+                    )
+                    if "content" in attachment:
+                        prepared_messages[user_message_index]["content"].append(
+                            {
+                                "type": "text",
+                                "text": f"[File could not be processed: {attachment.get('path', 'unknown')}]",
+                            }
+                        )

        if files:
            logging.info(f"GoogleLLM: Adding {len(files)} files to message")
-            prepared_messages[user_message_index]["content"].append({
-                "files": files
-            })
+            prepared_messages[user_message_index]["content"].append({"files": files})

        return prepared_messages

@@ -103,10 +109,10 @@ class GoogleLLM(BaseLLM):
        Returns:
            str: Google AI file URI for the uploaded file.
        """
-        if 'google_file_uri' in attachment:
-            return attachment['google_file_uri']
+        if "google_file_uri" in attachment:
+            return attachment["google_file_uri"]

-        file_path = attachment.get('path')
+        file_path = attachment.get("path")
        if not file_path:
            raise ValueError("No file path provided in attachment")

@@ -116,17 +122,19 @@ class GoogleLLM(BaseLLM):
        try:
            file_uri = self.storage.process_file(
                file_path,
-                lambda local_path, **kwargs: self.client.files.upload(file=local_path).uri
+                lambda local_path, **kwargs: self.client.files.upload(
+                    file=local_path
+                ).uri,
            )

            from application.core.mongo_db import MongoDB
+
            mongo = MongoDB.get_client()
            db = mongo[settings.MONGO_DB_NAME]
            attachments_collection = db["attachments"]
-            if '_id' in attachment:
+            if "_id" in attachment:
                attachments_collection.update_one(
-                    {"_id": attachment['_id']},
-                    {"$set": {"google_file_uri": file_uri}}
+                    {"_id": attachment["_id"]}, {"$set": {"google_file_uri": file_uri}}
                )

            return file_uri
@@ -166,13 +174,13 @@ class GoogleLLM(BaseLLM):
                                )
                            )
                        elif "files" in item:
-                                for file_data in item["files"]:
-                                    parts.append(
-                                        types.Part.from_uri(
-                                            file_uri=file_data["file_uri"],
-                                            mime_type=file_data["mime_type"]
-                                        )
+                            for file_data in item["files"]:
+                                parts.append(
+                                    types.Part.from_uri(
+                                        file_uri=file_data["file_uri"],
+                                        mime_type=file_data["mime_type"],
                                    )
+                                )
                        else:
                            raise ValueError(
                                f"Unexpected content dictionary format:{item}"
@@ -231,6 +239,7 @@ class GoogleLLM(BaseLLM):
        stream=False,
        tools=None,
        formatting="openai",
+        response_schema=None,
        **kwargs,
    ):
        client = genai.Client(api_key=self.api_key)
@@ -244,16 +253,21 @@ class GoogleLLM(BaseLLM):
        if tools:
            cleaned_tools = self._clean_tools_format(tools)
            config.tools = cleaned_tools
-            response = client.models.generate_content(
-                model=model,
-                contents=messages,
-                config=config,
-            )
+
+        # Add response schema for structured output if provided
+        if response_schema:
+            config.response_schema = response_schema
+            config.response_mime_type = "application/json"
+
+        response = client.models.generate_content(
+            model=model,
+            contents=messages,
+            config=config,
+        )
+
+        if tools:
            return response
        else:
-            response = client.models.generate_content(
-                model=model, contents=messages, config=config
-            )
            return response.text

    def _raw_gen_stream(
@@ -264,6 +278,7 @@ class GoogleLLM(BaseLLM):
        stream=True,
        tools=None,
        formatting="openai",
+        response_schema=None,
        **kwargs,
    ):
        client = genai.Client(api_key=self.api_key)
@@ -278,17 +293,24 @@ class GoogleLLM(BaseLLM):
            cleaned_tools = self._clean_tools_format(tools)
            config.tools = cleaned_tools

+        # Add response schema for structured output if provided
+        if response_schema:
+            config.response_schema = response_schema
+            config.response_mime_type = "application/json"
+
        # Check if we have both tools and file attachments
        has_attachments = False
        for message in messages:
            for part in message.parts:
-                if hasattr(part, 'file_data') and part.file_data is not None:
+                if hasattr(part, "file_data") and part.file_data is not None:
                    has_attachments = True
                    break
            if has_attachments:
                break

-        logging.info(f"GoogleLLM: Starting stream generation. Model: {model}, Messages: {json.dumps(messages, default=str)}, Has attachments: {has_attachments}")
+        logging.info(
+            f"GoogleLLM: Starting stream generation. Model: {model}, Messages: {json.dumps(messages, default=str)}, Has attachments: {has_attachments}"
+        )

        response = client.models.generate_content_stream(
            model=model,
@@ -296,7 +318,6 @@ class GoogleLLM(BaseLLM):
            config=config,
        )

-
        for chunk in response:
            if hasattr(chunk, "candidates") and chunk.candidates:
                for candidate in chunk.candidates:
@@ -311,3 +332,75 @@ class GoogleLLM(BaseLLM):

    def _supports_tools(self):
        return True
+
+    def _supports_structured_output(self):
+        return True
+
+    def prepare_structured_output_format(self, json_schema):
+        if not json_schema:
+            return None
+
+        type_map = {
+            "object": "OBJECT",
+            "array": "ARRAY",
+            "string": "STRING",
+            "integer": "INTEGER",
+            "number": "NUMBER",
+            "boolean": "BOOLEAN",
+        }
+
+        def convert(schema):
+            if not isinstance(schema, dict):
+                return schema
+
+            result = {}
+            schema_type = schema.get("type")
+            if schema_type:
+                result["type"] = type_map.get(schema_type.lower(), schema_type.upper())
+
+            for key in [
+                "description",
+                "nullable",
+                "enum",
+                "minItems",
+                "maxItems",
+                "required",
+                "propertyOrdering",
+            ]:
+                if key in schema:
+                    result[key] = schema[key]
+
+            if "format" in schema:
+                format_value = schema["format"]
+                if schema_type == "string":
+                    if format_value == "date":
+                        result["format"] = "date-time"
+                    elif format_value in ["enum", "date-time"]:
+                        result["format"] = format_value
+                else:
+                    result["format"] = format_value
+
+            if "properties" in schema:
+                result["properties"] = {
+                    k: convert(v) for k, v in schema["properties"].items()
+                }
+                if "propertyOrdering" not in result and result.get("type") == "OBJECT":
+                    result["propertyOrdering"] = list(result["properties"].keys())
+
+            if "items" in schema:
+                result["items"] = convert(schema["items"])
+
+            for field in ["anyOf", "oneOf", "allOf"]:
+                if field in schema:
+                    result[field] = [convert(s) for s in schema[field]]
+
+            return result
+
+        try:
+            return convert(json_schema)
+        except Exception as e:
+            logging.error(
+                f"Error preparing structured output format for Google: {e}",
+                exc_info=True,
+            )
+            return None