import logging
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union

from application.logging import build_stack_data

logger = logging.getLogger(__name__)


@dataclass
class ToolCall:
    """Represents a tool/function call from the LLM."""

    id: str
    name: str
    arguments: Union[str, Dict]
    index: Optional[int] = None
    thought_signature: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict) -> "ToolCall":
        """Create ToolCall from dictionary."""
        return cls(
            id=data.get("id", ""),
            name=data.get("name", ""),
            arguments=data.get("arguments", {}),
            index=data.get("index"),
        )


@dataclass
class LLMResponse:
    """Represents a response from the LLM."""

    content: str
    tool_calls: List[ToolCall]
    finish_reason: str
    raw_response: Any

    @property
    def requires_tool_call(self) -> bool:
        """Check if the response requires tool calls."""
        return bool(self.tool_calls) and self.finish_reason == "tool_calls"


class LLMHandler(ABC):
    """Abstract base class for LLM handlers."""

    def __init__(self):
        self.llm_calls = []
        self.tool_calls = []

    @abstractmethod
    def parse_response(self, response: Any) -> LLMResponse:
        """Parse raw LLM response into standardized format."""
        pass

    @abstractmethod
    def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
        """Create a tool result message for the conversation history."""
        pass

    @abstractmethod
    def _iterate_stream(self, response: Any) -> Generator:
        """Iterate through streaming response chunks."""
        pass

    def process_message_flow(
        self,
        agent,
        initial_response,
        tools_dict: Dict,
        messages: List[Dict],
        attachments: Optional[List] = None,
        stream: bool = False,
    ) -> Union[str, Generator]:
        """
        Main orchestration method for processing LLM message flow.

        Args:
            agent: The agent instance
            initial_response: Initial LLM response
            tools_dict: Dictionary of available tools
            messages: Conversation history
            attachments: Optional attachments
            stream: Whether to use streaming

        Returns:
            Final response or generator for streaming
        """
        messages = self.prepare_messages(agent, messages, attachments)

        if stream:
            return self.handle_streaming(agent, initial_response, tools_dict, messages)
        else:
            return self.handle_non_streaming(
                agent, initial_response, tools_dict, messages
            )

    def prepare_messages(
        self, agent, messages: List[Dict], attachments: Optional[List] = None
    ) -> List[Dict]:
        """
        Prepare messages with attachments and provider-specific formatting.

        Args:
            agent: The agent instance
            messages: Original messages
            attachments: List of attachments

        Returns:
            Prepared messages list
        """
        if not attachments:
            return messages
        logger.info(f"Preparing messages with {len(attachments)} attachments")
        supported_types = agent.llm.get_supported_attachment_types()

        supported_attachments = [
            a for a in attachments if a.get("mime_type") in supported_types
        ]
        unsupported_attachments = [
            a for a in attachments if a.get("mime_type") not in supported_types
        ]

        # Process supported attachments with the LLM's custom method

        if supported_attachments:
            logger.info(
                f"Processing {len(supported_attachments)} supported attachments"
            )
            messages = agent.llm.prepare_messages_with_attachments(
                messages, supported_attachments
            )
        # Process unsupported attachments with default method

        if unsupported_attachments:
            logger.info(
                f"Processing {len(unsupported_attachments)} unsupported attachments"
            )
            messages = self._append_unsupported_attachments(
                messages, unsupported_attachments
            )
        return messages

    def _append_unsupported_attachments(
        self, messages: List[Dict], attachments: List[Dict]
    ) -> List[Dict]:
        """
        Default method to append unsupported attachment content to system prompt.

        Args:
            messages: Current messages
            attachments: List of unsupported attachments

        Returns:
            Updated messages list
        """
        prepared_messages = messages.copy()
        attachment_texts = []

        for attachment in attachments:
            logger.info(f"Adding attachment {attachment.get('id')} to context")
            if "content" in attachment:
                attachment_texts.append(
                    f"Attached file content:\n\n{attachment['content']}"
                )
        if attachment_texts:
            combined_text = "\n\n".join(attachment_texts)

            system_msg = next(
                (msg for msg in prepared_messages if msg.get("role") == "system"),
                {"role": "system", "content": ""},
            )

            if system_msg not in prepared_messages:
                prepared_messages.insert(0, system_msg)
            system_msg["content"] += f"\n\n{combined_text}"
        return prepared_messages

    def _prune_messages_minimal(self, messages: List[Dict]) -> Optional[List[Dict]]:
        """
        Build a minimal context: system prompt + latest user message only.
        Drops all tool/function messages to shrink context aggressively.
        """
        system_message = next((m for m in messages if m.get("role") == "system"), None)
        if not system_message:
            logger.warning("Cannot prune messages minimally: missing system message.")
            return None
        last_non_system = None
        for m in reversed(messages):
            if m.get("role") == "user":
                last_non_system = m
                break
            if not last_non_system and m.get("role") not in ("system", None):
                last_non_system = m
        if not last_non_system:
            logger.warning("Cannot prune messages minimally: missing user/assistant messages.")
            return None
        logger.info("Pruning context to system + latest user/assistant message to proceed.")
        return [system_message, last_non_system]

    def _extract_text_from_content(self, content: Any) -> str:
        """
        Convert message content (str or list of parts) to plain text for compression.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts_text = []
            for item in content:
                if isinstance(item, dict):
                    if "text" in item and item["text"] is not None:
                        parts_text.append(str(item["text"]))
                    elif "function_call" in item or "function_response" in item:
                        # Keep serialized function calls/responses so the compressor sees actions
                        parts_text.append(str(item))
                    elif "files" in item:
                        parts_text.append(str(item))
            return "\n".join(parts_text)
        return ""

    def _build_conversation_from_messages(self, messages: List[Dict]) -> Optional[Dict]:
        """
        Build a conversation-like dict from current messages so we can compress
        even when the conversation isn't persisted yet. Includes tool calls/results.
        """
        queries = []
        current_prompt = None
        current_tool_calls = {}

        def _commit_query(response_text: str):
            nonlocal current_prompt, current_tool_calls
            if current_prompt is None and not response_text:
                return
            tool_calls_list = list(current_tool_calls.values())
            queries.append(
                {
                    "prompt": current_prompt or "",
                    "response": response_text,
                    "tool_calls": tool_calls_list,
                }
            )
            current_prompt = None
            current_tool_calls = {}

        for message in messages:
            role = message.get("role")
            content = message.get("content")

            if role == "user":
                current_prompt = self._extract_text_from_content(content)

            elif role in {"assistant", "model"}:
                # If this assistant turn contains tool calls, collect them; otherwise commit a response.
                if isinstance(content, list):
                    for item in content:
                        if "function_call" in item:
                            fc = item["function_call"]
                            call_id = fc.get("call_id") or str(uuid.uuid4())
                            current_tool_calls[call_id] = {
                                "tool_name": "unknown_tool",
                                "action_name": fc.get("name"),
                                "arguments": fc.get("args"),
                                "result": None,
                                "status": "called",
                                "call_id": call_id,
                            }
                        elif "function_response" in item:
                            fr = item["function_response"]
                            call_id = fr.get("call_id") or str(uuid.uuid4())
                            current_tool_calls[call_id] = {
                                "tool_name": "unknown_tool",
                                "action_name": fr.get("name"),
                                "arguments": None,
                                "result": fr.get("response", {}).get("result"),
                                "status": "completed",
                                "call_id": call_id,
                            }
                    # No direct assistant text here; continue to next message
                    continue

                response_text = self._extract_text_from_content(content)
                _commit_query(response_text)

            elif role == "tool":
                # Attach tool outputs to the latest pending tool call if possible
                tool_text = self._extract_text_from_content(content)
                # Attempt to parse function_response style
                call_id = None
                if isinstance(content, list):
                    for item in content:
                        if "function_response" in item and item["function_response"].get("call_id"):
                            call_id = item["function_response"]["call_id"]
                            break
                if call_id and call_id in current_tool_calls:
                    current_tool_calls[call_id]["result"] = tool_text
                    current_tool_calls[call_id]["status"] = "completed"
                elif queries:
                    queries[-1].setdefault("tool_calls", []).append(
                        {
                            "tool_name": "unknown_tool",
                            "action_name": "unknown_action",
                            "arguments": {},
                            "result": tool_text,
                            "status": "completed",
                        }
                    )

        # If there's an unfinished prompt with tool_calls but no response yet, commit it
        if current_prompt is not None or current_tool_calls:
            _commit_query(response_text="")

        if not queries:
            return None

        return {
            "queries": queries,
            "compression_metadata": {
                "is_compressed": False,
                "compression_points": [],
            },
        }

    def _rebuild_messages_after_compression(
        self,
        messages: List[Dict],
        compressed_summary: Optional[str],
        recent_queries: List[Dict],
        include_current_execution: bool = False,
        include_tool_calls: bool = False,
    ) -> Optional[List[Dict]]:
        """
        Rebuild the message list after compression so tool execution can continue.

        Delegates to MessageBuilder for the actual reconstruction.
        """
        from application.api.answer.services.compression.message_builder import (
            MessageBuilder,
        )

        return MessageBuilder.rebuild_messages_after_compression(
            messages=messages,
            compressed_summary=compressed_summary,
            recent_queries=recent_queries,
            include_current_execution=include_current_execution,
            include_tool_calls=include_tool_calls,
        )

    def _perform_mid_execution_compression(
        self, agent, messages: List[Dict]
    ) -> tuple[bool, Optional[List[Dict]]]:
        """
        Perform compression during tool execution and rebuild messages.

        Uses the new orchestrator for simplified compression.

        Args:
            agent: The agent instance
            messages: Current conversation messages

        Returns:
            (success: bool, rebuilt_messages: Optional[List[Dict]])
        """
        try:
            from application.api.answer.services.compression import (
                CompressionOrchestrator,
            )
            from application.api.answer.services.conversation_service import (
                ConversationService,
            )

            conversation_service = ConversationService()
            orchestrator = CompressionOrchestrator(conversation_service)

            # Get conversation from database (may be None for new sessions)
            conversation = conversation_service.get_conversation(
                agent.conversation_id, agent.initial_user_id
            )

            if conversation:
                # Merge current in-flight messages (including tool calls)
                conversation_from_msgs = self._build_conversation_from_messages(messages)
                if conversation_from_msgs:
                    conversation = conversation_from_msgs
            else:
                logger.warning(
                    "Could not load conversation for compression; attempting in-memory compression"
                )
                return self._perform_in_memory_compression(agent, messages)

            # Use orchestrator to perform compression
            result = orchestrator.compress_mid_execution(
                conversation_id=agent.conversation_id,
                user_id=agent.initial_user_id,
                model_id=agent.model_id,
                decoded_token=getattr(agent, "decoded_token", {}),
                current_conversation=conversation,
            )

            if not result.success:
                logger.warning(f"Mid-execution compression failed: {result.error}")
                # Try minimal pruning as fallback
                pruned = self._prune_messages_minimal(messages)
                if pruned:
                    agent.context_limit_reached = False
                    agent.current_token_count = 0
                    return True, pruned
                return False, None

            if not result.compression_performed:
                logger.warning("Compression not performed")
                return False, None

            # Check if compression actually reduced tokens
            if result.metadata:
                if result.metadata.compressed_token_count >= result.metadata.original_token_count:
                    logger.warning(
                        "Compression did not reduce token count; falling back to minimal pruning"
                    )
                    pruned = self._prune_messages_minimal(messages)
                    if pruned:
                        agent.context_limit_reached = False
                        agent.current_token_count = 0
                        return True, pruned
                    return False, None

                logger.info(
                    f"Mid-execution compression successful - ratio: {result.metadata.compression_ratio:.1f}x, "
                    f"saved {result.metadata.original_token_count - result.metadata.compressed_token_count} tokens"
                )

            # Also store the compression summary as a visible message
            if result.metadata:
                conversation_service.append_compression_message(
                    agent.conversation_id, result.metadata.to_dict()
                )

            # Update agent's compressed summary for downstream persistence
            agent.compressed_summary = result.compressed_summary
            agent.compression_metadata = result.metadata.to_dict() if result.metadata else None
            agent.compression_saved = False

            # Reset the context limit flag so tools can continue
            agent.context_limit_reached = False
            agent.current_token_count = 0

            # Rebuild messages
            rebuilt_messages = self._rebuild_messages_after_compression(
                messages,
                result.compressed_summary,
                result.recent_queries,
                include_current_execution=False,
                include_tool_calls=False,
            )

            if rebuilt_messages is None:
                return False, None

            return True, rebuilt_messages

        except Exception as e:
            logger.error(
                f"Error performing mid-execution compression: {str(e)}", exc_info=True
            )
            return False, None

    def _perform_in_memory_compression(
        self, agent, messages: List[Dict]
    ) -> tuple[bool, Optional[List[Dict]]]:
        """
        Fallback compression path when the conversation is not yet persisted.

        Uses CompressionService directly without DB persistence.
        """
        try:
            from application.api.answer.services.compression.service import (
                CompressionService,
            )
            from application.core.model_utils import (
                get_api_key_for_provider,
                get_provider_from_model_id,
            )
            from application.core.settings import settings
            from application.llm.llm_creator import LLMCreator

            conversation = self._build_conversation_from_messages(messages)
            if not conversation:
                logger.warning(
                    "Cannot perform in-memory compression: no user/assistant turns found"
                )
                return False, None

            compression_model = (
                settings.COMPRESSION_MODEL_OVERRIDE
                if settings.COMPRESSION_MODEL_OVERRIDE
                else agent.model_id
            )
            provider = get_provider_from_model_id(compression_model)
            api_key = get_api_key_for_provider(provider)
            compression_llm = LLMCreator.create_llm(
                provider,
                api_key,
                getattr(agent, "user_api_key", None),
                getattr(agent, "decoded_token", None),
                model_id=compression_model,
            )

            # Create service without DB persistence capability
            compression_service = CompressionService(
                llm=compression_llm,
                model_id=compression_model,
                conversation_service=None,  # No DB updates for in-memory
            )

            queries_count = len(conversation.get("queries", []))
            compress_up_to = queries_count - 1

            if compress_up_to < 0 or queries_count == 0:
                logger.warning("Not enough queries to compress in-memory context")
                return False, None

            metadata = compression_service.compress_conversation(
                conversation,
                compress_up_to_index=compress_up_to,
            )

            # If compression doesn't reduce tokens, fall back to minimal pruning
            if (
                metadata.compressed_token_count
                >= metadata.original_token_count
            ):
                logger.warning(
                    "In-memory compression did not reduce token count; falling back to minimal pruning"
                )
                pruned = self._prune_messages_minimal(messages)
                if pruned:
                    agent.context_limit_reached = False
                    agent.current_token_count = 0
                    return True, pruned
                return False, None

            # Attach metadata to synthetic conversation
            conversation["compression_metadata"] = {
                "is_compressed": True,
                "compression_points": [metadata.to_dict()],
            }

            compressed_summary, recent_queries = (
                compression_service.get_compressed_context(conversation)
            )

            agent.compressed_summary = compressed_summary
            agent.compression_metadata = metadata.to_dict()
            agent.compression_saved = False
            agent.context_limit_reached = False
            agent.current_token_count = 0

            rebuilt_messages = self._rebuild_messages_after_compression(
                messages,
                compressed_summary,
                recent_queries,
                include_current_execution=False,
                include_tool_calls=False,
            )
            if rebuilt_messages is None:
                return False, None

            logger.info(
                f"In-memory compression successful - ratio: {metadata.compression_ratio:.1f}x, "
                f"saved {metadata.original_token_count - metadata.compressed_token_count} tokens"
            )
            return True, rebuilt_messages

        except Exception as e:
            logger.error(
                f"Error performing in-memory compression: {str(e)}", exc_info=True
            )
            return False, None

    def handle_tool_calls(
        self, agent, tool_calls: List[ToolCall], tools_dict: Dict, messages: List[Dict]
    ) -> Generator:
        """
        Execute tool calls and update conversation history.

        Args:
            agent: The agent instance
            tool_calls: List of tool calls to execute
            tools_dict: Available tools dictionary
            messages: Current conversation history

        Returns:
            Updated messages list
        """
        updated_messages = messages.copy()

        for i, call in enumerate(tool_calls):
            # Check context limit before executing tool call
            if hasattr(agent, '_check_context_limit') and agent._check_context_limit(updated_messages):
                # Context limit reached - attempt mid-execution compression
                compression_attempted = False
                compression_successful = False

                try:
                    from application.core.settings import settings
                    compression_enabled = settings.ENABLE_CONVERSATION_COMPRESSION
                except Exception:
                    compression_enabled = False

                if compression_enabled:
                    compression_attempted = True
                    try:
                        logger.info(
                            f"Context limit reached with {len(tool_calls) - i} remaining tool calls. "
                            f"Attempting mid-execution compression..."
                        )

                        # Trigger mid-execution compression (DB-backed if available, otherwise in-memory)
                        compression_successful, rebuilt_messages = self._perform_mid_execution_compression(
                            agent, updated_messages
                        )

                        if compression_successful and rebuilt_messages is not None:
                            # Update the messages list with rebuilt compressed version
                            updated_messages = rebuilt_messages

                            # Yield compression success message
                            yield {
                                "type": "info",
                                "data": {
                                    "message": "Context window limit reached. Compressed conversation history to continue processing."
                                }
                            }

                            logger.info(
                                f"Mid-execution compression successful. Continuing with {len(tool_calls) - i} remaining tool calls."
                            )
                            # Proceed to execute the current tool call with the reduced context
                        else:
                            logger.warning("Mid-execution compression attempted but failed. Skipping remaining tools.")
                    except Exception as e:
                        logger.error(f"Error during mid-execution compression: {str(e)}", exc_info=True)
                        compression_attempted = True
                        compression_successful = False

                # If compression wasn't attempted or failed, skip remaining tools
                if not compression_successful:
                    if i == 0:
                        # Special case: limit reached before executing any tools
                        # This can happen when previous tool responses pushed context over limit
                        if compression_attempted:
                            logger.warning(
                                f"Context limit reached before executing any tools. "
                                f"Compression attempted but failed. "
                                f"Skipping all {len(tool_calls)} pending tool call(s). "
                                f"This typically occurs when previous tool responses contained large amounts of data."
                            )
                        else:
                            logger.warning(
                                f"Context limit reached before executing any tools. "
                                f"Skipping all {len(tool_calls)} pending tool call(s). "
                                f"This typically occurs when previous tool responses contained large amounts of data. "
                                f"Consider enabling compression or using a model with larger context window."
                            )
                    else:
                        # Normal case: executed some tools, now stopping
                        tool_word = "tool call" if i == 1 else "tool calls"
                        remaining = len(tool_calls) - i
                        remaining_word = "tool call" if remaining == 1 else "tool calls"
                        if compression_attempted:
                            logger.warning(
                                f"Context limit reached after executing {i} {tool_word}. "
                                f"Compression attempted but failed. "
                                f"Skipping remaining {remaining} {remaining_word}."
                            )
                        else:
                            logger.warning(
                                f"Context limit reached after executing {i} {tool_word}. "
                                f"Skipping remaining {remaining} {remaining_word}. "
                                f"Consider enabling compression or using a model with larger context window."
                            )

                    # Mark remaining tools as skipped
                    for remaining_call in tool_calls[i:]:
                        skip_message = {
                            "type": "tool_call",
                            "data": {
                                "tool_name": "system",
                                "call_id": remaining_call.id,
                                "action_name": remaining_call.name,
                                "arguments": {},
                                "result": "Skipped: Context limit reached. Too many tool calls in conversation.",
                                "status": "skipped"
                            }
                        }
                        yield skip_message

                    # Set flag on agent
                    agent.context_limit_reached = True
                    break
            try:
                self.tool_calls.append(call)
                tool_executor_gen = agent._execute_tool_action(tools_dict, call)
                while True:
                    try:
                        yield next(tool_executor_gen)
                    except StopIteration as e:
                        tool_response, call_id = e.value
                        break
                    
                function_call_content = {
                    "function_call": {
                        "name": call.name,
                        "args": call.arguments,
                        "call_id": call_id,
                    }
                }
                # Include thought_signature for Google Gemini 3 models
                # It should be at the same level as function_call, not inside it
                if call.thought_signature:
                    function_call_content["thought_signature"] = call.thought_signature
                updated_messages.append(
                    {
                        "role": "assistant",
                        "content": [function_call_content],
                    }
                )


                updated_messages.append(self.create_tool_message(call, tool_response))
            except Exception as e:
                logger.error(f"Error executing tool: {str(e)}", exc_info=True)
                error_call = ToolCall(
                    id=call.id, name=call.name, arguments=call.arguments
                )
                error_response = f"Error executing tool: {str(e)}"
                error_message = self.create_tool_message(error_call, error_response)
                updated_messages.append(error_message)

                call_parts = call.name.split("_")
                if len(call_parts) >= 2:
                    tool_id = call_parts[-1]  # Last part is tool ID (e.g., "1")
                    action_name = "_".join(call_parts[:-1])
                    tool_name = tools_dict.get(tool_id, {}).get("name", "unknown_tool")
                    full_action_name = f"{action_name}_{tool_id}"
                else:
                    tool_name = "unknown_tool"
                    action_name = call.name
                    full_action_name = call.name
                yield {
                    "type": "tool_call",
                    "data": {
                        "tool_name": tool_name,
                        "call_id": call.id,
                        "action_name": full_action_name,
                        "arguments": call.arguments,
                        "error": error_response,
                        "status": "error",
                    },
                }
        return updated_messages

    def handle_non_streaming(
        self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
    ) -> Generator:
        """
        Handle non-streaming response flow.

        Args:
            agent: The agent instance
            response: Current LLM response
            tools_dict: Available tools dictionary
            messages: Conversation history

        Returns:
            Final response after processing all tool calls
        """
        parsed = self.parse_response(response)
        self.llm_calls.append(build_stack_data(agent.llm))

        while parsed.requires_tool_call:
            tool_handler_gen = self.handle_tool_calls(
                agent, parsed.tool_calls, tools_dict, messages
            )
            while True:
                try:
                    yield next(tool_handler_gen)
                except StopIteration as e:
                    messages = e.value
                    break
            response = agent.llm.gen(
                model=agent.model_id, messages=messages, tools=agent.tools
            )
            parsed = self.parse_response(response)
            self.llm_calls.append(build_stack_data(agent.llm))
        return parsed.content

    def handle_streaming(
        self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
    ) -> Generator:
        """
        Handle streaming response flow.

        Args:
            agent: The agent instance
            response: Current LLM response
            tools_dict: Available tools dictionary
            messages: Conversation history

        Yields:
            Streaming response chunks
        """
        buffer = ""
        tool_calls = {}

        for chunk in self._iterate_stream(response):
            if isinstance(chunk, str):
                yield chunk
                continue
            parsed = self.parse_response(chunk)

            if parsed.tool_calls:
                for call in parsed.tool_calls:
                    if call.index not in tool_calls:
                        tool_calls[call.index] = call
                    else:
                        existing = tool_calls[call.index]
                        if call.id:
                            existing.id = call.id
                        if call.name:
                            existing.name = call.name
                        if call.arguments:
                            existing.arguments += call.arguments
                        # Preserve thought_signature for Google Gemini 3 models
                        if call.thought_signature:
                            existing.thought_signature = call.thought_signature
            if parsed.finish_reason == "tool_calls":
                tool_handler_gen = self.handle_tool_calls(
                    agent, list(tool_calls.values()), tools_dict, messages
                )
                while True:
                    try:
                        yield next(tool_handler_gen)
                    except StopIteration as e:
                        messages = e.value
                        break
                tool_calls = {}

                # Check if context limit was reached during tool execution
                if hasattr(agent, 'context_limit_reached') and agent.context_limit_reached:
                    # Add system message warning about context limit
                    messages.append({
                        "role": "system",
                        "content": (
                            "WARNING: Context window limit has been reached. "
                            "Please provide a final response to the user without making additional tool calls. "
                            "Summarize the work completed so far."
                        )
                    })
                    logger.info("Context limit reached - instructing agent to wrap up")

                response = agent.llm.gen_stream(
                    model=agent.model_id, messages=messages, tools=agent.tools if not agent.context_limit_reached else None
                )
                self.llm_calls.append(build_stack_data(agent.llm))

                yield from self.handle_streaming(agent, response, tools_dict, messages)
                return
            if parsed.content:
                buffer += parsed.content
                yield buffer
                buffer = ""
            if parsed.finish_reason == "stop":
                return