# DocsGPT/application/api/answer/services/compression/threshold_checker.py

"""Compression threshold checking logic."""

import logging
from typing import Any, Dict, Optional

from application.api.answer.services.compression.token_counter import TokenCounter
from application.core.model_utils import get_token_limit
from application.core.settings import settings

logger = logging.getLogger(__name__)


class CompressionThresholdChecker:
    """Determines if compression is needed based on token thresholds.

    The threshold is ``threshold_percentage`` multiplied by the context limit
    that ``get_token_limit`` reports for the target model. Both check methods
    are best-effort: any error while counting tokens or resolving the limit is
    logged and reported as "no compression needed" (``False``).
    """

    def __init__(self, threshold_percentage: Optional[float] = None):
        """
        Initialize threshold checker.

        Args:
            threshold_percentage: Fraction of the context limit to use as the
                threshold; it is multiplied directly with the limit (and by
                100 only for display in log messages). Defaults to
                settings.COMPRESSION_THRESHOLD_PERCENTAGE when ``None``.
        """
        # Compare against None explicitly: `value or default` would silently
        # discard an explicit 0 / 0.0 threshold because it is falsy.
        if threshold_percentage is None:
            threshold_percentage = settings.COMPRESSION_THRESHOLD_PERCENTAGE
        self.threshold_percentage = threshold_percentage

    @staticmethod
    def _has_usable_limit(context_limit: Any, model_id: str) -> bool:
        """Return True if ``context_limit`` is positive; log a warning otherwise."""
        if context_limit is not None and context_limit > 0:
            return True
        logger.warning(
            f"Invalid context limit {context_limit!r} for model {model_id}; "
            f"skipping compression check"
        )
        return False

    def _threshold_for(self, context_limit: int) -> int:
        """Return the token count at which compression triggers for this limit."""
        return int(context_limit * self.threshold_percentage)

    def should_compress(
        self,
        conversation: Dict[str, Any],
        model_id: str,
        current_query_tokens: int = 500,
    ) -> bool:
        """
        Determine if compression is needed.

        Args:
            conversation: Full conversation document
            model_id: Target model for this request
            current_query_tokens: Estimated tokens for current query

        Returns:
            True if tokens >= threshold% of context window. False otherwise,
            including when the context limit is not a positive number or when
            any error occurs during the check.
        """
        try:
            # Calculate total tokens in conversation plus the pending query
            total_tokens = TokenCounter.count_conversation_tokens(conversation)
            total_tokens += current_query_tokens

            # Get context window limit for model
            context_limit = get_token_limit(model_id)

            # A non-positive limit makes the usage ratio undefined; bail out
            # explicitly instead of relying on ZeroDivisionError being caught.
            if not self._has_usable_limit(context_limit, model_id):
                return False

            threshold = self._threshold_for(context_limit)

            compression_needed = total_tokens >= threshold
            percentage_used = (total_tokens / context_limit) * 100

            if compression_needed:
                logger.warning(
                    f"COMPRESSION TRIGGERED: {total_tokens} tokens / {context_limit} limit "
                    f"({percentage_used:.1f}% used, threshold: {self.threshold_percentage * 100:.0f}%)"
                )
            else:
                logger.info(
                    f"Compression check: {total_tokens}/{context_limit} tokens "
                    f"({percentage_used:.1f}% used, threshold: {self.threshold_percentage * 100:.0f}%) - No compression needed"
                )

            return compression_needed

        except Exception as e:
            # Best-effort check: never let a failure here break the request.
            logger.exception(f"Error checking compression need: {e}")
            return False

    def check_message_tokens(self, messages: list, model_id: str) -> bool:
        """
        Check if message list exceeds threshold.

        Args:
            messages: List of message dicts
            model_id: Target model

        Returns:
            True if at or above threshold. False otherwise, including when the
            context limit is not a positive number or when any error occurs.
        """
        try:
            current_tokens = TokenCounter.count_message_tokens(messages)
            context_limit = get_token_limit(model_id)

            # Guard before dividing by the limit in the log message below.
            if not self._has_usable_limit(context_limit, model_id):
                return False

            threshold = self._threshold_for(context_limit)

            if current_tokens >= threshold:
                logger.warning(
                    f"Message context limit approaching: {current_tokens}/{context_limit} tokens "
                    f"({(current_tokens/context_limit)*100:.1f}%)"
                )
                return True

            return False

        except Exception as e:
            # Best-effort check: never let a failure here break the request.
            logger.exception(f"Error checking message tokens: {e}")
            return False