"""Unit tests for the conversation compression service and its helpers.

Covers ``CompressionService``, ``CompressionThresholdChecker``,
``TokenCounter`` and ``CompressionPromptBuilder``. The LLM is always a
``Mock``; no network access is performed by these tests.
"""

from datetime import datetime, timezone
from unittest.mock import Mock, MagicMock, patch

import pytest

from application.api.answer.services.compression import CompressionService
from application.api.answer.services.compression.threshold_checker import (
    CompressionThresholdChecker,
)
from application.api.answer.services.compression.token_counter import TokenCounter
from application.api.answer.services.compression.prompt_builder import (
    CompressionPromptBuilder,
)
from application.core.settings import settings

# Dotted path of the module whose globals are patched by the threshold tests.
_THRESHOLD_CHECKER_MODULE = (
    "application.api.answer.services.compression.threshold_checker"
)


def _make_query(prompt, response):
    """Return a fully-populated query dict with no thought, sources or tools."""
    return {
        "prompt": prompt,
        "response": response,
        "thought": "",
        "sources": [],
        "tool_calls": [],
        "timestamp": datetime.now(timezone.utc),
    }


@pytest.fixture
def mock_llm():
    """Create a mock LLM for testing"""
    llm = Mock()
    llm.gen = Mock()
    return llm


@pytest.fixture
def compression_service(mock_llm):
    """Create a CompressionService instance with mock LLM"""
    return CompressionService(llm=mock_llm, model_id="gpt-4o")


@pytest.fixture
def threshold_checker():
    """Create a ThresholdChecker instance"""
    return CompressionThresholdChecker()


@pytest.fixture
def prompt_builder():
    """Create a PromptBuilder instance"""
    return CompressionPromptBuilder()


@pytest.fixture
def sample_conversation():
    """Create a small three-query conversation for testing"""
    return {
        "_id": "test_conversation_id",
        "user": "test_user",
        "date": datetime.now(timezone.utc),
        "name": "Test Conversation",
        "queries": [
            _make_query(
                "What is Python?",
                "Python is a high-level programming language.",
            ),
            _make_query(
                "How do I install it?",
                "You can install Python from python.org",
            ),
            _make_query(
                "What are some popular libraries?",
                "Popular Python libraries include NumPy, Pandas, Django, Flask, etc.",
            ),
        ],
    }


@pytest.fixture
def large_conversation():
    """Create a large conversation (100 verbose queries) that exceeds threshold"""
    queries = [
        _make_query(
            f"Question {i}: " + ("test " * 100),  # ~400 tokens each
            f"Answer {i}: " + ("response " * 100),  # ~400 tokens each
        )
        for i in range(100)
    ]
    return {
        "_id": "large_conversation_id",
        "user": "test_user",
        "date": datetime.now(timezone.utc),
        "name": "Large Conversation",
        "queries": queries,
    }


class TestCompressionService:
    """Test suite for CompressionService"""

    def test_initialization(self, mock_llm):
        """Test CompressionService initialization"""
        service = CompressionService(llm=mock_llm, model_id="gpt-4o")

        assert service.llm == mock_llm
        assert service.model_id == "gpt-4o"
        assert service.prompt_builder is not None
        assert service.prompt_builder.version == settings.COMPRESSION_PROMPT_VERSION

    @patch(f"{_THRESHOLD_CHECKER_MODULE}.get_token_limit")
    def test_should_compress_below_threshold(
        self, mock_get_token_limit, threshold_checker, sample_conversation
    ):
        """Test that compression is not triggered when below threshold"""
        mock_get_token_limit.return_value = 128000  # GPT-4o limit

        # Small conversation should not trigger compression
        result = threshold_checker.should_compress(
            sample_conversation, model_id="gpt-4o"
        )

        assert result is False

    @patch(f"{_THRESHOLD_CHECKER_MODULE}.get_token_limit")
    def test_should_compress_above_threshold(
        self, mock_get_token_limit, threshold_checker, large_conversation
    ):
        """Test that compression is triggered when above threshold"""
        # Lower limit to ensure large conversation exceeds threshold
        mock_get_token_limit.return_value = 10000

        # Large conversation should trigger compression (100 queries with repeated text)
        # Threshold at 80% of 10k = 8k tokens, so large_conversation > 8k should trigger
        result = threshold_checker.should_compress(
            large_conversation, model_id="gpt-4o"
        )

        assert result is True

    @patch(f"{_THRESHOLD_CHECKER_MODULE}.get_token_limit")
    def test_should_compress_at_exact_threshold(
        self, mock_get_token_limit, threshold_checker
    ):
        """Test compression trigger at (approximately) the 80% threshold"""
        mock_get_token_limit.return_value = 1000

        # Create conversation with roughly 800 tokens (80% of 1000)
        conversation = {
            "queries": [
                {
                    "prompt": "a " * 200,  # ~200 tokens
                    "response": "b " * 200,  # ~200 tokens
                },
                {
                    "prompt": "c " * 200,  # ~200 tokens
                    "response": "d " * 200,  # ~200 tokens
                },
            ]
        }

        result = threshold_checker.should_compress(conversation, model_id="test-model")

        # Should trigger at or above 80%
        assert result is True

    def test_compress_conversation_basic(self, compression_service, sample_conversation):
        """Test basic conversation compression"""
        # Mock LLM response: reasoning wrapped in <analysis>, payload in <summary>.
        mock_summary = """<analysis>
The conversation covers Python basics and installation.
</analysis>

<summary>
1. Primary Request and Intent:
   User asked about Python and how to install it.

2. Key Concepts:
   - Python programming language
   - Installation process

3. Files and Code Sections:
   None

4. Errors and fixes:
   None

5. Problem Solving:
   Explained Python installation from python.org

6. All user messages:
   - What is Python?
   - How do I install it?
   - What are some popular libraries?

7. Pending Tasks:
   None

8. Current Work:
   Provided information about popular Python libraries.

9. Optional Next Step:
   None
</summary>"""
        compression_service.llm.gen.return_value = mock_summary

        # Compress first 2 queries
        result = compression_service.compress_conversation(
            conversation=sample_conversation, compress_up_to_index=1
        )

        # Verify LLM was called
        assert compression_service.llm.gen.called

        # Verify result is a CompressionMetadata object
        assert hasattr(result, "timestamp")
        assert result.query_index == 1
        assert hasattr(result, "compressed_summary")
        assert result.original_token_count > 0
        assert result.compressed_token_count > 0
        assert result.compression_ratio > 0
        assert result.model_used == "gpt-4o"
        assert result.compression_prompt_version == settings.COMPRESSION_PROMPT_VERSION

        # Verify summary was extracted correctly (without analysis tags).
        # NOTE: the needle must be non-empty; `"" not in s` is always False.
        assert "<analysis>" not in result.compressed_summary
        assert "Primary Request and Intent" in result.compressed_summary

    def test_compress_conversation_with_tool_calls(self, compression_service):
        """Test compression of conversation with tool calls"""
        conversation = {
            "queries": [
                {
                    "prompt": "Search for Python tutorials",
                    "response": "I'll search for Python tutorials.",
                    "thought": "Need to use search tool",
                    "sources": [],
                    "tool_calls": [
                        {
                            "tool_name": "search_tool",
                            "action_name": "search",
                            "arguments": {"query": "Python tutorials"},
                            "result": "Found 100 tutorials",
                            "status": "completed",
                        }
                    ],
                    "timestamp": datetime.now(timezone.utc),
                }
            ]
        }
        compression_service.llm.gen.return_value = "Test summary with tools"

        # Only the prompt sent to the LLM is inspected, so the return value
        # of compress_conversation is intentionally discarded.
        compression_service.compress_conversation(
            conversation=conversation, compress_up_to_index=0
        )

        # Verify tool calls are included in compression prompt
        call_args = compression_service.llm.gen.call_args
        messages = call_args[1]["messages"]
        user_message = messages[1]["content"]

        assert "Tool Calls:" in user_message
        assert "search_tool" in user_message

    def test_compress_conversation_invalid_index(
        self, compression_service, sample_conversation
    ):
        """Test compression with invalid index raises error"""
        with pytest.raises(ValueError, match="Invalid compress_up_to_index"):
            compression_service.compress_conversation(
                conversation=sample_conversation,
                compress_up_to_index=100,  # Invalid - conversation only has 3 queries
            )

    def test_get_compressed_context_no_compression(
        self, compression_service, sample_conversation
    ):
        """Test getting context when no compression exists"""
        summary, recent = compression_service.get_compressed_context(
            sample_conversation
        )

        assert summary is None
        assert len(recent) == 3  # All queries returned

    def test_get_compressed_context_with_compression(self, compression_service):
        """Test getting context when compression exists"""
        conversation = {
            "queries": [
                {"prompt": "Q1", "response": "A1"},
                {"prompt": "Q2", "response": "A2"},
                {"prompt": "Q3", "response": "A3"},
                {"prompt": "Q4", "response": "A4"},
                {"prompt": "Q5", "response": "A5"},
            ],
            "compression_metadata": {
                "is_compressed": True,
                "last_compression_at": datetime.now(timezone.utc),
                "compression_points": [
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 2,  # Compressed up to Q3
                        "compressed_summary": "Summary of Q1-Q3",
                        "original_token_count": 100,
                        "compressed_token_count": 20,
                        "compression_ratio": 5.0,
                    }
                ],
            },
        }

        summary, recent = compression_service.get_compressed_context(conversation)

        assert summary == "Summary of Q1-Q3"
        assert len(recent) == 2  # Q4 and Q5 (after compression point)
        assert recent[0]["prompt"] == "Q4"
        assert recent[1]["prompt"] == "Q5"

    def test_get_compressed_context_multiple_compressions(self, compression_service):
        """Test getting context when multiple compressions exist"""
        conversation = {
            "queries": [
                {"prompt": f"Q{i}", "response": f"A{i}"} for i in range(1, 11)
            ],
            "compression_metadata": {
                "is_compressed": True,
                "last_compression_at": datetime.now(timezone.utc),
                "compression_points": [
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 4,  # First compression
                        "compressed_summary": "First compression summary",
                        "original_token_count": 100,
                        "compressed_token_count": 20,
                    },
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 7,  # Second compression
                        "compressed_summary": "Second compression summary (includes first)",
                        "original_token_count": 150,
                        "compressed_token_count": 30,
                    },
                ],
            },
        }

        summary, recent = compression_service.get_compressed_context(conversation)

        # Should use the most recent compression
        assert summary == "Second compression summary (includes first)"
        assert len(recent) == 2  # Q9 and Q10 (after compression point at index 7)
        assert recent[0]["prompt"] == "Q9"
        assert recent[1]["prompt"] == "Q10"

    def test_extract_summary_with_tags(self, compression_service):
        """Test summary extraction with analysis and summary tags"""
        llm_response = """<analysis>
This is my analysis of the conversation.
It has multiple lines.
</analysis>

<summary>
This is the actual summary.
It should be extracted.
</summary>"""

        result = compression_service._extract_summary(llm_response)

        # The needle must be non-empty; `"" not in s` is always False.
        assert "<analysis>" not in result
        assert "This is the actual summary" in result
        assert "my analysis" not in result

    def test_extract_summary_without_tags(self, compression_service):
        """Test summary extraction when no tags present"""
        llm_response = "This is a plain summary without tags."

        result = compression_service._extract_summary(llm_response)

        assert result == "This is a plain summary without tags."

    def test_count_tokens_in_queries(self, sample_conversation):
        """Test token counting in queries"""
        queries = sample_conversation["queries"]

        token_count = TokenCounter.count_query_tokens(queries)

        # Should count all prompts and responses
        assert token_count > 0

    def test_count_tokens_with_tool_calls(self):
        """Test token counting includes tool calls"""
        queries = [
            {
                "prompt": "Test prompt",
                "response": "Test response",
                "tool_calls": [
                    {
                        "tool_name": "test_tool",
                        "action_name": "test_action",
                        "arguments": {"arg": "value"},
                        "result": "Tool result",
                    }
                ],
            }
        ]

        token_count_with_tools = TokenCounter.count_query_tokens(
            queries, include_tool_calls=True
        )
        token_count_without_tools = TokenCounter.count_query_tokens(
            queries, include_tool_calls=False
        )

        assert token_count_with_tools > token_count_without_tools

    def test_format_conversation_for_compression(
        self, prompt_builder, sample_conversation
    ):
        """Test conversation formatting for compression prompt"""
        queries = sample_conversation["queries"]

        formatted = prompt_builder._format_conversation(queries)

        # Verify formatting includes all messages
        assert "Message 1" in formatted
        assert "What is Python?" in formatted
        assert "Python is a high-level programming language" in formatted
        assert "Message 2" in formatted
        assert "How do I install it?" in formatted

    def test_build_compression_prompt_basic(self, prompt_builder):
        """Test compression prompt building"""
        queries = [
            {"prompt": "Q1", "response": "A1", "tool_calls": [], "sources": []},
            {"prompt": "Q2", "response": "A2", "tool_calls": [], "sources": []},
        ]

        messages = prompt_builder.build_prompt(queries)

        assert len(messages) == 2  # System and user messages
        assert messages[0]["role"] == "system"
        assert messages[1]["role"] == "user"
        assert "conversation to summarize" in messages[1]["content"]

    def test_build_compression_prompt_with_existing_compressions(
        self, prompt_builder
    ):
        """Test compression prompt building with existing compressions"""
        queries = [
            {"prompt": "Q3", "response": "A3", "tool_calls": [], "sources": []},
            {"prompt": "Q4", "response": "A4", "tool_calls": [], "sources": []},
        ]
        existing_compressions = [
            {
                "query_index": 1,
                "compressed_summary": "Previous compression summary",
                "timestamp": datetime.now(timezone.utc),
            }
        ]

        messages = prompt_builder.build_prompt(queries, existing_compressions)
        user_content = messages[1]["content"]

        # Should mention existing compression
        assert "compressed before" in user_content
        assert "Previous compression summary" in user_content
        assert "NEW summary" in user_content

    def test_calculate_conversation_tokens(self, sample_conversation):
        """Test conversation token calculation"""
        token_count = TokenCounter.count_conversation_tokens(
            sample_conversation, include_system_prompt=False
        )
        assert token_count > 0

        # With system prompt should be higher
        token_count_with_system = TokenCounter.count_conversation_tokens(
            sample_conversation, include_system_prompt=True
        )
        assert token_count_with_system > token_count

    @patch(f"{_THRESHOLD_CHECKER_MODULE}.logger")
    def test_error_handling_in_should_compress(
        self, mock_logger, threshold_checker, sample_conversation
    ):
        """Test error handling in should_compress"""
        # Force an error by making get_token_limit raise an exception
        with patch(
            f"{_THRESHOLD_CHECKER_MODULE}.get_token_limit",
            side_effect=Exception("Test error"),
        ):
            result = threshold_checker.should_compress(
                sample_conversation, model_id="gpt-4o"
            )

        # Should return False on error
        assert result is False
        # Should log the error
        assert mock_logger.error.called

    @patch("application.api.answer.services.compression.service.logger")
    def test_error_handling_in_get_compressed_context(
        self, mock_logger, compression_service
    ):
        """Test error handling in get_compressed_context"""
        # Malformed conversation
        malformed_conversation = {"queries": None}

        summary, recent = compression_service.get_compressed_context(
            malformed_conversation
        )

        # Should return safe defaults
        assert summary is None
        assert recent == []
        # Should log the error
        assert mock_logger.error.called

    def test_compression_points_array_limiting(self, compression_service):
        """Test that the most recent of several compression points is used"""
        # Simulate a conversation with 3 previous compressions
        conversation = {
            "queries": [
                {"prompt": f"Q{i}", "response": f"A{i}"} for i in range(1, 11)
            ],
            "compression_metadata": {
                "is_compressed": True,
                "last_compression_at": datetime.now(timezone.utc),
                "compression_points": [
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 2,
                        "compressed_summary": "First compression summary",
                        "original_token_count": 100,
                        "compressed_token_count": 20,
                    },
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 5,
                        "compressed_summary": "Second compression summary",
                        "original_token_count": 150,
                        "compressed_token_count": 30,
                    },
                    {
                        "timestamp": datetime.now(timezone.utc),
                        "query_index": 7,
                        "compressed_summary": "Third compression summary",
                        "original_token_count": 200,
                        "compressed_token_count": 40,
                    },
                ],
            },
        }

        # The service should use the most recent compression
        summary, recent = compression_service.get_compressed_context(conversation)

        # Should use the most recent (third) compression
        assert summary == "Third compression summary"
        assert len(recent) == 2  # Q9 and Q10 (after compression point at index 7)
        assert recent[0]["prompt"] == "Q9"
        assert recent[1]["prompt"] == "Q10"

    def test_compression_with_heavy_tool_usage(self, compression_service):
        """Test compression when conversation has many tool calls with large responses

        Scenario: User asks agent to scrape all files in a GitHub repo, generating
        many tool calls with file contents as responses. This tests the system's
        ability to compress tool-heavy conversations that hit token limits.
        """
        # Simulate a conversation where the agent scraped 20 files from the repo
        queries = []

        # Initial user request
        queries.append({
            "prompt": "Please analyze all Python files in the https://github.com/arc53/DocsGPT repository",
            "response": "I'll scrape all the Python files from the DocsGPT repository and analyze them.",
            "tool_calls": []
        })

        # One simulated scraping tool call per path below (20 in total)
        file_paths = [
            "application/app.py",
            "application/api/answer/routes.py",
            "application/api/answer/services/conversation_service.py",
            "application/api/answer/services/compression_service.py",
            "application/api/answer/services/stream_processor.py",
            "application/agents/base.py",
            "application/agents/react.py",
            "application/llm/handlers/base.py",
            "application/llm/llm_creator.py",
            "application/core/settings.py",
            "application/core/model_configs.py",
            "application/utils.py",
            "application/vectorstore/base.py",
            "application/parser/file_parser.py",
            "tests/test_compression_service.py",
            "tests/test_agent_token_tracking.py",
            "frontend/src/App.tsx",
            "frontend/src/store/index.ts",
            "deployment/docker-compose.yaml",
            "setup.py",
        ]

        tool_calls = []
        for i, file_path in enumerate(file_paths):
            # Class name used inside the fake file body, derived from the file name
            class_name = file_path.split('/')[-1].replace('.py', '').title()
            # Each tool call with realistic file content (simulating ~500-1000 tokens per file)
            file_content = f"""
# {file_path}
import os
import sys
from typing import Dict, List, Optional, Any
from datetime import datetime

class {class_name}:
    '''
    This is a module that handles various operations for the DocsGPT application.
    It contains multiple classes and functions for processing data.
    '''

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.initialized = False
        self.data_store = {{}}

    def process_data(self, input_data: List[str]) -> Dict[str, Any]:
        '''Process input data and return results'''
        results = {{}}
        for item in input_data:
            # Complex processing logic here
            processed = self._transform_item(item)
            results[item] = processed
        return results

    def _transform_item(self, item: str) -> str:
        '''Internal transformation logic'''
        # Multiple lines of transformation code
        transformed = item.upper().strip()
        transformed = transformed.replace(' ', '_')
        return transformed

    def validate_config(self) -> bool:
        '''Validate configuration settings'''
        required_keys = ['api_key', 'endpoint', 'model_id']
        return all(key in self.config for key in required_keys)

# Additional helper functions
def utility_function_one(param: str) -> str:
    return param.strip().lower()

def utility_function_two(data: Dict) -> List:
    return list(data.values())

def main():
    config = {{'api_key': 'test', 'endpoint': 'http://localhost', 'model_id': 'gpt-4'}}
    instance = {class_name}(config)
    instance.process_data(['item1', 'item2', 'item3'])
""" * 2  # Double it to simulate ~1000-1500 tokens per response

            tool_calls.append({
                "call_id": f"call_{i}",
                "tool_name": "github_file_scraper",
                "action_name": "read_file",
                "arguments": {"file_path": file_path},
                "result": {"content": file_content, "status": "success"},
                "status": "success"
            })

        # Add query with all tool calls
        queries.append({
            "prompt": "[Agent continues processing]",
            "response": "I've scraped 20 Python files. Let me analyze the patterns...",
            "tool_calls": tool_calls
        })

        # Add analysis response
        queries.append({
            "prompt": "[Agent continues analysis]",
            "response": """Based on my analysis of the 20 Python files:

1. Architecture: The codebase follows a modular architecture with clear separation between API, agents, LLM handlers, and utilities.

2. Key patterns identified:
   - Heavy use of type hints (typing module)
   - Consistent error handling patterns
   - Service-based architecture for API endpoints
   - Factory pattern for LLM creation
   - Abstract base classes for extensibility

3. Core components:
   - Agent system with tool integration
   - LLM provider abstraction
   - Compression service for context management
   - Stream processing for real-time responses

4. Code quality observations:
   - Comprehensive docstrings
   - Good test coverage
   - Clear naming conventions
   - Proper separation of concerns""",
            "tool_calls": []
        })

        conversation = {"queries": queries}

        # Mock LLM response for compression
        mock_summary = """
User requested analysis of all Python files in DocsGPT GitHub repository.
Agent scraped 20 files including app.py, API routes, services, agents, and tests.
Analysis revealed modular architecture with service-based design, type hints,
factory patterns, and agent system with tool integration.
Code quality is high with comprehensive docstrings and test coverage.
"""
        compression_service.llm.gen.return_value = mock_summary

        # Compress the heavy tool usage
        result = compression_service.compress_conversation(
            conversation=conversation,
            compress_up_to_index=1  # Compress first 2 queries (including all tool calls)
        )

        # Verify compression handled tool calls properly
        assert result.query_index == 1
        assert result.compressed_summary is not None

        # Verify the compression prompt included tool call information
        call_args = compression_service.llm.gen.call_args
        messages = call_args[1]["messages"]
        user_message = messages[1]["content"]

        # Should include tool calls section
        assert "Tool Calls:" in user_message
        assert "github_file_scraper" in user_message

        # Verify compression ratio (should be significant with all that tool data)
        original_tokens = result.original_token_count
        compressed_tokens = result.compressed_token_count
        compression_ratio = result.compression_ratio

        # With 20 large tool responses, original should be substantial
        assert original_tokens > 5000  # At least 5k tokens from tool responses
        assert compressed_tokens < 500  # Summary should be much smaller
        assert compression_ratio > 10  # Should achieve >10x compression

    def test_compression_with_needle_in_haystack(self, compression_service):
        """Test compression preserves important information buried in long conversation

        Scenario: User has long conversation with verbose responses, hiding critical
        information in the middle. The LLM summary is mocked, so this checks that
        the summary is stored and returned intact by the service round-trip.
        """
        # Create a long conversation with important info buried in the middle
        queries = []

        # Query 1: Long general discussion
        queries.append({
            "prompt": "Tell me about Python programming best practices",
            "response": """Python best practices encompass a wide range of principles and patterns. Let me provide a comprehensive overview:

1. Code Style and Formatting:
   - Follow PEP 8 style guide for consistent formatting
   - Use 4 spaces for indentation (not tabs)
   - Limit lines to 79 characters for code, 72 for docstrings
   - Use meaningful variable names that describe their purpose
   - Add whitespace around operators and after commas
   - Group imports: standard library, third-party, local

2. Documentation:
   - Write clear docstrings for all functions, classes, and modules
   - Use type hints for better code clarity and IDE support
   - Include examples in docstrings when helpful
   - Keep comments up-to-date with code changes

3. Error Handling:
   - Use specific exceptions rather than bare except clauses
   - Create custom exceptions for domain-specific errors
   - Always clean up resources with context managers (with statement)
   - Log errors appropriately for debugging

4. Testing:
   - Write unit tests for all critical functionality
   - Aim for high test coverage (80%+)
   - Use pytest for modern testing features
   - Mock external dependencies in tests

5. Code Organization:
   - Keep functions small and focused on single tasks
   - Use classes to group related functionality
   - Avoid deep nesting (max 3-4 levels)
   - Extract complex conditions into well-named variables

6. Performance:
   - Use list comprehensions for simple transformations
   - Avoid premature optimization
   - Profile code before optimizing
   - Use generators for large datasets

These practices help maintain readable, maintainable, and efficient code.""",
            "tool_calls": []
        })

        # Query 2: Another long response
        queries.append({
            "prompt": "What about Python data structures?",
            "response": """Python provides several built-in data structures, each optimized for different use cases:

1. Lists:
   - Ordered, mutable sequences
   - Dynamic sizing with amortized O(1) append
   - Access by index in O(1)
   - Insertion/deletion in middle is O(n)
   - Use cases: ordered collections, stacks, queues
   - Methods: append(), extend(), insert(), remove(), pop(), sort()

2. Tuples:
   - Ordered, immutable sequences
   - Slightly more memory efficient than lists
   - Can be used as dictionary keys (if contents are hashable)
   - Use cases: fixed collections, function return values, dictionary keys

3. Dictionaries:
   - Unordered (ordered in Python 3.7+) key-value mappings
   - Average O(1) lookup, insertion, deletion
   - Keys must be hashable
   - Use cases: lookups, caching, counting, grouping
   - Methods: get(), keys(), values(), items(), update(), pop()

4. Sets:
   - Unordered collections of unique elements
   - Average O(1) membership testing
   - Efficient for removing duplicates
   - Support set operations: union, intersection, difference
   - Use cases: membership testing, removing duplicates, set mathematics

5. Collections module extensions:
   - defaultdict: dict with default values for missing keys
   - Counter: dict subclass for counting hashable objects
   - deque: double-ended queue with O(1) append/pop from both ends
   - OrderedDict: maintains insertion order (less relevant in Python 3.7+)
   - namedtuple: tuple subclass with named fields

6. Performance considerations:
   - Lists for ordered data with frequent append operations
   - Dictionaries for key-based lookups
   - Sets for membership testing and uniqueness
   - Deques for queue operations from both ends
   - Tuples for immutable data

Understanding these data structures is crucial for writing efficient Python code.""",
            "tool_calls": []
        })

        # Query 3: THE CRITICAL INFORMATION (needle in the haystack)
        queries.append({
            "prompt": "I need to remember this important detail",
            "response": """I'll make a note of that important detail.

CRITICAL INFORMATION TO REMEMBER:
The production database password is stored in the environment variable DB_PASSWORD_PROD.
The backup schedule is set to run daily at 3:00 AM UTC.
The API rate limit for premium users is 10,000 requests per hour.
The encryption key rotation happens every 90 days.
The primary contact for incidents is: ops-team@example.com

I've recorded this information for our conversation. These operational details are important for system administration and should be referenced when needed.""",
            "tool_calls": []
        })

        # Query 4: More long content after the important info
        queries.append({
            "prompt": "Explain Python decorators in detail",
            "response": """Python decorators are a powerful feature that allows you to modify or enhance functions and classes. Here's a comprehensive explanation:

1. Basic Concept:
   - Decorators are functions that take another function as input
   - They return a modified version of that function
   - Syntax: @decorator above function definition
   - They implement the decorator design pattern

2. Function Decorators:
   ```python
   def my_decorator(func):
       def wrapper(*args, **kwargs):
           # Code before function
           result = func(*args, **kwargs)
           # Code after function
           return result
       return wrapper

   @my_decorator
   def my_function():
       pass
   ```

3. Common Use Cases:
   - Logging: Record function calls and results
   - Timing: Measure execution time
   - Authentication: Check permissions before execution
   - Caching: Store and return cached results
   - Validation: Check input parameters
   - Rate limiting: Throttle function calls

4. Decorators with Arguments:
   ```python
   def repeat(times):
       def decorator(func):
           def wrapper(*args, **kwargs):
               for _ in range(times):
                   result = func(*args, **kwargs)
               return result
           return wrapper
       return decorator

   @repeat(3)
   def greet():
       print("Hello")
   ```

5. Class Decorators:
   - Can decorate entire classes
   - Useful for adding methods or attributes
   - Can enforce patterns like singleton

6. Built-in Decorators:
   - @property: Create managed attributes
   - @staticmethod: Define static methods
   - @classmethod: Define class methods
   - @abstractmethod: Define abstract methods

7. functools.wraps:
   - Preserves original function metadata
   - Should be used in decorator implementations
   - Maintains __name__, __doc__, etc.

8. Practical Examples:
   - @login_required for web routes
   - @cache for memoization
   - @retry for resilient API calls
   - @deprecated for marking old code

Decorators are essential for writing clean, maintainable Python code with separation of concerns.""",
            "tool_calls": []
        })

        # Query 5: Final long response
        queries.append({
            "prompt": "What about Python async programming?",
            "response": """Asynchronous programming in Python allows for concurrent execution of I/O-bound operations:

1. Core Concepts:
   - Event loop: Manages and executes async tasks
   - Coroutines: Functions defined with async def
   - await: Pauses coroutine until awaitable completes
   - Tasks: Wrapper for coroutines to run concurrently

2. Basic Syntax:
   ```python
   import asyncio

   async def fetch_data():
       await asyncio.sleep(1)
       return "data"

   async def main():
       result = await fetch_data()
       print(result)

   asyncio.run(main())
   ```

3. When to Use Async:
   - I/O-bound operations (network requests, file I/O, database queries)
   - Multiple concurrent operations
   - Real-time applications (websockets, streaming)
   - NOT for CPU-bound tasks (use multiprocessing instead)

4. Common Patterns:
   - Gather: Run multiple coroutines concurrently
   - create_task: Schedule coroutine execution
   - Semaphore: Limit concurrent operations
   - Queue: Producer-consumer patterns

5. Async Libraries:
   - aiohttp: Async HTTP client/server
   - asyncpg: Async PostgreSQL driver
   - motor: Async MongoDB driver
   - aioredis: Async Redis client

6. Error Handling:
   - Use try/except in coroutines
   - Tasks can be cancelled with task.cancel()
   - Timeouts with asyncio.wait_for()

Understanding async programming is crucial for building scalable Python applications.""",
            "tool_calls": []
        })

        conversation = {"queries": queries}

        # Mock LLM response that MUST preserve the critical information
        mock_summary = """
User asked about Python best practices, data structures, decorators, and async programming.
Discussed code style, testing, documentation standards, and various Python data structures.

CRITICAL OPERATIONAL DETAILS PROVIDED:
- Production database password stored in DB_PASSWORD_PROD environment variable
- Backup schedule: daily at 3:00 AM UTC
- Premium API rate limit: 10,000 requests/hour
- Encryption key rotation: every 90 days
- Incident contact: ops-team@example.com

Also covered decorators for code enhancement and async programming for I/O-bound operations.
"""
        compression_service.llm.gen.return_value = mock_summary

        # Compress everything except the last query
        result = compression_service.compress_conversation(
            conversation=conversation,
            compress_up_to_index=3  # Compress first 4 queries (includes the critical info)
        )

        # Verify compression happened
        assert result.query_index == 3
        assert result.compressed_summary is not None

        # Get the compressed context
        conversation["compression_metadata"] = {
            "is_compressed": True,
            "last_compression_at": datetime.now(timezone.utc),
            "compression_points": [result.to_dict()]
        }

        summary, recent = compression_service.get_compressed_context(conversation)

        # Verify critical information is in the summary
        assert summary is not None
        assert "DB_PASSWORD_PROD" in summary or "database password" in summary.lower()
        assert "3:00 AM UTC" in summary or "backup" in summary.lower()
        assert "10,000" in summary or "rate limit" in summary.lower()
        assert "ops-team@example.com" in summary or "incident contact" in summary.lower()

        # Verify only the last query is in recent
        assert len(recent) == 1
        assert "async programming" in recent[0]["prompt"].lower()

        # The compression should be substantial (long responses compressed to summary)
        assert result.original_token_count > 1300  # 4 long responses
        assert result.compressed_token_count < 300  # Summary should be concise
        assert result.compression_ratio > 4  # At least 4x compression


if __name__ == "__main__":
    pytest.main([__file__, "-v"])