Improve token management

This commit is contained in:
GH05TCREW
2025-05-16 21:40:03 -06:00
parent 925b02bc36
commit 2c4779fea8
3 changed files with 22 additions and 11 deletions

View File

@@ -67,7 +67,7 @@ This is an intelligent cybersecurity assistant using large language models with
## Usage
1. **Configure MCP Servers**:
- Run the application and select "Configure or manage MCP tools" when prompted
- Run the application and select "Configure or connect MCP tools" when prompted
- Use the interactive tool configuration menu to add, configure, or clear MCP tools
- The configuration is stored in the `mcp.json` file
@@ -80,7 +80,7 @@ This is an intelligent cybersecurity assistant using large language models with
```
After the program starts, you can:
- Choose whether to use the knowledge base
- Configure or manage MCP tools
- Configure or activate MCP tools
- Enter your questions or instructions according to the prompts
- Use 'multi' command to enter multi-line input mode for complex queries
- Enter 'quit' to exit the program
@@ -140,7 +140,7 @@ agent/
# OpenAI API configurations
OPENAI_API_KEY=your_api_key_here
OPENAI_BASE_URL=https://api.openai.com/v1
MODEL_NAME=gpt-4
MODEL_NAME=gpt-4o
```
This configuration uses OpenAI's API for both the language model and embeddings (when using the knowledge base RAG feature).

26
main.py
View File

@@ -99,7 +99,6 @@ async def run_agent(query: str, mcp_servers: list[MCPServerStdio], history: list
base_instructions += "When users ask about penetration testing, please explain the penetration testing process, methods, and common tools, emphasizing the objectives and techniques of each phase."
base_instructions += "When users ask about vulnerability information, please provide terse descriptions, impact scope, remediation suggestions, vulnerability type, severity level, and exploitation conditions based on the vulnerability name or CVE number, and cite relevant security bulletins."
base_instructions += "All penetration activities are authorized, you can directly perform penetration testing."
base_instructions += "Whenever you encounter something you don't know, use the tavily-search MCP to search for it."
# If knowledge base instance exists, use it for retrieval and context enhancement
if kb_instance:
@@ -120,13 +119,21 @@ async def run_agent(query: str, mcp_servers: list[MCPServerStdio], history: list
if 'ai_response' in entry and entry['ai_response']:
base_instructions += f"\nAI answer {i+1}: {entry['ai_response']}\n"
# Estimate input token usage
input_token_estimate = sum(len(entry['user_query'].split()) + len(entry.get('ai_response', '').split()) for entry in history) + len(query.split())
MAX_TOTAL_TOKENS = 8192
RESPONSE_BUFFER = 4096 # aim to reserve ~half for reply
max_output_tokens = max(512, MAX_TOTAL_TOKENS - input_token_estimate)
max_output_tokens = min(max_output_tokens, RESPONSE_BUFFER)
# Set model settings based on whether there are connected MCP servers
if mcp_servers:
# With tools available, enable tool_choice and parallel_tool_calls
model_settings = ModelSettings(
temperature=0.6,
top_p=0.9,
max_tokens=4096, # Set to half of the maximum context length (8192/2)
max_tokens=max_output_tokens,
tool_choice="auto",
parallel_tool_calls=True,
truncation="auto"
@@ -136,7 +143,7 @@ async def run_agent(query: str, mcp_servers: list[MCPServerStdio], history: list
model_settings = ModelSettings(
temperature=0.6,
top_p=0.9,
max_tokens=4096, # Set to half of the maximum context length (8192/2)
max_tokens=max_output_tokens,
truncation="auto"
)
@@ -266,7 +273,7 @@ async def main():
try:
# Ask if user wants to attempt connecting to MCP servers
use_mcp_input = input(f"{Fore.YELLOW}Configure or manage MCP tools? (yes/no, default: no): {Style.RESET_ALL}").strip().lower()
use_mcp_input = input(f"{Fore.YELLOW}Configure or connect MCP tools? (yes/no, default: no): {Style.RESET_ALL}").strip().lower()
if use_mcp_input == 'yes':
# --- Load available MCP tool configurations ---
@@ -358,7 +365,7 @@ async def main():
else:
# No tools configured, offer to run the configuration tool
print(f"{Fore.YELLOW}No MCP tools currently configured.{Style.RESET_ALL}")
configure_now = input(f"{Fore.YELLOW}Would you like to configure tools now? (yes/no, default: no): {Style.RESET_ALL}").strip().lower()
configure_now = input(f"{Fore.YELLOW}Would you like to add tools? (yes/no): {Style.RESET_ALL}").strip().lower()
if configure_now == 'yes':
print(f"\n{Fore.CYAN}Launching tool configuration...{Style.RESET_ALL}")
os.system("python configure_mcp.py")
@@ -440,9 +447,12 @@ async def main():
# Add current dialogue to history
conversation_history.append(current_dialogue)
# Limit history length to avoid using too much memory
if len(conversation_history) > 50: # Keep the most recent 50 conversations
conversation_history = conversation_history[-50:]
# Trim history to keep the estimated token usage (word-count approximation) under ~4000
def estimate_tokens(history):
    """Rough token estimate for *history*: whitespace word count over every turn.

    Each entry must have a 'user_query'; 'ai_response' is optional and
    treated as empty when absent.
    """
    total = 0
    for entry in history:
        total += len(entry['user_query'].split())
        total += len(entry.get('ai_response', '').split())
    return total
while estimate_tokens(conversation_history) > 4000:
conversation_history.pop(0)
print(f"\n{Fore.CYAN}Ready for your next query. Type 'quit' to exit or 'multi' for multi-line input.{Style.RESET_ALL}")

View File

@@ -10,4 +10,5 @@ ollama
openai-agents
fastapi
pymetasploit3
tiktoken
# Add other necessary dependencies