From c457c2943b79fa2fe165a2c7f85efce36afe2e7f Mon Sep 17 00:00:00 2001 From: GH05TCREW Date: Mon, 15 Dec 2025 21:26:04 -0700 Subject: [PATCH] refactor: update CLI, add token tracking --- ghostcrew/agents/base_agent.py | 1 + ghostcrew/agents/crew/orchestrator.py | 6 + ghostcrew/agents/crew/worker_pool.py | 37 +++++- ghostcrew/interface/cli.py | 101 ++++++++++++---- ghostcrew/interface/main.py | 164 ++++++++++++++------------ 5 files changed, 207 insertions(+), 102 deletions(-) diff --git a/ghostcrew/agents/base_agent.py b/ghostcrew/agents/base_agent.py index c425fbb..a4cbb8a 100644 --- a/ghostcrew/agents/base_agent.py +++ b/ghostcrew/agents/base_agent.py @@ -597,6 +597,7 @@ Call the create_plan tool with your steps.""" role="assistant", content="\n".join(plan_display), metadata={"auto_plan": True}, + usage=response.usage, ) self.conversation_history.append(plan_msg) return plan_msg diff --git a/ghostcrew/agents/crew/orchestrator.py b/ghostcrew/agents/crew/orchestrator.py index 7be38c6..848e429 100644 --- a/ghostcrew/agents/crew/orchestrator.py +++ b/ghostcrew/agents/crew/orchestrator.py @@ -167,6 +167,12 @@ class CrewOrchestrator: tools=crew_tools, ) + # Track tokens for orchestrator + if response.usage: + total = response.usage.get("total_tokens", 0) + if total > 0: + yield {"phase": "tokens", "tokens": total} + # Check for tool calls first to determine if content is "thinking" or "final answer" if response.tool_calls: # If there are tool calls, the content is "thinking" (reasoning before action) diff --git a/ghostcrew/agents/crew/worker_pool.py b/ghostcrew/agents/crew/worker_pool.py index 4025d4c..fca592e 100644 --- a/ghostcrew/agents/crew/worker_pool.py +++ b/ghostcrew/agents/crew/worker_pool.py @@ -133,10 +133,26 @@ class WorkerPool: worker.tools_used.append(tc.name) self._emit(worker.id, "tool", {"tool": tc.name}) - # Track tokens + # Track tokens (avoid double counting) if response.usage: total = response.usage.get("total_tokens", 0) - if total > 0: + is_intermediate = response.metadata.get("intermediate", False) + has_tools = bool(response.tool_calls) + + # Same logic as CLI to avoid double counting + should_count = False + if is_intermediate: + should_count = True + worker.last_msg_intermediate = True + elif has_tools: + if not getattr(worker, "last_msg_intermediate", False): + should_count = True + worker.last_msg_intermediate = False + else: + should_count = True + worker.last_msg_intermediate = False + + if should_count and total > 0: self._emit(worker.id, "tokens", {"tokens": total}) # Capture final response (text without tool calls) @@ -150,7 +166,22 @@ class WorkerPool: if response.metadata.get("replan_impossible"): is_infeasible = True - worker.result = final_response or "No findings." + # Prioritize structured results from the plan over chatty summaries + plan_summary = "" + plan = getattr(worker_runtime, "plan", None) + if plan and plan.steps: + completed_steps = [ + s for s in plan.steps if s.status == "complete" and s.result + ] + if completed_steps: + summary_lines = [] + for s in completed_steps: + summary_lines.append(f"- {s.description}: {s.result}") + plan_summary = "\n".join(summary_lines) + + # Use plan summary if available, otherwise fallback to chat response + worker.result = plan_summary or final_response or "No findings." + worker.completed_at = time.time() self._results[worker.id] = worker.result diff --git a/ghostcrew/interface/cli.py b/ghostcrew/interface/cli.py index cc8baf0..1766d7a 100644 --- a/ghostcrew/interface/cli.py +++ b/ghostcrew/interface/cli.py @@ -25,7 +25,7 @@ async def run_cli( model: str, task: str = None, report: str = None, - max_tools: int = 50, + max_loops: int = 50, use_docker: bool = False, ): """ @@ -36,7 +36,7 @@ async def run_cli( model: LLM model to use task: Optional task description report: Report path ("auto" for loot/reports/_.md) - max_tools: Max tool calls before stopping + max_loops: Max agent loops before stopping use_docker: Run tools in Docker container """ from ..agents.ghostcrew_agent import GhostCrewAgent @@ -56,8 +56,8 @@ async def run_cli( start_text.append(f"{model}\n", style=GHOST_PRIMARY) start_text.append("Runtime: ", style=GHOST_SECONDARY) start_text.append(f"{'Docker' if use_docker else 'Local'}\n", style=GHOST_PRIMARY) - start_text.append("Max calls: ", style=GHOST_SECONDARY) - start_text.append(f"{max_tools}\n", style=GHOST_PRIMARY) + start_text.append("Max loops: ", style=GHOST_SECONDARY) + start_text.append(f"{max_loops}\n", style=GHOST_PRIMARY) task_msg = task or f"Perform a penetration test on {target}" start_text.append("Task: ", style=GHOST_SECONDARY) @@ -122,9 +122,13 @@ async def run_cli( start_time = time.time() tool_count = 0 iteration = 0 - findings = [] # Store findings for report + findings_count = 0 # Count of notes/findings recorded + findings = [] # Store actual findings text + total_tokens = 0 # Track total token usage + messages = [] # Store agent messages tool_log = [] # Log of tools executed (ts, name, command, result, exit_code) last_content = "" + last_msg_intermediate = False # Track if previous message was intermediate (to avoid double counting tokens) stopped_reason = None def print_status(msg: str, style: str = GHOST_DIM): @@ -301,13 +305,13 @@ async def run_cli( return None async def print_summary(interrupted: bool = False): - nonlocal findings + nonlocal messages - # Generate summary if we don't have findings yet - if not findings and tool_log: + # Generate summary if we don't have messages yet + if not messages and tool_log: summary = await generate_summary() if summary: - findings.append(summary) + messages.append(summary) elapsed = int(time.time() - start_time) mins, secs = divmod(elapsed, 60) @@ -321,14 +325,18 @@ async def run_cli( final_text.append(f"{status}\n\n", style=f"bold {GHOST_PRIMARY}") final_text.append("Duration: ", style=GHOST_DIM) final_text.append(f"{mins}m {secs}s\n", style=GHOST_SECONDARY) - final_text.append("Iterations: ", style=GHOST_DIM) - final_text.append(f"{iteration}\n", style=GHOST_SECONDARY) + final_text.append("Loops: ", style=GHOST_DIM) + final_text.append(f"{iteration}/{max_loops}\n", style=GHOST_SECONDARY) final_text.append("Tools: ", style=GHOST_DIM) - final_text.append(f"{tool_count}/{max_tools}\n", style=GHOST_SECONDARY) + final_text.append(f"{tool_count}\n", style=GHOST_SECONDARY) - if findings: + if total_tokens > 0: + final_text.append("Tokens: ", style=GHOST_DIM) + final_text.append(f"{total_tokens:,}\n", style=GHOST_SECONDARY) + + if findings_count > 0: final_text.append("Findings: ", style=GHOST_DIM) - final_text.append(f"{len(findings)}", style=GHOST_SECONDARY) + final_text.append(f"{findings_count}", style=GHOST_SECONDARY) console.print() console.print( @@ -339,12 +347,12 @@ async def run_cli( ) ) - # Show summary/findings - if findings: + # Show summary/messages only if it's new content (not just displayed) + if messages and messages[-1] != last_content: console.print() console.print( Panel( - Markdown(findings[-1]), + Markdown(messages[-1]), title=f"[{GHOST_PRIMARY}]Summary", border_style=GHOST_BORDER, ) @@ -359,6 +367,27 @@ async def run_cli( async for response in agent.agent_loop(task_msg): iteration += 1 + # Track token usage + if response.usage: + usage = response.usage.get("total_tokens", 0) + is_intermediate = response.metadata.get("intermediate", False) + has_tools = bool(response.tool_calls) + + # Logic to avoid double counting: + # 1. Intermediate messages (thinking) always count + # 2. Tool messages count ONLY if not preceded by intermediate message + if is_intermediate: + total_tokens += usage + last_msg_intermediate = True + elif has_tools: + if not last_msg_intermediate: + total_tokens += usage + last_msg_intermediate = False + else: + # Other messages (like plan) + total_tokens += usage + last_msg_intermediate = False + # Show tool calls and results as they happen if response.tool_calls: for i, call in enumerate(response.tool_calls): @@ -367,6 +396,26 @@ async def run_cli( call.function, "name", "tool" ) + # Track findings (notes tool) + if name == "notes": + findings_count += 1 + try: + args = getattr(call, "arguments", None) or getattr( + call.function, "arguments", "{}" + ) + if isinstance(args, str): + import json + + args = json.loads(args) + if isinstance(args, dict): + note_content = args.get("content", "") or args.get( + "note", "" + ) + if note_content: + findings.append(note_content) + except Exception: + pass + elapsed = int(time.time() - start_time) mins, secs = divmod(elapsed, 60) ts = f"{mins:02d}:{secs:02d}" @@ -427,7 +476,7 @@ async def run_cli( # Metasploit-style output with better spacing console.print() # Blank line before each tool - print_status(f"$ {name} ({tool_count}/{max_tools})", GHOST_ACCENT) + print_status(f"$ {name} ({tool_count})", GHOST_ACCENT) # Show command/args on separate indented line (truncated for display) if command_text: @@ -457,17 +506,10 @@ async def run_cli( f" [{GHOST_DIM}][*] {result_line[:60]}...[/]" ) - # Check max tools limit - if tool_count >= max_tools: - stopped_reason = "max calls reached" - console.print() - print_status(f"Max calls limit reached ({max_tools})", "yellow") - raise StopIteration() - # Print assistant content immediately (analysis/findings) if response.content and response.content != last_content: last_content = response.content - findings.append(response.content) + messages.append(response.content) console.print() console.print( @@ -479,6 +521,13 @@ async def run_cli( ) console.print() + # Check max loops limit + if iteration >= max_loops: + stopped_reason = "max loops reached" + console.print() + print_status(f"Max loops limit reached ({max_loops})", "yellow") + raise StopIteration() + await print_summary(interrupted=False) except StopIteration: diff --git a/ghostcrew/interface/main.py b/ghostcrew/interface/main.py index 3aef1e0..a23bd7f 100644 --- a/ghostcrew/interface/main.py +++ b/ghostcrew/interface/main.py @@ -8,24 +8,70 @@ from .cli import run_cli from .tui import run_tui -def parse_arguments() -> argparse.Namespace: +def parse_arguments(): """Parse command line arguments.""" parser = argparse.ArgumentParser( description="GhostCrew - AI Penetration Testing", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - ghostcrew Launch TUI - ghostcrew -t 192.168.1.1 Launch TUI with target - ghostcrew -n -t example.com Non-interactive run - ghostcrew tools list List available tools - ghostcrew mcp list List MCP servers + ghostcrew tui Launch TUI + ghostcrew tui -t 192.168.1.1 Launch TUI with target + ghostcrew run -t localhost --task "scan" Headless run + ghostcrew tools list List available tools + ghostcrew mcp list List MCP servers """, ) + parser.add_argument("--version", action="version", version="GhostCrew 0.2.0") + # Subcommands subparsers = parser.add_subparsers(dest="command", help="Commands") + # Common arguments for runtime modes + runtime_parent = argparse.ArgumentParser(add_help=False) + runtime_parent.add_argument("--target", "-t", help="Target (IP, hostname, or URL)") + runtime_parent.add_argument( + "--model", + "-m", + default=DEFAULT_MODEL, + help="LLM model (set GHOSTCREW_MODEL in .env)", + ) + runtime_parent.add_argument( + "--docker", + "-d", + action="store_true", + help="Run tools inside Docker container (requires Docker)", + ) + + # TUI subcommand + subparsers.add_parser( + "tui", parents=[runtime_parent], help="Launch TUI (Interactive Mode)" + ) + + # Run subcommand (Headless) + run_parser = subparsers.add_parser( + "run", parents=[runtime_parent], help="Run in headless mode" + ) + run_parser.add_argument("task", nargs="+", help="Task to run") + run_parser.add_argument( + "--report", + "-r", + nargs="?", + const="auto", + help=( + "Generate report. " + "If used without value, auto-generates path under loot/reports/. " + "If omitted, no report is generated." + ), + ) + run_parser.add_argument( + "--max-loops", + type=int, + default=50, + help="Max agent loops before stopping (default: 50)", + ) + # Tools subcommand tools_parser = subparsers.add_parser("tools", help="Manage tools") tools_subparsers = tools_parser.add_subparsers( @@ -51,7 +97,7 @@ Examples: mcp_add.add_argument("name", help="Server name") mcp_add.add_argument("command", help="Command to run (e.g., npx)") mcp_add.add_argument("args", nargs="*", help="Command arguments") - mcp_add.add_argument("--description", "-d", default="", help="Server description") + mcp_add.add_argument("--description", default="", help="Server description") # mcp remove mcp_remove = mcp_subparsers.add_parser("remove", help="Remove an MCP server") @@ -61,54 +107,7 @@ Examples: mcp_test = mcp_subparsers.add_parser("test", help="Test MCP server connection") mcp_test.add_argument("name", help="Server name to test") - # Target option - parser.add_argument("--target", "-t", help="Target (IP, hostname, or URL)") - - # Non-interactive mode - parser.add_argument( - "-n", - "--headless", - action="store_true", - help="Run without TUI (requires --target)", - ) - - # Task for non-interactive mode - parser.add_argument("--task", help="Task to run in non-interactive mode") - - # Report output (saves to loot/reports/ by default) - parser.add_argument( - "--report", - "-r", - nargs="?", - const="auto", - help="Generate report (default: loot/reports/_.md)", - ) - - # Max tool calls limit - parser.add_argument( - "--max", type=int, default=50, help="Max calls before stopping (default: 50)" - ) - - # Model options - parser.add_argument( - "--model", - "-m", - default=DEFAULT_MODEL, - help="LLM model (set GHOSTCREW_MODEL in .env)", - ) - - # Docker mode - parser.add_argument( - "--docker", - "-d", - action="store_true", - help="Run tools inside Docker container (requires Docker)", - ) - - # Version - parser.add_argument("--version", action="version", version="GhostCrew 0.2.0") - - return parser.parse_args() + return parser, parser.parse_args() def handle_tools_command(args: argparse.Namespace): @@ -242,7 +241,7 @@ def handle_mcp_command(args: argparse.Namespace): def main(): """Main entry point.""" - args = parse_arguments() + parser, args = parse_arguments() # Handle subcommands if args.command == "tools": @@ -253,36 +252,55 @@ def main(): handle_mcp_command(args) return - # Check model configuration - if not args.model: - print("Error: No model configured.") - print("Set GHOSTCREW_MODEL in .env file or use --model flag.") - print( - "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514" - ) - return - - # Determine interface mode - if args.headless: - if not args.target: - print("Error: --target is required for headless mode") + if args.command == "run": + # Check model configuration + if not args.model: + print("Error: No model configured.") + print("Set GHOSTCREW_MODEL in .env file or use --model flag.") + print( + "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514" + ) return + + if not args.target: + print("Error: --target is required for run mode") + return + + # Join task arguments + task_description = " ".join(args.task) + try: asyncio.run( run_cli( target=args.target, model=args.model, - task=args.task, + task=task_description, report=args.report, - max_tools=args.max, + max_loops=args.max_loops, use_docker=args.docker, ) ) except KeyboardInterrupt: print("\n[!] Interrupted by user.") - else: - # TUI doesn't need asyncio.run - it runs its own event loop + return + + if args.command == "tui": + # Check model configuration + if not args.model: + print("Error: No model configured.") + print("Set GHOSTCREW_MODEL in .env file or use --model flag.") + print( + "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514" + ) + return + run_tui(target=args.target, model=args.model, use_docker=args.docker) + return + + # If no command provided, default to TUI + if args.command is None: + run_tui(target=None, model=DEFAULT_MODEL, use_docker=False) + return if __name__ == "__main__":