From c457c2943b79fa2fe165a2c7f85efce36afe2e7f Mon Sep 17 00:00:00 2001
From: GH05TCREW <harmasic@gmail.com>
Date: Mon, 15 Dec 2025 21:26:04 -0700
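Subject: [PATCH] refactor: update CLI, add token tracking

CLI changes (not backward compatible):
* Replace the top-level -n/--headless, --task, --report and --max options
  with `tui` and `run` subcommands; running with no subcommand launches
  the TUI.
* `run` takes the task as required positional words and stops after
  --max-loops agent loops instead of after --max tool calls.
* Drop the -d short flag from `mcp add --description`.

Token tracking:
* Attach usage to the auto-plan message and emit a "tokens" phase from
  the crew orchestrator.
* Count usage in the CLI and the worker pool, skipping a tool-call
  message that directly follows an intermediate message to avoid double
  counting.
* Show total tokens and the number of `notes` tool calls in the run
  summary.

Workers now report completed plan step results in preference to the
final chat response.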
---
 ghostcrew/agents/base_agent.py        |   1 +
 ghostcrew/agents/crew/orchestrator.py |   6 +
 ghostcrew/agents/crew/worker_pool.py  |  37 +++++-
 ghostcrew/interface/cli.py            | 101 ++++++++++++----
 ghostcrew/interface/main.py           | 164 ++++++++++++++------------
 5 files changed, 207 insertions(+), 102 deletions(-)

diff --git a/ghostcrew/agents/base_agent.py b/ghostcrew/agents/base_agent.py
index c425fbb..a4cbb8a 100644
--- a/ghostcrew/agents/base_agent.py
+++ b/ghostcrew/agents/base_agent.py
@@ -597,6 +597,7 @@ Call the create_plan tool with your steps."""
                 role="assistant",
                 content="\n".join(plan_display),
                 metadata={"auto_plan": True},
+                usage=response.usage,
             )
             self.conversation_history.append(plan_msg)
             return plan_msg
diff --git a/ghostcrew/agents/crew/orchestrator.py b/ghostcrew/agents/crew/orchestrator.py
index 7be38c6..848e429 100644
--- a/ghostcrew/agents/crew/orchestrator.py
+++ b/ghostcrew/agents/crew/orchestrator.py
@@ -167,6 +167,12 @@ class CrewOrchestrator:
                     tools=crew_tools,
                 )
 
+                # Track tokens for orchestrator
+                if response.usage:
+                    total = response.usage.get("total_tokens", 0)
+                    if total > 0:
+                        yield {"phase": "tokens", "tokens": total}
+
                 # Check for tool calls first to determine if content is "thinking" or "final answer"
                 if response.tool_calls:
                     # If there are tool calls, the content is "thinking" (reasoning before action)
diff --git a/ghostcrew/agents/crew/worker_pool.py b/ghostcrew/agents/crew/worker_pool.py
index 4025d4c..fca592e 100644
--- a/ghostcrew/agents/crew/worker_pool.py
+++ b/ghostcrew/agents/crew/worker_pool.py
@@ -133,10 +133,26 @@ class WorkerPool:
                             worker.tools_used.append(tc.name)
                             self._emit(worker.id, "tool", {"tool": tc.name})
 
-                # Track tokens
+                # Track tokens (avoid double counting)
                 if response.usage:
                     total = response.usage.get("total_tokens", 0)
-                    if total > 0:
+                    is_intermediate = response.metadata.get("intermediate", False)
+                    has_tools = bool(response.tool_calls)
+
+                    # Same logic as CLI to avoid double counting
+                    should_count = False
+                    if is_intermediate:
+                        should_count = True
+                        worker.last_msg_intermediate = True
+                    elif has_tools:
+                        if not getattr(worker, "last_msg_intermediate", False):
+                            should_count = True
+                        worker.last_msg_intermediate = False
+                    else:
+                        should_count = True
+                        worker.last_msg_intermediate = False
+
+                    if should_count and total > 0:
                         self._emit(worker.id, "tokens", {"tokens": total})
 
                 # Capture final response (text without tool calls)
@@ -150,7 +166,22 @@ class WorkerPool:
                     if response.metadata.get("replan_impossible"):
                         is_infeasible = True
 
-            worker.result = final_response or "No findings."
+            # Prioritize structured results from the plan over chatty summaries
+            plan_summary = ""
+            plan = getattr(worker_runtime, "plan", None)
+            if plan and plan.steps:
+                completed_steps = [
+                    s for s in plan.steps if s.status == "complete" and s.result
+                ]
+                if completed_steps:
+                    summary_lines = []
+                    for s in completed_steps:
+                        summary_lines.append(f"- {s.description}: {s.result}")
+                    plan_summary = "\n".join(summary_lines)
+
+            # Use plan summary if available, otherwise fallback to chat response
+            worker.result = plan_summary or final_response or "No findings."
+
             worker.completed_at = time.time()
             self._results[worker.id] = worker.result
 
diff --git a/ghostcrew/interface/cli.py b/ghostcrew/interface/cli.py
index cc8baf0..1766d7a 100644
--- a/ghostcrew/interface/cli.py
+++ b/ghostcrew/interface/cli.py
@@ -25,7 +25,7 @@ async def run_cli(
     model: str,
     task: str = None,
     report: str = None,
-    max_tools: int = 50,
+    max_loops: int = 50,
     use_docker: bool = False,
 ):
     """
@@ -36,7 +36,7 @@ async def run_cli(
         model: LLM model to use
         task: Optional task description
         report: Report path ("auto" for loot/reports/<target>_<timestamp>.md)
-        max_tools: Max tool calls before stopping
+        max_loops: Max agent loops before stopping
         use_docker: Run tools in Docker container
     """
     from ..agents.ghostcrew_agent import GhostCrewAgent
@@ -56,8 +56,8 @@ async def run_cli(
     start_text.append(f"{model}\n", style=GHOST_PRIMARY)
     start_text.append("Runtime: ", style=GHOST_SECONDARY)
     start_text.append(f"{'Docker' if use_docker else 'Local'}\n", style=GHOST_PRIMARY)
-    start_text.append("Max calls: ", style=GHOST_SECONDARY)
-    start_text.append(f"{max_tools}\n", style=GHOST_PRIMARY)
+    start_text.append("Max loops: ", style=GHOST_SECONDARY)
+    start_text.append(f"{max_loops}\n", style=GHOST_PRIMARY)
 
     task_msg = task or f"Perform a penetration test on {target}"
     start_text.append("Task: ", style=GHOST_SECONDARY)
@@ -122,9 +122,13 @@ async def run_cli(
     start_time = time.time()
     tool_count = 0
     iteration = 0
-    findings = []  # Store findings for report
+    findings_count = 0  # Count of notes/findings recorded
+    findings = []  # Store actual findings text
+    total_tokens = 0  # Track total token usage
+    messages = []  # Store agent messages
     tool_log = []  # Log of tools executed (ts, name, command, result, exit_code)
     last_content = ""
+    last_msg_intermediate = False  # Track if previous message was intermediate (to avoid double counting tokens)
     stopped_reason = None
 
     def print_status(msg: str, style: str = GHOST_DIM):
@@ -301,13 +305,13 @@ async def run_cli(
             return None
 
     async def print_summary(interrupted: bool = False):
-        nonlocal findings
+        nonlocal messages
 
-        # Generate summary if we don't have findings yet
-        if not findings and tool_log:
+        # Generate summary if we don't have messages yet
+        if not messages and tool_log:
             summary = await generate_summary()
             if summary:
-                findings.append(summary)
+                messages.append(summary)
 
         elapsed = int(time.time() - start_time)
         mins, secs = divmod(elapsed, 60)
@@ -321,14 +325,18 @@ async def run_cli(
         final_text.append(f"{status}\n\n", style=f"bold {GHOST_PRIMARY}")
         final_text.append("Duration: ", style=GHOST_DIM)
         final_text.append(f"{mins}m {secs}s\n", style=GHOST_SECONDARY)
-        final_text.append("Iterations: ", style=GHOST_DIM)
-        final_text.append(f"{iteration}\n", style=GHOST_SECONDARY)
+        final_text.append("Loops: ", style=GHOST_DIM)
+        final_text.append(f"{iteration}/{max_loops}\n", style=GHOST_SECONDARY)
         final_text.append("Tools: ", style=GHOST_DIM)
-        final_text.append(f"{tool_count}/{max_tools}\n", style=GHOST_SECONDARY)
+        final_text.append(f"{tool_count}\n", style=GHOST_SECONDARY)
 
-        if findings:
+        if total_tokens > 0:
+            final_text.append("Tokens: ", style=GHOST_DIM)
+            final_text.append(f"{total_tokens:,}\n", style=GHOST_SECONDARY)
+
+        if findings_count > 0:
             final_text.append("Findings: ", style=GHOST_DIM)
-            final_text.append(f"{len(findings)}", style=GHOST_SECONDARY)
+            final_text.append(f"{findings_count}", style=GHOST_SECONDARY)
 
         console.print()
         console.print(
@@ -339,12 +347,12 @@ async def run_cli(
             )
         )
 
-        # Show summary/findings
-        if findings:
+        # Show summary/messages only if it's new content (not just displayed)
+        if messages and messages[-1] != last_content:
             console.print()
             console.print(
                 Panel(
-                    Markdown(findings[-1]),
+                    Markdown(messages[-1]),
                     title=f"[{GHOST_PRIMARY}]Summary",
                     border_style=GHOST_BORDER,
                 )
@@ -359,6 +367,27 @@ async def run_cli(
         async for response in agent.agent_loop(task_msg):
             iteration += 1
 
+            # Track token usage
+            if response.usage:
+                usage = response.usage.get("total_tokens", 0)
+                is_intermediate = response.metadata.get("intermediate", False)
+                has_tools = bool(response.tool_calls)
+
+                # Logic to avoid double counting:
+                # 1. Intermediate messages (thinking) always count
+                # 2. Tool messages count ONLY if not preceded by intermediate message
+                if is_intermediate:
+                    total_tokens += usage
+                    last_msg_intermediate = True
+                elif has_tools:
+                    if not last_msg_intermediate:
+                        total_tokens += usage
+                    last_msg_intermediate = False
+                else:
+                    # Other messages (like plan)
+                    total_tokens += usage
+                    last_msg_intermediate = False
+
             # Show tool calls and results as they happen
             if response.tool_calls:
                 for i, call in enumerate(response.tool_calls):
@@ -367,6 +396,26 @@ async def run_cli(
                         call.function, "name", "tool"
                     )
 
+                    # Track findings (notes tool)
+                    if name == "notes":
+                        findings_count += 1
+                        try:
+                            args = getattr(call, "arguments", None) or getattr(
+                                call.function, "arguments", "{}"
+                            )
+                            if isinstance(args, str):
+                                import json
+
+                                args = json.loads(args)
+                            if isinstance(args, dict):
+                                note_content = args.get("content", "") or args.get(
+                                    "note", ""
+                                )
+                                if note_content:
+                                    findings.append(note_content)
+                        except Exception:
+                            pass
+
                     elapsed = int(time.time() - start_time)
                     mins, secs = divmod(elapsed, 60)
                     ts = f"{mins:02d}:{secs:02d}"
@@ -427,7 +476,7 @@ async def run_cli(
 
                     # Metasploit-style output with better spacing
                     console.print()  # Blank line before each tool
-                    print_status(f"$ {name} ({tool_count}/{max_tools})", GHOST_ACCENT)
+                    print_status(f"$ {name} ({tool_count})", GHOST_ACCENT)
 
                     # Show command/args on separate indented line (truncated for display)
                     if command_text:
@@ -457,17 +506,10 @@ async def run_cli(
                                     f"         [{GHOST_DIM}][*] {result_line[:60]}...[/]"
                                 )
 
-                    # Check max tools limit
-                    if tool_count >= max_tools:
-                        stopped_reason = "max calls reached"
-                        console.print()
-                        print_status(f"Max calls limit reached ({max_tools})", "yellow")
-                        raise StopIteration()
-
             # Print assistant content immediately (analysis/findings)
             if response.content and response.content != last_content:
                 last_content = response.content
-                findings.append(response.content)
+                messages.append(response.content)
 
                 console.print()
                 console.print(
@@ -479,6 +521,13 @@ async def run_cli(
                 )
                 console.print()
 
+            # Check max loops limit
+            if iteration >= max_loops:
+                stopped_reason = "max loops reached"
+                console.print()
+                print_status(f"Max loops limit reached ({max_loops})", "yellow")
+                raise StopIteration()
+
         await print_summary(interrupted=False)
 
     except StopIteration:
diff --git a/ghostcrew/interface/main.py b/ghostcrew/interface/main.py
index 3aef1e0..a23bd7f 100644
--- a/ghostcrew/interface/main.py
+++ b/ghostcrew/interface/main.py
@@ -8,24 +8,70 @@ from .cli import run_cli
 from .tui import run_tui
 
 
-def parse_arguments() -> argparse.Namespace:
+def parse_arguments():
     """Parse command line arguments."""
     parser = argparse.ArgumentParser(
         description="GhostCrew - AI Penetration Testing",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 Examples:
-  ghostcrew                           Launch TUI
-  ghostcrew -t 192.168.1.1            Launch TUI with target
-  ghostcrew -n -t example.com         Non-interactive run
-  ghostcrew tools list                List available tools
-  ghostcrew mcp list                  List MCP servers
+  ghostcrew tui                              Launch TUI
+  ghostcrew tui -t 192.168.1.1               Launch TUI with target
+  ghostcrew run -t localhost "scan"          Headless run
+  ghostcrew tools list                       List available tools
+  ghostcrew mcp list                         List MCP servers
         """,
     )
 
+    parser.add_argument("--version", action="version", version="GhostCrew 0.2.0")
+
     # Subcommands
     subparsers = parser.add_subparsers(dest="command", help="Commands")
 
+    # Common arguments for runtime modes
+    runtime_parent = argparse.ArgumentParser(add_help=False)
+    runtime_parent.add_argument("--target", "-t", help="Target (IP, hostname, or URL)")
+    runtime_parent.add_argument(
+        "--model",
+        "-m",
+        default=DEFAULT_MODEL,
+        help="LLM model (set GHOSTCREW_MODEL in .env)",
+    )
+    runtime_parent.add_argument(
+        "--docker",
+        "-d",
+        action="store_true",
+        help="Run tools inside Docker container (requires Docker)",
+    )
+
+    # TUI subcommand
+    subparsers.add_parser(
+        "tui", parents=[runtime_parent], help="Launch TUI (Interactive Mode)"
+    )
+
+    # Run subcommand (Headless)
+    run_parser = subparsers.add_parser(
+        "run", parents=[runtime_parent], help="Run in headless mode"
+    )
+    run_parser.add_argument("task", nargs="+", help="Task to run")
+    run_parser.add_argument(
+        "--report",
+        "-r",
+        nargs="?",
+        const="auto",
+        help=(
+            "Generate report. "
+            "If used without value, auto-generates path under loot/reports/. "
+            "If omitted, no report is generated."
+        ),
+    )
+    run_parser.add_argument(
+        "--max-loops",
+        type=int,
+        default=50,
+        help="Max agent loops before stopping (default: 50)",
+    )
+
     # Tools subcommand
     tools_parser = subparsers.add_parser("tools", help="Manage tools")
     tools_subparsers = tools_parser.add_subparsers(
@@ -51,7 +97,7 @@ Examples:
     mcp_add.add_argument("name", help="Server name")
     mcp_add.add_argument("command", help="Command to run (e.g., npx)")
     mcp_add.add_argument("args", nargs="*", help="Command arguments")
-    mcp_add.add_argument("--description", "-d", default="", help="Server description")
+    mcp_add.add_argument("--description", default="", help="Server description")
 
     # mcp remove
     mcp_remove = mcp_subparsers.add_parser("remove", help="Remove an MCP server")
@@ -61,54 +107,7 @@ Examples:
     mcp_test = mcp_subparsers.add_parser("test", help="Test MCP server connection")
     mcp_test.add_argument("name", help="Server name to test")
 
-    # Target option
-    parser.add_argument("--target", "-t", help="Target (IP, hostname, or URL)")
-
-    # Non-interactive mode
-    parser.add_argument(
-        "-n",
-        "--headless",
-        action="store_true",
-        help="Run without TUI (requires --target)",
-    )
-
-    # Task for non-interactive mode
-    parser.add_argument("--task", help="Task to run in non-interactive mode")
-
-    # Report output (saves to loot/reports/ by default)
-    parser.add_argument(
-        "--report",
-        "-r",
-        nargs="?",
-        const="auto",
-        help="Generate report (default: loot/reports/<target>_<timestamp>.md)",
-    )
-
-    # Max tool calls limit
-    parser.add_argument(
-        "--max", type=int, default=50, help="Max calls before stopping (default: 50)"
-    )
-
-    # Model options
-    parser.add_argument(
-        "--model",
-        "-m",
-        default=DEFAULT_MODEL,
-        help="LLM model (set GHOSTCREW_MODEL in .env)",
-    )
-
-    # Docker mode
-    parser.add_argument(
-        "--docker",
-        "-d",
-        action="store_true",
-        help="Run tools inside Docker container (requires Docker)",
-    )
-
-    # Version
-    parser.add_argument("--version", action="version", version="GhostCrew 0.2.0")
-
-    return parser.parse_args()
+    return parser, parser.parse_args()
 
 
 def handle_tools_command(args: argparse.Namespace):
@@ -242,7 +241,7 @@ def handle_mcp_command(args: argparse.Namespace):
 
 def main():
     """Main entry point."""
-    args = parse_arguments()
+    parser, args = parse_arguments()
 
     # Handle subcommands
     if args.command == "tools":
@@ -253,36 +252,55 @@ def main():
         handle_mcp_command(args)
         return
 
-    # Check model configuration
-    if not args.model:
-        print("Error: No model configured.")
-        print("Set GHOSTCREW_MODEL in .env file or use --model flag.")
-        print(
-            "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514"
-        )
-        return
-
-    # Determine interface mode
-    if args.headless:
-        if not args.target:
-            print("Error: --target is required for headless mode")
+    if args.command == "run":
+        # Check model configuration
+        if not args.model:
+            print("Error: No model configured.")
+            print("Set GHOSTCREW_MODEL in .env file or use --model flag.")
+            print(
+                "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514"
+            )
             return
+
+        if not args.target:
+            print("Error: --target is required for run mode")
+            return
+
+        # Join task arguments
+        task_description = " ".join(args.task)
+
         try:
             asyncio.run(
                 run_cli(
                     target=args.target,
                     model=args.model,
-                    task=args.task,
+                    task=task_description,
                     report=args.report,
-                    max_tools=args.max,
+                    max_loops=args.max_loops,
                     use_docker=args.docker,
                 )
             )
         except KeyboardInterrupt:
             print("\n[!] Interrupted by user.")
-    else:
-        # TUI doesn't need asyncio.run - it runs its own event loop
+        return
+
+    if args.command == "tui":
+        # Check model configuration
+        if not args.model:
+            print("Error: No model configured.")
+            print("Set GHOSTCREW_MODEL in .env file or use --model flag.")
+            print(
+                "Example: GHOSTCREW_MODEL=gpt-5 or GHOSTCREW_MODEL=claude-sonnet-4-20250514"
+            )
+            return
+
         run_tui(target=args.target, model=args.model, use_docker=args.docker)
+        return
+
+    # If no command provided, default to TUI
+    if args.command is None:
+        run_tui(target=None, model=DEFAULT_MODEL, use_docker=False)
+        return
 
 
 if __name__ == "__main__":