feat(agent): add replanning, COA tooling, and status-aware graph

This commit is contained in:
GH05TCREW
2025-12-15 02:34:51 -07:00
parent 3e3c7b5b9e
commit ec30a07e4e
12 changed files with 364 additions and 29 deletions

View File

@@ -315,6 +315,31 @@ class BaseAgent(ABC):
)
yield display_msg
# Check for plan failure (Tactical Replanning)
if (
hasattr(self._task_plan, "has_failure")
and self._task_plan.has_failure()
):
# Find the failed step
failed_step = None
for s in self._task_plan.steps:
if s.status == "fail":
failed_step = s
break
if failed_step:
replan_msg = await self._replan(failed_step)
if replan_msg:
self.conversation_history.append(replan_msg)
yield replan_msg
# Check if replan indicated impossibility
if replan_msg.metadata.get("replan_impossible"):
self.state_manager.transition_to(AgentState.COMPLETE)
return
continue
# Check if plan is now complete
if self._task_plan.is_complete():
# All steps done - generate final summary
@@ -590,6 +615,117 @@ Call the create_plan tool with your steps."""
return error_msg
return error_msg
async def _replan(self, failed_step: Any) -> Optional[AgentMessage]:
"""
Handle plan failure by generating a new plan (Tactical Replanning).
"""
from ..tools.finish import PlanStep
from ..tools.registry import Tool, ToolSchema
# 1. Archive current plan (log it)
old_plan_str = "\n".join(
[f"{s.id}. {s.description} ({s.status})" for s in self._task_plan.steps]
)
# 2. Generate new plan
# Create a temporary tool for plan generation
plan_generator_tool = Tool(
name="create_plan",
description="Create a NEW step-by-step plan. Call this with the steps needed.",
schema=ToolSchema(
properties={
"feasible": {
"type": "boolean",
"description": "Can the task be completed with a new plan? Set false if impossible/out-of-scope.",
},
"steps": {
"type": "array",
"items": {"type": "string"},
"description": "List of actionable steps (required if feasible=true).",
},
"reason": {
"type": "string",
"description": "Reason for the new plan OR reason why it's impossible.",
},
},
required=["feasible", "reason"],
),
execute_fn=lambda args, runtime: None,
category="planning",
)
replan_prompt = f"""The previous plan failed at step {failed_step.id}.
Failed Step: {failed_step.description}
Reason: {failed_step.result}
Previous Plan:
{old_plan_str}
Original Request: {self._task_plan.original_request}
Task: Generate a NEW plan (v2) that addresses this failure.
- If the failure invalidates the entire approach, try a different tactical approach.
- If the task is IMPOSSIBLE or OUT OF SCOPE (e.g., requires installing software on a remote target, physical access, or permissions you don't have), set feasible=False.
- Do NOT propose steps that violate standard pentest constraints (no installing agents/services on targets unless exploited).
Call create_plan with the new steps OR feasible=False."""
try:
response = await self.llm.generate(
system_prompt="You are a tactical planning assistant. The previous plan failed. Create a new one or declare it impossible.",
messages=[{"role": "user", "content": replan_prompt}],
tools=[plan_generator_tool],
)
# Extract steps
steps = []
feasible = True
reason = ""
if response.tool_calls:
for tc in response.tool_calls:
args = self._parse_arguments(tc)
feasible = args.get("feasible", True)
reason = args.get("reason", "")
if feasible and args.get("steps"):
steps = args["steps"]
break
if not feasible:
return AgentMessage(
role="assistant",
content=f"Task determined to be infeasible after failure.\nReason: {reason}",
metadata={"replan_impossible": True},
)
if not steps:
return None
# Update plan
self._task_plan.steps = [
PlanStep(id=i + 1, description=str(step).strip())
for i, step in enumerate(steps)
]
# Return message
plan_display = [f"Plan v2 (Replanned) - {reason}:"]
for step in self._task_plan.steps:
plan_display.append(f" {step.id}. {step.description}")
return AgentMessage(
role="assistant",
content="\n".join(plan_display),
metadata={"replanned": True},
)
except Exception as e:
return AgentMessage(
role="assistant",
content=f"Replanning failed: {str(e)}",
metadata={"replan_failed": True},
)
def reset(self):
"""Reset the agent state for a new conversation."""
self.state_manager.reset()

View File

@@ -22,6 +22,7 @@ class AgentStatus(Enum):
COMPLETE = "complete"
WARNING = "warning" # Completed but hit max iterations
ERROR = "error"
FAILED = "failed" # Task determined infeasible
CANCELLED = "cancelled"

View File

@@ -109,6 +109,48 @@ Present concrete findings. Be factual and concise about what was discovered.
return response.content
async def formulate_strategy_fn(arguments: dict, runtime: "Runtime") -> str:
"""Formulate and select a strategic Course of Action (COA)."""
problem = arguments.get("problem", "")
candidates = arguments.get("candidates", [])
selected_id = arguments.get("selected_id", "")
rationale = arguments.get("rationale", "")
feasible = arguments.get("feasible", True)
if not problem:
return "Error: problem is required."
if not feasible:
return f"## Strategic Decision: TERMINATE MISSION\n**Problem:** {problem}\n**Rationale:** {rationale}\n\nMission marked as infeasible."
if not candidates or not selected_id:
return "Error: candidates and selected_id are required when feasible=True."
# Find selected candidate
selected = next((c for c in candidates if c.get("id") == selected_id), None)
if not selected:
return f"Error: Selected ID '{selected_id}' not found in candidates."
# Format the decision for the log/history
output = [
f"## Strategic Decision: {selected.get('name', 'Unknown')}",
f"**Problem:** {problem}",
"",
"**Considered Options:**",
]
for c in candidates:
mark = "(SELECTED)" if c.get("id") == selected_id else ""
output.append(f"- **{c.get('name')}** {mark}")
output.append(f" - Pros: {c.get('pros')}")
output.append(f" - Cons: {c.get('cons')}")
output.append(f" - Risk: {c.get('risk')}")
output.append("")
output.append(f"**Rationale:** {rationale}")
return "\n".join(output)
# Create Tool objects
tools = [
Tool(
@@ -192,6 +234,53 @@ Present concrete findings. Be factual and concise about what was discovered.
execute_fn=synthesize_findings_fn,
category="orchestration",
),
Tool(
name="formulate_strategy",
description="Define and select a strategic Course of Action (COA). Use this when facing a strategic blocker or choosing an initial approach. This logs the decision process. Set feasible=False to terminate if no options exist.",
schema=ToolSchema(
type="object",
properties={
"problem": {
"type": "string",
"description": "The strategic problem or blocker encountered.",
},
"feasible": {
"type": "boolean",
"description": "Whether a feasible solution exists. Set to False to terminate the mission.",
"default": True,
},
"candidates": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {"type": "string"},
"name": {"type": "string"},
"pros": {"type": "string"},
"cons": {"type": "string"},
"risk": {
"type": "string",
"enum": ["Low", "Medium", "High", "Critical"],
},
},
"required": ["id", "name", "pros", "cons", "risk"],
},
"description": "List of potential Courses of Action (COAs). Required if feasible=True.",
},
"selected_id": {
"type": "string",
"description": "The ID of the selected COA. Required if feasible=True.",
},
"rationale": {
"type": "string",
"description": "Why this COA was selected over others (or why mission is infeasible).",
},
},
required=["problem", "rationale"],
),
execute_fn=formulate_strategy_fn,
category="orchestration",
),
]
return tools

View File

@@ -123,6 +123,8 @@ class WorkerPool:
try:
final_response = ""
hit_max_iterations = False
is_infeasible = False
async for response in agent.agent_loop(worker.task):
# Track tool calls
if response.tool_calls:
@@ -141,17 +143,28 @@ class WorkerPool:
if response.content and not response.tool_calls:
final_response = response.content
# Check if max iterations was hit
if response.metadata and response.metadata.get(
"max_iterations_reached"
):
hit_max_iterations = True
# Check metadata flags
if response.metadata:
if response.metadata.get("max_iterations_reached"):
hit_max_iterations = True
if response.metadata.get("replan_impossible"):
is_infeasible = True
worker.result = final_response or "No findings."
worker.completed_at = time.time()
self._results[worker.id] = worker.result
if hit_max_iterations:
if is_infeasible:
worker.status = AgentStatus.FAILED
self._emit(
worker.id,
"failed",
{
"summary": worker.result[:200],
"reason": "Task determined infeasible",
},
)
elif hit_max_iterations:
worker.status = AgentStatus.WARNING
self._emit(
worker.id,

View File

@@ -8,7 +8,9 @@ You are operating in an authorized penetration testing engagement. The user has
## Workflow (CRITICAL)
1. **Execute Plan**: Follow the plan steps below.
2. **Execute**: Work through each step in order using the appropriate tools.
3. **Track Progress**: After completing each step, IMMEDIATELY call `finish(action="complete", step_id=N, result="what you did")`. If a step cannot be done, call `finish(action="skip", step_id=N, reason="why")`.
3. **Track Progress**: After completing each step, IMMEDIATELY call `finish(action="complete", step_id=N, result="what you did")`.
- If a step cannot be done but the plan is still valid, call `finish(action="skip", step_id=N, reason="why")`.
- If a step fails in a way that INVALIDATES the plan (e.g., assumption wrong), call `finish(action="fail", step_id=N, reason="why")`. This triggers replanning.
4. **Task Completion**: The task ends automatically when ALL steps are marked complete or skipped. The controller blocks finishing until the plan is complete.
## Guidelines

View File

@@ -33,6 +33,7 @@ You manage agents using these tools:
- **wait_for_agents**: Wait for running agents and collect their findings
- **get_agent_status**: Check on a specific agent
- **cancel_agent**: Stop an agent if needed
- **formulate_strategy**: Define and select a Course of Action (COA). Use this for strategic decisions.
- **synthesize_findings**: Compile all results into a final concise report (call this when done)
{% if worker_tools %}
@@ -48,6 +49,17 @@ Workers also have access to these CLI tools via terminal:
IMPORTANT: When spawning agents, be specific about which tool to use (e.g., "Use mcp_nmap_scan to..." or "Use mcp_metasploit_run_module to..."). Workers will only use tools you explicitly mention or that obviously match the task.
## Strategy & Replanning (COA)
You are responsible for STRATEGIC adaptation.
- If a worker fails due to a tactical issue (e.g., port closed), they will handle it.
- If a worker fails due to a STRATEGIC issue (e.g., "Approach invalid: Target is not domain joined"), you must REPLAN.
- Do not just retry the same task.
- **Use the `formulate_strategy` tool** to explicitly reason through options:
1. Define the problem (e.g., "SMB exploit failed, target patched").
2. List 2-3 candidate Courses of Action (COAs).
3. Select the best one based on risk and probability of success.
4. THEN spawn new agents to execute the selected COA.
## Guidelines
- Leverage any prior intelligence from earlier reconnaissance
- Be strategic - spawn 2-4 agents in parallel for efficiency

View File

@@ -1393,6 +1393,14 @@ Be concise. Use the actual data from notes."""
wtype = worker.get("worker_type", "worker")
self._add_system(f"[!] {wtype.upper()} stopped: {reason}")
self._update_crew_stats()
elif event_type == "failed":
# Worker determined task infeasible
self._update_crew_worker(worker_id, status="failed")
reason = data.get("reason", "Task infeasible")
worker = self._crew_workers.get(worker_id, {})
wtype = worker.get("worker_type", "worker")
self._add_system(f"[!] {wtype.upper()} failed: {reason}")
self._update_crew_stats()
elif event_type == "error":
self._update_crew_worker(worker_id, status="error")
worker = self._crew_workers.get(worker_id, {})

View File

@@ -79,16 +79,23 @@ class ShadowGraph:
content = note_data
category = "info"
metadata = {}
status = "confirmed"
else:
content = note_data.get("content", "")
category = note_data.get("category", "info")
metadata = note_data.get("metadata", {})
status = note_data.get("status", "confirmed")
self._process_note(key, content, category, metadata)
self._process_note(key, content, category, metadata, status)
self._processed_notes.add(key)
def _process_note(
self, key: str, content: str, category: str, metadata: Dict[str, Any]
self,
key: str,
content: str,
category: str,
metadata: Dict[str, Any],
status: str,
) -> None:
"""Extract entities and relationships from a single note."""
@@ -121,11 +128,11 @@ class ShadowGraph:
# 2. Handle specific categories
if category == "credential":
self._process_credential(key, content, hosts, metadata)
self._process_credential(key, content, hosts, metadata, status)
elif category == "finding":
self._process_finding(key, content, hosts, metadata)
self._process_finding(key, content, hosts, metadata, status)
elif category == "vulnerability":
self._process_vulnerability(key, content, hosts, metadata)
self._process_vulnerability(key, content, hosts, metadata, status)
# 3. Link note to hosts (provenance)
# We don't add the note itself as a node usually, but we could.
@@ -142,9 +149,18 @@ class ShadowGraph:
self.graph.add_edge(source, target, type=edge_type, **kwargs)
def _process_credential(
self, key: str, content: str, related_hosts: List[str], metadata: Dict[str, Any]
self,
key: str,
content: str,
related_hosts: List[str],
metadata: Dict[str, Any],
status: str,
) -> None:
"""Process a credential note."""
# Skip if status is closed/filtered (invalid creds)
if status in ["closed", "filtered"]:
return
# Extract username from metadata or regex
username = metadata.get("username")
if not username:
@@ -189,9 +205,18 @@ class ShadowGraph:
self._add_edge(cred_id, host_id, "AUTH_ACCESS", protocol=protocol)
def _process_finding(
self, key: str, content: str, related_hosts: List[str], metadata: Dict[str, Any]
self,
key: str,
content: str,
related_hosts: List[str],
metadata: Dict[str, Any],
status: str,
) -> None:
"""Process a finding note (e.g., open ports)."""
# Skip if status is closed/filtered
if status in ["closed", "filtered"]:
return
# Filter related_hosts: If we have explicit target metadata, ONLY use that.
# Otherwise, use all related hosts (fallback to regex behavior).
target_hosts = related_hosts
@@ -232,9 +257,18 @@ class ShadowGraph:
self._add_edge(host_id, service_id, "HAS_SERVICE", protocol=proto)
def _process_vulnerability(
self, key: str, content: str, related_hosts: List[str], metadata: Dict[str, Any]
self,
key: str,
content: str,
related_hosts: List[str],
metadata: Dict[str, Any],
status: str,
) -> None:
"""Process a vulnerability note."""
# Skip if status is closed/filtered (patched or not vulnerable)
if status in ["closed", "filtered"]:
return
# Filter related_hosts: If we have explicit target metadata, ONLY use that.
target_hosts = related_hosts
if metadata.get("target"):

View File

@@ -384,24 +384,42 @@ class LocalRuntime(Runtime):
async def execute_command(self, command: str, timeout: int = 300) -> CommandResult:
"""Execute a command locally."""
import asyncio
import os
import re
import subprocess
# Regex to strip ANSI escape codes
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
# Set environment variables to discourage ANSI output
env = os.environ.copy()
env["TERM"] = "dumb"
env["NO_COLOR"] = "1"
try:
process = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=subprocess.DEVNULL, # Prevent interactive prompts
env=env,
)
stdout, stderr = await asyncio.wait_for(
process.communicate(), timeout=timeout
)
# Decode and strip ANSI codes
stdout_str = stdout.decode(errors="replace")
stderr_str = stderr.decode(errors="replace")
stdout_clean = ansi_escape.sub("", stdout_str)
stderr_clean = ansi_escape.sub("", stderr_str)
return CommandResult(
exit_code=process.returncode or 0,
stdout=stdout.decode(errors="replace"),
stderr=stderr.decode(errors="replace"),
stdout=stdout_clean,
stderr=stderr_clean,
)
except asyncio.TimeoutError:

View File

@@ -20,7 +20,8 @@ class StepStatus(str, Enum):
PENDING = "pending"
IN_PROGRESS = "in_progress"
COMPLETE = "complete"
SKIPPED = "skipped"
SKIP = "skip"
FAIL = "fail"
@dataclass
@@ -46,9 +47,12 @@ class TaskPlan:
def is_complete(self) -> bool:
return all(
s.status in (StepStatus.COMPLETE, StepStatus.SKIPPED) for s in self.steps
s.status in (StepStatus.COMPLETE, StepStatus.SKIP) for s in self.steps
)
def has_failure(self) -> bool:
return any(s.status == StepStatus.FAIL for s in self.steps)
def clear(self) -> None:
self.steps.clear()
self.original_request = ""
@@ -65,13 +69,13 @@ class TaskPlan:
@register_tool(
name="finish",
description="Mark plan steps as complete or skipped. Actions: 'complete' (mark step done), 'skip' (step not applicable). Once all steps are complete/skipped, task automatically finishes.",
description="Mark plan steps as complete, skipped, or failed. Actions: 'complete' (mark step done), 'skip' (step not applicable), 'fail' (step failed, invalidating plan). Once all steps are complete/skipped, task automatically finishes.",
schema=ToolSchema(
properties={
"action": {
"type": "string",
"enum": ["complete", "skip"],
"description": "Action: 'complete' (mark step done) or 'skip' (step not applicable)",
"enum": ["complete", "skip", "fail"],
"description": "Action: 'complete', 'skip', or 'fail'",
},
"step_id": {
"type": "integer",
@@ -83,7 +87,7 @@ class TaskPlan:
},
"reason": {
"type": "string",
"description": "[skip only] Why step is being skipped",
"description": "[skip/fail only] Why step is being skipped or failed",
},
},
required=["action", "step_id"],
@@ -131,12 +135,21 @@ async def finish(arguments: dict, runtime: "Runtime") -> str:
if not reason:
return "Error: 'reason' required for skip action."
step.status = StepStatus.SKIPPED
step.status = StepStatus.SKIP
step.result = f"Skipped: {reason}"
return f"Step {step_id} skipped: {reason}"
elif action == "fail":
reason = arguments.get("reason", "")
if not reason:
return "Error: 'reason' required for fail action."
step.status = StepStatus.FAIL
step.result = f"Failed: {reason}"
return f"Step {step_id} marked as FAILED: {reason}. Initiating replanning..."
else:
return f"Error: Unknown action '{action}'. Use 'complete' or 'skip'."
return f"Error: Unknown action '{action}'. Use 'complete', 'skip', or 'fail'."
class CompletionReport:

View File

@@ -119,6 +119,11 @@ _load_notes_unlocked()
"enum": ["high", "medium", "low"],
"description": "Confidence level (default: medium)",
},
"status": {
"type": "string",
"enum": ["open", "closed", "filtered", "confirmed", "potential"],
"description": "Status of the finding (e.g., 'open' for ports, 'closed' for dead services). Default: confirmed/open.",
},
"source": {
"type": "string",
"description": "Source IP/Hostname where the finding originated (e.g., where creds were found)",
@@ -185,6 +190,7 @@ async def notes(arguments: dict, runtime) -> str:
category = "info"
confidence = arguments.get("confidence", "medium")
status = arguments.get("status", "confirmed")
# Extract structured metadata
metadata = {}
@@ -214,10 +220,11 @@ async def notes(arguments: dict, runtime) -> str:
"content": value,
"category": category,
"confidence": confidence,
"status": status,
"metadata": metadata,
}
_save_notes_unlocked()
return f"Created note '{key}' ({category})"
return f"Created note '{key}' ({category}, {status})"
elif action == "read":
if not key:
@@ -231,7 +238,8 @@ async def notes(arguments: dict, runtime) -> str:
if note.get("metadata")
else ""
)
return f"[{key}] ({note['category']}, {note['confidence']}) {note['content']}{meta_str}"
status_str = f", {note.get('status', 'confirmed')}"
return f"[{key}] ({note['category']}, {note['confidence']}{status_str}) {note['content']}{meta_str}"
elif action == "update":
if not key:
@@ -246,6 +254,7 @@ async def notes(arguments: dict, runtime) -> str:
"content": value,
"category": category,
"confidence": confidence,
"status": status,
"metadata": metadata,
}
_save_notes_unlocked()

View File

@@ -62,8 +62,8 @@ async def test_read_note():
}, runtime=None)
assert "Content to read" in result
# The format is "[key] (category, confidence) content"
assert "(info, medium)" in result
# The format is "[key] (category, confidence, status) content"
assert "(info, medium, confirmed)" in result
@pytest.mark.asyncio
async def test_update_note():