Files
pentestagent/pentestagent/runtime/runtime.py

845 lines
24 KiB
Python

"""Runtime abstraction for PentestAgent."""
import platform
import shutil
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Optional
if TYPE_CHECKING:
from ..mcp import MCPManager
# Categorized list of tools to check for
INTERESTING_TOOLS = {
"network_scan": [
"nmap",
"masscan",
"rustscan",
"naabu",
"unicornscan",
"zmap",
"arp-scan",
],
"web_scan": [
"nikto",
"gobuster",
"dirb",
"dirbuster",
"ffuf",
"feroxbuster",
"wpscan",
"nuclei",
"whatweb",
"wfuzz",
"arjun",
"burpsuite",
"zaproxy",
"zap-cli",
],
"enumeration": [
"enum4linux",
"enum4linux-ng",
"smbclient",
"smbmap",
"rpcclient",
"ldapsearch",
"snmpwalk",
"snmp-check",
"onesixtyone",
"nbtscan",
"responder",
"impacket-smbclient",
"impacket-GetNPUsers",
"impacket-GetUserSPNs",
"impacket-psexec",
"impacket-wmiexec",
"impacket-dcomexec",
"crackmapexec",
"netexec",
"evil-winrm",
"kerbrute",
"xfreerdp",
"rdesktop",
],
"exploitation": [
"msfconsole",
"msfvenom",
"searchsploit",
"sqlmap",
"commix",
"xsser",
"ysoserial",
],
"password_attacks": [
"hydra",
"medusa",
"john",
"hashcat",
"crackmapexec",
"netexec",
"thc-hydra",
"patator",
],
"network_analysis": [
"tcpdump",
"tshark",
"wireshark",
"ngrep",
"ettercap",
"bettercap",
],
"post_exploitation": [
"mimikatz",
"pypykatz",
"impacket-secretsdump",
"bloodhound",
"sharphound",
"powerview",
"linpeas",
"winpeas",
"pspy",
],
"tunneling": [
"proxychains",
"proxychains4",
"socat",
"chisel",
"ligolo",
"sshuttle",
"ngrok",
],
"osint": [
"theHarvester",
"recon-ng",
"amass",
"subfinder",
"assetfinder",
"sublist3r",
"dnsenum",
"dnsrecon",
"fierce",
],
"wireless": [
"aircrack-ng",
"airodump-ng",
"aireplay-ng",
"airmon-ng",
"wifite",
"kismet",
"reaver",
"bully",
"hostapd",
"wpa_supplicant",
],
"reverse_engineering": [
"ghidra",
"radare2",
"r2",
"gdb",
"pwndbg",
"gef",
"binwalk",
"foremost",
"exiftool",
"objdump",
"readelf",
"strace",
"ltrace",
],
"cloud": [
"aws",
"az",
"gcloud",
"s3scanner",
"cloud_enum",
"ScoutSuite",
"prowler",
"pacu",
"cloudfox",
],
"forensics": [
"steghide",
"stegseek",
"volatility",
"volatility3",
"autopsy",
"sleuthkit",
"binwalk",
"foremost",
"scalpel",
"bulk_extractor",
],
"fuzzing": [
"boofuzz",
"radamsa",
"afl-fuzz",
"zzuf",
"honggfuzz",
],
"mobile": [
"adb",
"apktool",
"jadx",
"frida",
"objection",
"mobsf",
"drozer",
],
"utilities": [
"curl",
"wget",
"nc",
"netcat",
"ncat",
"ssh",
"telnet",
"git",
"docker",
"kubectl",
"kubeletctl",
"kube-hunter",
"trivy",
"jq",
"python3",
"python",
"perl",
"ruby",
"gcc",
"g++",
"make",
"base64",
"openssl",
"xxd",
"strings",
"dig",
"whois",
"host",
"traceroute",
"mtr",
"ping",
"awk",
"sed",
"grep",
"find",
],
}
@dataclass
class ToolInfo:
"""Information about an available tool."""
name: str
path: str
category: str
@dataclass
class EnvironmentInfo:
"""System environment information."""
os: str # "Windows", "Linux", "Darwin"
os_version: str
shell: str # "powershell", "bash", "zsh", etc.
architecture: str # "x86_64", "arm64", etc.
available_tools: List[ToolInfo] = field(default_factory=list)
def __str__(self) -> str:
"""Concise string representation for prompts."""
# Group tools by category for cleaner output
grouped = {}
for tool in self.available_tools:
if tool.category not in grouped:
grouped[tool.category] = []
grouped[tool.category].append(tool.name)
tools_str = ""
if grouped:
lines = []
for cat, tools in grouped.items():
lines.append(f" - {cat}: {', '.join(tools)}")
tools_str = "\n" + "\n".join(lines)
else:
tools_str = " None"
return (
f"{self.os} ({self.architecture}), shell: {self.shell}\n"
f"Available CLI Tools:{tools_str}"
)
def detect_environment() -> EnvironmentInfo:
"""Detect the current system environment."""
import os
os_name = platform.system()
os_version = platform.release()
arch = platform.machine()
shell = "unknown"
# Detect shell and OS nuances
if os_name == "Windows":
# Better Windows shell detection
comspec = os.environ.get("COMSPEC", "").lower()
if "powershell" in comspec:
shell = "powershell"
elif "cmd.exe" in comspec:
shell = "cmd"
else:
# Fallback: check if we are in a PS session via env vars
if "PSModulePath" in os.environ:
shell = "powershell"
else:
shell = "cmd" # Default to cmd on Windows if unsure
else:
# Unix-like
shell_path = os.environ.get("SHELL", "/bin/sh")
shell = shell_path.split("/")[-1]
# WSL Detection
if os_name == "Linux":
try:
with open("/proc/version", "r") as f:
if "microsoft" in f.read().lower():
os_name = "Linux (WSL)"
except Exception:
pass
# Detect available tools with categories
available_tools = []
for category, tools in INTERESTING_TOOLS.items():
for tool_name in tools:
tool_path = shutil.which(tool_name)
if tool_path:
available_tools.append(
ToolInfo(name=tool_name, path=tool_path, category=category)
)
return EnvironmentInfo(
os=os_name,
os_version=os_version,
shell=shell,
architecture=arch,
available_tools=available_tools,
)
@dataclass
class CommandResult:
"""Result of a command execution."""
exit_code: int
stdout: str
stderr: str
@property
def success(self) -> bool:
"""Check if the command succeeded."""
return self.exit_code == 0
@property
def output(self) -> str:
"""Get combined output."""
parts = []
if self.stdout:
parts.append(self.stdout)
if self.stderr:
parts.append(self.stderr)
return "\n".join(parts)
class Runtime(ABC):
"""Abstract base class for runtime environments."""
_environment: Optional[EnvironmentInfo] = None
def __init__(self, mcp_manager: Optional["MCPManager"] = None):
"""
Initialize the runtime.
Args:
mcp_manager: Optional MCP manager for tool calls
"""
self.mcp_manager = mcp_manager
self.plan = None # Set by agent for finish tool access
@property
def environment(self) -> EnvironmentInfo:
"""Get environment info (cached)."""
if Runtime._environment is None:
Runtime._environment = detect_environment()
return Runtime._environment
@abstractmethod
async def start(self):
"""Start the runtime environment."""
pass
@abstractmethod
async def stop(self):
"""Stop the runtime environment."""
pass
@abstractmethod
async def execute_command(self, command: str, timeout: int = 300) -> CommandResult:
"""
Execute a shell command.
Args:
command: The command to execute
timeout: Timeout in seconds
Returns:
CommandResult with output
"""
pass
@abstractmethod
async def browser_action(self, action: str, **kwargs) -> dict:
"""
Perform a browser automation action.
Args:
action: The action to perform
**kwargs: Action-specific arguments
Returns:
Action result
"""
pass
@abstractmethod
async def proxy_action(self, action: str, **kwargs) -> dict:
"""
Perform an HTTP proxy action.
Args:
action: The action to perform
**kwargs: Action-specific arguments
Returns:
Action result
"""
pass
@abstractmethod
async def is_running(self) -> bool:
"""Check if the runtime is running."""
pass
@abstractmethod
async def get_status(self) -> dict:
"""
Get runtime status information.
Returns:
Status dictionary
"""
pass
class LocalRuntime(Runtime):
"""Local runtime that executes commands directly on the host."""
def __init__(self, mcp_manager: Optional["MCPManager"] = None):
super().__init__(mcp_manager)
self._running = False
self._browser = None
self._browser_context = None
self._page = None
self._playwright = None
self._active_processes: list = []
async def start(self):
"""Start the local runtime."""
self._running = True
# Create organized loot directory structure (workspace-aware)
from ..workspaces.utils import get_loot_base
base = get_loot_base()
(base).mkdir(parents=True, exist_ok=True)
(base / "reports").mkdir(parents=True, exist_ok=True)
(base / "artifacts").mkdir(parents=True, exist_ok=True)
(base / "artifacts" / "screenshots").mkdir(parents=True, exist_ok=True)
async def stop(self):
"""Stop the local runtime gracefully."""
# Clean up any active subprocesses
for proc in self._active_processes:
try:
if proc.returncode is None:
proc.terminate()
await proc.wait()
# Close pipes to prevent warnings
if proc.stdin:
proc.stdin.close()
if proc.stdout:
proc.stdout.close()
if proc.stderr:
proc.stderr.close()
except Exception:
pass
self._active_processes.clear()
# Clean up browser
await self._cleanup_browser()
self._running = False
async def _cleanup_browser(self):
"""Clean up browser resources properly."""
# Close in reverse order of creation
if self._page:
try:
await self._page.close()
except Exception:
pass
self._page = None
if self._browser_context:
try:
await self._browser_context.close()
except Exception:
pass
self._browser_context = None
if self._browser:
try:
await self._browser.close()
except Exception:
pass
self._browser = None
if self._playwright:
try:
await self._playwright.stop()
except Exception:
pass
self._playwright = None
async def _ensure_browser(self):
"""Ensure browser is initialized."""
if self._page is not None:
return
try:
from playwright.async_api import async_playwright
except ImportError as e:
raise RuntimeError(
"Playwright not installed. Install with:\n"
" pip install playwright\n"
" playwright install chromium"
) from e
self._playwright = await async_playwright().start()
self._browser = await self._playwright.chromium.launch(headless=True)
self._browser_context = await self._browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
)
self._page = await self._browser_context.new_page()
async def execute_command(self, command: str, timeout: int = 300) -> CommandResult:
"""Execute a command locally."""
import asyncio
import os
import re
import subprocess
# Regex to strip ANSI escape codes
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
# Set environment variables to discourage ANSI output
env = os.environ.copy()
env["TERM"] = "dumb"
env["NO_COLOR"] = "1"
try:
process = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=subprocess.DEVNULL, # Prevent interactive prompts
env=env,
)
# Track process for cleanup
self._active_processes.append(process)
stdout, stderr = await asyncio.wait_for(
process.communicate(), timeout=timeout
)
# Remove from tracking after completion
if process in self._active_processes:
self._active_processes.remove(process)
# Decode and strip ANSI codes
stdout_str = stdout.decode(errors="replace")
stderr_str = stderr.decode(errors="replace")
stdout_clean = ansi_escape.sub("", stdout_str)
stderr_clean = ansi_escape.sub("", stderr_str)
return CommandResult(
exit_code=process.returncode or 0,
stdout=stdout_clean,
stderr=stderr_clean,
)
except asyncio.TimeoutError:
# Clean up timed-out process
if process in self._active_processes:
self._active_processes.remove(process)
return CommandResult(
exit_code=-1,
stdout="",
stderr=f"Command timed out after {timeout} seconds",
)
except asyncio.CancelledError:
# Clean up cancelled process
if process in self._active_processes:
self._active_processes.remove(process)
# Handle Ctrl+C gracefully
return CommandResult(exit_code=-1, stdout="", stderr="Command cancelled")
except Exception as e:
# Clean up on error
if process in self._active_processes:
self._active_processes.remove(process)
return CommandResult(exit_code=-1, stdout="", stderr=str(e))
async def browser_action(self, action: str, **kwargs) -> dict:
"""Perform browser automation actions using Playwright."""
import asyncio
# Enforce a hard timeout on the entire operation to prevent hanging
# Add 5 seconds buffer to the requested timeout for browser startup overhead
op_timeout = kwargs.get("timeout", 30) + 10
try:
return await asyncio.wait_for(
self._execute_browser_action(action, **kwargs), timeout=op_timeout
)
except asyncio.TimeoutError:
return {"error": f"Browser action '{action}' timed out after {op_timeout}s"}
except Exception as e:
return {"error": str(e)}
async def _execute_browser_action(self, action: str, **kwargs) -> dict:
"""Internal execution of browser action."""
try:
await self._ensure_browser()
except RuntimeError as e:
return {"error": str(e)}
timeout = kwargs.get("timeout", 30) * 1000 # Convert to ms
try:
if action == "navigate":
url = kwargs.get("url")
if not url:
return {"error": "URL is required for navigate action"}
await self._page.goto(
url, timeout=timeout, wait_until="domcontentloaded"
)
if kwargs.get("wait_for"):
await self._page.wait_for_selector(
kwargs["wait_for"], timeout=timeout
)
return {"url": self._page.url, "title": await self._page.title()}
elif action == "screenshot":
import time
import uuid
# Navigate first if URL provided
if kwargs.get("url"):
await self._page.goto(
kwargs["url"], timeout=timeout, wait_until="domcontentloaded"
)
# Save screenshot to workspace-aware loot/artifacts/screenshots/
from ..workspaces.utils import get_loot_file
output_dir = get_loot_file("artifacts/screenshots").parent
timestamp = int(time.time())
unique_id = uuid.uuid4().hex[:8]
filename = f"screenshot_{timestamp}_{unique_id}.png"
filepath = output_dir / filename
await self._page.screenshot(path=str(filepath), full_page=True)
return {"path": str(filepath)}
elif action == "get_content":
if kwargs.get("url"):
await self._page.goto(
kwargs["url"], timeout=timeout, wait_until="domcontentloaded"
)
content = await self._page.content()
# Also get text content for easier reading
text_content = await self._page.evaluate(
"() => document.body.innerText"
)
return {
"content": text_content,
"html": content[:10000] if len(content) > 10000 else content,
}
elif action == "get_links":
if kwargs.get("url"):
await self._page.goto(
kwargs["url"], timeout=timeout, wait_until="domcontentloaded"
)
links = await self._page.evaluate(
"""() => {
return Array.from(document.querySelectorAll('a[href]')).map(a => ({
href: a.href,
text: a.innerText.trim()
}));
}"""
)
return {"links": links}
elif action == "get_forms":
if kwargs.get("url"):
await self._page.goto(
kwargs["url"], timeout=timeout, wait_until="domcontentloaded"
)
forms = await self._page.evaluate(
"""() => {
return Array.from(document.querySelectorAll('form')).map(form => ({
action: form.action,
method: form.method || 'GET',
inputs: Array.from(form.querySelectorAll('input, textarea, select')).map(input => ({
name: input.name,
type: input.type || 'text',
value: input.value
}))
}));
}"""
)
return {"forms": forms}
elif action == "click":
selector = kwargs.get("selector")
if not selector:
return {"error": "Selector is required for click action"}
await self._page.click(selector, timeout=timeout)
return {"selector": selector, "clicked": True}
elif action == "type":
selector = kwargs.get("selector")
text = kwargs.get("text", "")
if not selector:
return {"error": "Selector is required for type action"}
await self._page.fill(selector, text, timeout=timeout)
return {"selector": selector, "typed": True}
elif action == "execute_js":
javascript = kwargs.get("javascript")
if not javascript:
return {"error": "JavaScript code is required"}
result = await self._page.evaluate(javascript)
return {"result": str(result) if result else ""}
else:
return {"error": f"Unknown browser action: {action}"}
except Exception as e:
return {"error": f"Browser action failed: {str(e)}"}
async def proxy_action(self, action: str, **kwargs) -> dict:
"""HTTP proxy actions using httpx."""
try:
import httpx
except ImportError:
return {"error": "httpx not installed. Install with: pip install httpx"}
timeout = kwargs.get("timeout", 30)
try:
async with httpx.AsyncClient(
timeout=timeout, follow_redirects=True
) as client:
if action == "request":
method = kwargs.get("method", "GET").upper()
url = kwargs.get("url")
headers = kwargs.get("headers", {})
data = kwargs.get("data")
if not url:
return {"error": "URL is required"}
response = await client.request(
method, url, headers=headers, data=data
)
return {
"status_code": response.status_code,
"headers": dict(response.headers),
"body": (
response.text[:10000]
if len(response.text) > 10000
else response.text
),
}
elif action == "get":
url = kwargs.get("url")
if not url:
return {"error": "URL is required"}
response = await client.get(url, headers=kwargs.get("headers", {}))
return {
"status_code": response.status_code,
"headers": dict(response.headers),
"body": response.text[:10000],
}
elif action == "post":
url = kwargs.get("url")
if not url:
return {"error": "URL is required"}
response = await client.post(
url,
headers=kwargs.get("headers", {}),
data=kwargs.get("data"),
json=kwargs.get("json"),
)
return {
"status_code": response.status_code,
"headers": dict(response.headers),
"body": response.text[:10000],
}
else:
return {"error": f"Unknown proxy action: {action}"}
except Exception as e:
return {"error": f"Proxy action failed: {str(e)}"}
async def is_running(self) -> bool:
return self._running
async def get_status(self) -> dict:
return {
"type": "local",
"running": self._running,
"browser_active": self._page is not None,
}