# pentestagent/pentestagent/runtime/runtime.py

"""Runtime abstraction for PentestAgent."""

import platform
import shutil
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
    from ..mcp import MCPManager


# Executables probed on PATH during environment detection, keyed by category.
# Order is significant: categories and tools are reported in the order listed.
# A tool may be listed under more than one category (e.g. "crackmapexec",
# "netexec", "binwalk", "foremost").
INTERESTING_TOOLS = {
    # Host / port discovery
    "network_scan": [
        "nmap", "masscan", "rustscan", "naabu", "unicornscan", "zmap", "arp-scan",
    ],
    # Web content discovery and vulnerability scanning
    "web_scan": [
        "nikto", "gobuster", "dirb", "dirbuster", "ffuf", "feroxbuster", "wpscan",
        "nuclei", "whatweb", "wfuzz", "arjun", "burpsuite", "zaproxy", "zap-cli",
    ],
    # SMB / LDAP / SNMP / Kerberos / RDP enumeration and remote access
    "enumeration": [
        "enum4linux", "enum4linux-ng", "smbclient", "smbmap", "rpcclient",
        "ldapsearch", "snmpwalk", "snmp-check", "onesixtyone", "nbtscan",
        "responder",
        "impacket-smbclient", "impacket-GetNPUsers", "impacket-GetUserSPNs",
        "impacket-psexec", "impacket-wmiexec", "impacket-dcomexec",
        "crackmapexec", "netexec", "evil-winrm", "kerbrute", "xfreerdp",
        "rdesktop",
    ],
    "exploitation": [
        "msfconsole", "msfvenom", "searchsploit", "sqlmap", "commix", "xsser",
        "ysoserial",
    ],
    "password_attacks": [
        "hydra", "medusa", "john", "hashcat", "crackmapexec", "netexec",
        "thc-hydra", "patator",
    ],
    "network_analysis": [
        "tcpdump", "tshark", "wireshark", "ngrep", "ettercap", "bettercap",
    ],
    "post_exploitation": [
        "mimikatz", "pypykatz", "impacket-secretsdump", "bloodhound",
        "sharphound", "powerview", "linpeas", "winpeas", "pspy",
    ],
    "tunneling": [
        "proxychains", "proxychains4", "socat", "chisel", "ligolo", "sshuttle",
        "ngrok",
    ],
    "osint": [
        "theHarvester", "recon-ng", "amass", "subfinder", "assetfinder",
        "sublist3r", "dnsenum", "dnsrecon", "fierce",
    ],
    "wireless": [
        "aircrack-ng", "airodump-ng", "aireplay-ng", "airmon-ng", "wifite",
        "kismet", "reaver", "bully", "hostapd", "wpa_supplicant",
    ],
    "reverse_engineering": [
        "ghidra", "radare2", "r2", "gdb", "pwndbg", "gef", "binwalk", "foremost",
        "exiftool", "objdump", "readelf", "strace", "ltrace",
    ],
    "cloud": [
        "aws", "az", "gcloud", "s3scanner", "cloud_enum", "ScoutSuite", "prowler",
        "pacu", "cloudfox",
    ],
    "forensics": [
        "steghide", "stegseek", "volatility", "volatility3", "autopsy",
        "sleuthkit", "binwalk", "foremost", "scalpel", "bulk_extractor",
    ],
    "fuzzing": [
        "boofuzz", "radamsa", "afl-fuzz", "zzuf", "honggfuzz",
    ],
    "mobile": [
        "adb", "apktool", "jadx", "frida", "objection", "mobsf", "drozer",
    ],
    # General-purpose CLI tooling
    "utilities": [
        "curl", "wget", "nc", "netcat", "ncat", "ssh", "telnet", "git", "docker",
        "kubectl", "kubeletctl", "kube-hunter", "trivy", "jq",
        "python3", "python", "perl", "ruby", "gcc", "g++", "make",
        "base64", "openssl", "xxd", "strings",
        "dig", "whois", "host", "traceroute", "mtr", "ping",
        "awk", "sed", "grep", "find",
    ],
}


@dataclass
class ToolInfo:
    """A command-line tool that was located on this machine."""

    name: str  # Executable name that was looked up
    path: str  # Location the executable resolved to
    category: str  # Category the tool is listed under


@dataclass
class EnvironmentInfo:
    """Snapshot of the host: operating system, shell, CPU architecture, tools."""

    os: str  # e.g. "Windows", "Linux", "Darwin"
    os_version: str
    shell: str  # e.g. "powershell", "bash", "zsh"
    architecture: str  # e.g. "x86_64", "arm64"
    available_tools: List[ToolInfo] = field(default_factory=list)

    def __str__(self) -> str:
        """Render a short, prompt-friendly summary of the environment.

        Tools are listed one category per line, in first-seen order; when no
        tool is available the tools line reads "Available CLI Tools: None".
        """
        # Bucket tool names under their category, preserving encounter order.
        by_category = {}
        for entry in self.available_tools:
            by_category.setdefault(entry.category, []).append(entry.name)

        if by_category:
            # Each category goes on its own line below the heading.
            listing = "".join(
                f"\n  - {category}: {', '.join(names)}"
                for category, names in by_category.items()
            )
        else:
            listing = " None"

        return (
            f"{self.os} ({self.architecture}), shell: {self.shell}\n"
            f"Available CLI Tools:{listing}"
        )


def detect_environment() -> EnvironmentInfo:
    """Detect the current system environment.

    Collects the OS name, release and CPU architecture from ``platform``,
    guesses the active shell from environment variables, flags WSL when
    ``/proc/version`` mentions Microsoft, and probes PATH for every tool
    listed in ``INTERESTING_TOOLS``.

    Returns:
        EnvironmentInfo describing the host. A tool listed under several
        categories appears once per category.
    """
    import os

    os_name = platform.system()
    os_version = platform.release()
    arch = platform.machine()

    if os_name == "Windows":
        comspec = os.environ.get("COMSPEC", "").lower()
        if "powershell" in comspec:
            shell = "powershell"
        elif "cmd.exe" in comspec:
            shell = "cmd"
        elif "PSModulePath" in os.environ:
            # Fallback heuristic: treat the presence of PSModulePath as a
            # PowerShell session.
            shell = "powershell"
        else:
            shell = "cmd"  # Default to cmd on Windows if unsure
    else:
        # Unix-like. `or` (rather than a .get default) also covers SHELL being
        # set but empty, which would otherwise produce an empty shell name.
        shell_path = os.environ.get("SHELL") or "/bin/sh"
        shell = os.path.basename(shell_path.rstrip("/")) or "unknown"

        # WSL detection: the WSL kernel identifies itself in /proc/version.
        if os_name == "Linux":
            try:
                with open(
                    "/proc/version", "r", encoding="utf-8", errors="replace"
                ) as f:
                    if "microsoft" in f.read().lower():
                        os_name = "Linux (WSL)"
            except OSError:
                # /proc may be missing or unreadable (containers, sandboxes);
                # WSL detection is best-effort only.
                pass

    # Detect available tools with categories. Some tools are listed under more
    # than one category, so remember each lookup instead of rescanning PATH.
    resolved = {}
    available_tools = []
    for category, tools in INTERESTING_TOOLS.items():
        for tool_name in tools:
            if tool_name not in resolved:
                resolved[tool_name] = shutil.which(tool_name)
            tool_path = resolved[tool_name]
            if tool_path:
                available_tools.append(
                    ToolInfo(name=tool_name, path=tool_path, category=category)
                )

    return EnvironmentInfo(
        os=os_name,
        os_version=os_version,
        shell=shell,
        architecture=arch,
        available_tools=available_tools,
    )


@dataclass
class CommandResult:
    """Outcome of one executed command: exit status plus captured streams."""

    exit_code: int
    stdout: str
    stderr: str

    @property
    def success(self) -> bool:
        """True when the exit code is zero."""
        return not self.exit_code != 0

    @property
    def output(self) -> str:
        """Stdout followed by stderr, newline-separated, skipping empty streams."""
        return "\n".join(
            stream for stream in (self.stdout, self.stderr) if stream
        )


class Runtime(ABC):
    """Interface that every runtime environment has to implement."""

    # Environment cache stored on the Runtime class itself, so it is shared by
    # every instance and detected at most once.
    _environment: Optional[EnvironmentInfo] = None

    def __init__(self, mcp_manager: Optional["MCPManager"] = None):
        """
        Set up state common to all runtimes.

        Args:
            mcp_manager: Optional MCP manager for tool calls
        """
        self.mcp_manager = mcp_manager
        self.plan = None  # Set by agent for finish tool access

    @property
    def environment(self) -> EnvironmentInfo:
        """Environment info, detected on first access and cached afterwards."""
        cached = Runtime._environment
        if cached is not None:
            return cached
        cached = detect_environment()
        Runtime._environment = cached
        return cached

    @abstractmethod
    async def start(self):
        """Bring the runtime environment up."""

    @abstractmethod
    async def stop(self):
        """Shut the runtime environment down."""

    @abstractmethod
    async def execute_command(self, command: str, timeout: int = 300) -> CommandResult:
        """
        Run a shell command.

        Args:
            command: The command to execute
            timeout: Timeout in seconds

        Returns:
            CommandResult with output
        """

    @abstractmethod
    async def browser_action(self, action: str, **kwargs) -> dict:
        """
        Carry out a browser automation action.

        Args:
            action: The action to perform
            **kwargs: Action-specific arguments

        Returns:
            Action result
        """

    @abstractmethod
    async def proxy_action(self, action: str, **kwargs) -> dict:
        """
        Carry out an HTTP proxy action.

        Args:
            action: The action to perform
            **kwargs: Action-specific arguments

        Returns:
            Action result
        """

    @abstractmethod
    async def is_running(self) -> bool:
        """Report whether the runtime is currently running."""

    @abstractmethod
    async def get_status(self) -> dict:
        """
        Describe the current state of the runtime.

        Returns:
            Status dictionary
        """


class LocalRuntime(Runtime):
    """Local runtime that executes commands directly on the host.

    Shell commands run as asyncio subprocesses, browser automation uses a
    lazily started headless Chromium driven by Playwright, and HTTP requests
    are sent with httpx. Playwright and httpx are imported on demand, so they
    are only needed when the corresponding action is actually used.
    """

    # Seconds to wait for a signalled child process to exit before giving up.
    _PROCESS_EXIT_GRACE = 5
    # Browser action timeout (seconds) used when the caller supplies none.
    _DEFAULT_BROWSER_TIMEOUT = 30
    # Extra seconds granted on top of the action timeout for browser startup.
    _BROWSER_STARTUP_BUFFER = 10
    # Maximum number of characters of HTML / HTTP body returned to callers.
    _MAX_BODY_CHARS = 10000

    def __init__(self, mcp_manager: Optional["MCPManager"] = None):
        """
        Initialize the local runtime.

        Args:
            mcp_manager: Optional MCP manager for tool calls
        """
        super().__init__(mcp_manager)
        self._running = False
        self._browser = None
        self._browser_context = None
        self._page = None
        self._playwright = None
        # Subprocesses started by execute_command that have not finished yet.
        self._active_processes: list = []

    async def start(self):
        """Mark the runtime as running and create the loot directory tree."""
        self._running = True
        # Create organized loot directory structure (workspace-aware)
        from ..workspaces.utils import get_loot_base

        base = get_loot_base()
        base.mkdir(parents=True, exist_ok=True)
        (base / "reports").mkdir(parents=True, exist_ok=True)
        (base / "artifacts").mkdir(parents=True, exist_ok=True)
        (base / "artifacts" / "screenshots").mkdir(parents=True, exist_ok=True)

    async def stop(self):
        """Stop the local runtime: end child processes, then close the browser.

        Each running process is asked to terminate and, if it has not exited
        within the grace period, is killed. Cleanup is best-effort and never
        raises for an individual process.
        """
        # Iterate over a snapshot: execute_command removes entries from the
        # live list and may run while this coroutine is suspended.
        for proc in list(self._active_processes):
            try:
                if proc.returncode is None:
                    proc.terminate()
                    # Do not wait forever on a process that ignores SIGTERM.
                    if not await self._wait_for_exit(proc):
                        self._kill_process(proc)
                        await self._wait_for_exit(proc)
                # Only stdin is a closable StreamWriter; the stdout/stderr
                # StreamReader objects have no close() method.
                if proc.stdin:
                    proc.stdin.close()
            except Exception:
                # Shutdown must not fail because one process misbehaves.
                pass
        self._active_processes.clear()

        # Clean up browser
        await self._cleanup_browser()
        self._running = False

    @staticmethod
    def _kill_process(proc) -> None:
        """Force-kill *proc* if it is still running; tolerate a dead process."""
        if proc is None or proc.returncode is not None:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the check and the signal.
            pass

    async def _wait_for_exit(self, proc) -> bool:
        """Wait up to the grace period for *proc* to exit.

        Returns:
            True if the process exited in time, False otherwise.
        """
        import asyncio

        try:
            await asyncio.wait_for(proc.wait(), timeout=self._PROCESS_EXIT_GRACE)
        except asyncio.TimeoutError:
            return False
        return True

    async def _cleanup_browser(self):
        """Release browser resources in reverse order of creation.

        Errors while closing are ignored so that every resource gets a chance
        to be released.
        """
        teardown = (
            ("_page", "close"),
            ("_browser_context", "close"),
            ("_browser", "close"),
            ("_playwright", "stop"),
        )
        for attr, closer in teardown:
            resource = getattr(self, attr)
            if resource is None:
                continue
            # Drop the reference first so state is reset even if closing is
            # interrupted.
            setattr(self, attr, None)
            try:
                await getattr(resource, closer)()
            except Exception:
                pass

    async def _ensure_browser(self):
        """Start Playwright and open a page if none is open yet.

        Raises:
            RuntimeError: If Playwright is not installed.
        """
        if self._page is not None:
            return

        try:
            from playwright.async_api import async_playwright
        except ImportError as e:
            raise RuntimeError(
                "Playwright not installed. Install with:\n"
                "  pip install playwright\n"
                "  playwright install chromium"
            ) from e

        # browser_action enforces a hard timeout, so an earlier start may have
        # been cancelled half-way. Release those leftovers instead of leaking
        # them when the attributes are overwritten below.
        if self._playwright or self._browser or self._browser_context:
            await self._cleanup_browser()

        self._playwright = await async_playwright().start()
        self._browser = await self._playwright.chromium.launch(headless=True)
        self._browser_context = await self._browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        )
        self._page = await self._browser_context.new_page()

    async def execute_command(self, command: str, timeout: int = 300) -> CommandResult:
        """Execute a shell command locally and capture its output.

        The command runs with stdin detached and with TERM/NO_COLOR set to
        discourage coloured output; remaining ANSI escape sequences are
        stripped from the captured text.

        Args:
            command: The command line passed to the shell
            timeout: Timeout in seconds

        Returns:
            CommandResult with the exit code and cleaned stdout/stderr. On
            timeout, cancellation or any other failure the exit code is -1 and
            stderr carries the reason; a still-running process is killed.
        """
        import asyncio
        import os
        import re
        import subprocess

        # Regex to strip ANSI escape codes
        ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

        # Set environment variables to discourage ANSI output
        env = os.environ.copy()
        env["TERM"] = "dumb"
        env["NO_COLOR"] = "1"

        # Bound before the try so the handlers below can reference it even
        # when process creation itself fails.
        process = None
        try:
            process = await asyncio.create_subprocess_shell(
                command,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                stdin=subprocess.DEVNULL,  # Prevent interactive prompts
                env=env,
            )

            # Track process for cleanup
            self._active_processes.append(process)

            stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=timeout
            )

            return CommandResult(
                exit_code=process.returncode or 0,
                stdout=ansi_escape.sub("", stdout.decode(errors="replace")),
                stderr=ansi_escape.sub("", stderr.decode(errors="replace")),
            )

        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); the process itself must be
            # killed or it keeps running in the background.
            self._kill_process(process)
            if process is not None:
                await self._wait_for_exit(process)
            return CommandResult(
                exit_code=-1,
                stdout="",
                stderr=f"Command timed out after {timeout} seconds",
            )
        except asyncio.CancelledError:
            # Handle Ctrl+C gracefully: cancellation is deliberately reported
            # as a result instead of being re-raised. Kill without awaiting so
            # the cancelled task finishes promptly.
            self._kill_process(process)
            return CommandResult(exit_code=-1, stdout="", stderr="Command cancelled")
        except Exception as e:
            self._kill_process(process)
            return CommandResult(exit_code=-1, stdout="", stderr=str(e))
        finally:
            # Remove from tracking on every exit path
            if process is not None and process in self._active_processes:
                self._active_processes.remove(process)

    @classmethod
    def _browser_timeout(cls, kwargs: dict):
        """Return the requested browser timeout in seconds, or the default.

        An explicit ``timeout=None`` is treated like a missing value.
        """
        requested = kwargs.get("timeout")
        return cls._DEFAULT_BROWSER_TIMEOUT if requested is None else requested

    async def browser_action(self, action: str, **kwargs) -> dict:
        """Perform browser automation actions using Playwright.

        Args:
            action: One of "navigate", "screenshot", "get_content",
                "get_links", "get_forms", "click", "type", "execute_js"
            **kwargs: Action-specific arguments; "timeout" is in seconds

        Returns:
            The action result, or a dict with an "error" key on failure.
        """
        import asyncio

        try:
            # Enforce a hard timeout on the entire operation to prevent
            # hanging. The requested timeout gets a 10 second buffer for
            # browser startup overhead.
            op_timeout = self._browser_timeout(kwargs) + self._BROWSER_STARTUP_BUFFER
            return await asyncio.wait_for(
                self._execute_browser_action(action, **kwargs), timeout=op_timeout
            )
        except asyncio.TimeoutError:
            return {"error": f"Browser action '{action}' timed out after {op_timeout}s"}
        except Exception as e:
            return {"error": str(e)}

    async def _goto_if_url(self, kwargs: dict, timeout_ms) -> None:
        """Navigate the page to kwargs["url"] when a URL was supplied."""
        url = kwargs.get("url")
        if url:
            await self._page.goto(
                url, timeout=timeout_ms, wait_until="domcontentloaded"
            )

    async def _execute_browser_action(self, action: str, **kwargs) -> dict:
        """Run a single browser action; failures are returned as error dicts."""
        try:
            await self._ensure_browser()
        except RuntimeError as e:
            return {"error": str(e)}

        timeout = self._browser_timeout(kwargs) * 1000  # Convert to ms

        try:
            if action == "navigate":
                if not kwargs.get("url"):
                    return {"error": "URL is required for navigate action"}

                await self._goto_if_url(kwargs, timeout)

                if kwargs.get("wait_for"):
                    await self._page.wait_for_selector(
                        kwargs["wait_for"], timeout=timeout
                    )

                return {"url": self._page.url, "title": await self._page.title()}

            elif action == "screenshot":
                import time
                import uuid

                # Navigate first if URL provided
                await self._goto_if_url(kwargs, timeout)

                from ..workspaces.utils import get_loot_file

                # NOTE(review): taking .parent of "artifacts/screenshots"
                # looks like it yields the artifacts/ directory rather than
                # artifacts/screenshots/ (which start() creates) - confirm
                # against get_loot_file before changing.
                output_dir = get_loot_file("artifacts/screenshots").parent

                # Timestamp plus random suffix keeps file names unique.
                timestamp = int(time.time())
                unique_id = uuid.uuid4().hex[:8]
                filename = f"screenshot_{timestamp}_{unique_id}.png"
                filepath = output_dir / filename

                await self._page.screenshot(path=str(filepath), full_page=True)

                return {"path": str(filepath)}

            elif action == "get_content":
                await self._goto_if_url(kwargs, timeout)

                content = await self._page.content()

                # Also get text content for easier reading
                text_content = await self._page.evaluate(
                    "() => document.body.innerText"
                )

                return {
                    "content": text_content,
                    "html": content[: self._MAX_BODY_CHARS],
                }

            elif action == "get_links":
                await self._goto_if_url(kwargs, timeout)

                links = await self._page.evaluate(
                    """() => {
                    return Array.from(document.querySelectorAll('a[href]')).map(a => ({
                        href: a.href,
                        text: a.innerText.trim()
                    }));
                }"""
                )

                return {"links": links}

            elif action == "get_forms":
                await self._goto_if_url(kwargs, timeout)

                forms = await self._page.evaluate(
                    """() => {
                    return Array.from(document.querySelectorAll('form')).map(form => ({
                        action: form.action,
                        method: form.method || 'GET',
                        inputs: Array.from(form.querySelectorAll('input, textarea, select')).map(input => ({
                            name: input.name,
                            type: input.type || 'text',
                            value: input.value
                        }))
                    }));
                }"""
                )

                return {"forms": forms}

            elif action == "click":
                selector = kwargs.get("selector")
                if not selector:
                    return {"error": "Selector is required for click action"}

                await self._page.click(selector, timeout=timeout)
                return {"selector": selector, "clicked": True}

            elif action == "type":
                selector = kwargs.get("selector")
                text = kwargs.get("text", "")
                if not selector:
                    return {"error": "Selector is required for type action"}

                await self._page.fill(selector, text, timeout=timeout)
                return {"selector": selector, "typed": True}

            elif action == "execute_js":
                javascript = kwargs.get("javascript")
                if not javascript:
                    return {"error": "JavaScript code is required"}

                result = await self._page.evaluate(javascript)
                # Only a missing result becomes ""; falsy values such as 0,
                # False or [] are real results and must be reported.
                return {"result": "" if result is None else str(result)}

            else:
                return {"error": f"Unknown browser action: {action}"}

        except Exception as e:
            return {"error": f"Browser action failed: {str(e)}"}

    @classmethod
    def _summarize_response(cls, response) -> dict:
        """Convert an HTTP response into the dict returned to callers."""
        return {
            "status_code": response.status_code,
            "headers": dict(response.headers),
            "body": response.text[: cls._MAX_BODY_CHARS],
        }

    async def proxy_action(self, action: str, **kwargs) -> dict:
        """HTTP proxy actions using httpx.

        Args:
            action: "request", "get" or "post"
            **kwargs: "url" (required), plus optional "headers", "data",
                "json" (post only), "method" (request only) and "timeout"
                in seconds

        Returns:
            Dict with "status_code", "headers" and a truncated "body", or a
            dict with an "error" key on failure.
        """
        try:
            import httpx
        except ImportError:
            return {"error": "httpx not installed. Install with: pip install httpx"}

        timeout = kwargs.get("timeout", 30)

        try:
            async with httpx.AsyncClient(
                timeout=timeout, follow_redirects=True
            ) as client:
                if action not in ("request", "get", "post"):
                    return {"error": f"Unknown proxy action: {action}"}

                url = kwargs.get("url")
                if not url:
                    return {"error": "URL is required"}

                headers = kwargs.get("headers", {})

                if action == "request":
                    method = kwargs.get("method", "GET").upper()
                    response = await client.request(
                        method, url, headers=headers, data=kwargs.get("data")
                    )
                elif action == "get":
                    response = await client.get(url, headers=headers)
                else:
                    response = await client.post(
                        url,
                        headers=headers,
                        data=kwargs.get("data"),
                        json=kwargs.get("json"),
                    )

                return self._summarize_response(response)

        except Exception as e:
            return {"error": f"Proxy action failed: {str(e)}"}

    async def is_running(self) -> bool:
        """Return True between start() and stop()."""
        return self._running

    async def get_status(self) -> dict:
        """Return the runtime type, running flag and whether a page is open."""
        return {
            "type": "local",
            "running": self._running,
            "browser_active": self._page is not None,
        }