From dcb1eabb4e98d89035138a1be8f60fd8efdf9d2d Mon Sep 17 00:00:00 2001 From: ilya-bov <111734093+ilya-bov@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:08:31 +0300 Subject: [PATCH] feat: improve tool execution reliability and package installs --- src/lib/agent/agent.ts | 94 ++- src/lib/storage/settings-store.ts | 4 +- src/lib/tools/code-execution.ts | 1076 ++++++++++++++++++++++--- src/lib/tools/install-orchestrator.ts | 577 +++++++++++++ src/lib/tools/tool.ts | 145 +++- src/prompts/system.md | 7 +- src/prompts/tool-code_execution.md | 12 +- src/prompts/tool-install_packages.md | 24 + src/prompts/tool-process.md | 19 + 9 files changed, 1825 insertions(+), 133 deletions(-) create mode 100644 src/lib/tools/install-orchestrator.ts create mode 100644 src/prompts/tool-install_packages.md create mode 100644 src/prompts/tool-process.md diff --git a/src/lib/agent/agent.ts b/src/lib/agent/agent.ts index 41af8fd..2c5944e 100644 --- a/src/lib/agent/agent.ts +++ b/src/lib/agent/agent.ts @@ -17,6 +17,10 @@ import type { ChatMessage } from "@/lib/types"; import { publishUiSyncEvent } from "@/lib/realtime/event-bus"; const LLM_LOG_BORDER = "═".repeat(60); +const MAX_TOOL_STEPS_PER_TURN = 30; +const MAX_TOOL_STEPS_SUBORDINATE = 15; +const POLL_NO_PROGRESS_BLOCK_THRESHOLD = 16; +const POLL_BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000] as const; function asRecord(value: unknown): Record | null { if (value == null || typeof value !== "object" || Array.isArray(value)) { @@ -90,6 +94,9 @@ function extractDeterministicFailureSignature(output: unknown): string | null { trimmed.startsWith("[MCP tool error]") || trimmed.startsWith("[Preflight error]") || trimmed.startsWith("[Loop guard]") || + trimmed.includes("Process error:") || + trimmed.includes("[Process killed after timeout]") || + /Exit code:\s*-?[1-9]\d*/.test(trimmed) || /^Failed\b/i.test(trimmed) || /^Skill ".+" not found\./i.test(trimmed) || (/\bnot found\b/i.test(trimmed) && @@ -102,8 +109,52 @@ function 
extractDeterministicFailureSignature(output: unknown): string | null { return trimmed.length > 400 ? `${trimmed.slice(0, 400)}...` : trimmed; } +function isPollLikeCall(toolName: string, input: unknown): boolean { + if (toolName !== "process") { + return false; + } + const record = asRecord(input); + if (!record) { + return false; + } + const action = typeof record.action === "string" ? record.action : ""; + return action === "poll" || action === "log"; +} + +function normalizeNoProgressValue(value: unknown): unknown { + if (typeof value === "string") { + const trimmed = value.trim(); + return trimmed.length > 1000 ? `${trimmed.slice(0, 1000)}...` : trimmed; + } + + if (Array.isArray(value)) { + return value.slice(0, 8).map((item) => normalizeNoProgressValue(item)); + } + + const record = asRecord(value); + if (!record) { + return value; + } + + const out: Record = {}; + for (const [key, raw] of Object.entries(record)) { + if (key === "output" && typeof raw === "string") { + out[key] = raw.length > 1000 ? 
`${raw.slice(0, 1000)}...` : raw; + continue; + } + if (key === "attempts" && Array.isArray(raw)) { + out[key] = raw.slice(0, 3).map((item) => normalizeNoProgressValue(item)); + continue; + } + out[key] = normalizeNoProgressValue(raw); + } + + return out; +} + function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet { const deterministicFailureByCall = new Map(); + const noProgressByCall = new Map(); const wrappedTools: ToolSet = {}; for (const [toolName, toolDef] of Object.entries(tools)) { @@ -116,6 +167,24 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet { ...toolDef, execute: async (input: unknown, options: ToolExecutionOptions) => { const callKey = `${toolName}:${stableSerialize(input)}`; + const previousNoProgress = noProgressByCall.get(callKey); + if ( + previousNoProgress && + previousNoProgress.count >= POLL_NO_PROGRESS_BLOCK_THRESHOLD && + isPollLikeCall(toolName, input) + ) { + const scheduleIdx = Math.min( + previousNoProgress.count - POLL_NO_PROGRESS_BLOCK_THRESHOLD, + POLL_BACKOFF_SCHEDULE_MS.length - 1 + ); + const retryInMs = POLL_BACKOFF_SCHEDULE_MS[scheduleIdx] ?? 
60000; + return ( + `[Loop guard] Detected no-progress polling loop for "${toolName}".\n` + + `Repeated identical result ${previousNoProgress.count} times.\n` + + `Back off for ~${retryInMs}ms or report the background task as stuck.` + ); + } + const previousFailure = deterministicFailureByCall.get(callKey); if (previousFailure) { return ( @@ -132,6 +201,25 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet { } else { deterministicFailureByCall.delete(callKey); } + + if (isPollLikeCall(toolName, input)) { + const outputHash = stableSerialize(normalizeNoProgressValue(output)); + const previous = noProgressByCall.get(callKey); + if (previous && previous.hash === outputHash) { + noProgressByCall.set(callKey, { + hash: outputHash, + count: previous.count + 1, + }); + } else { + noProgressByCall.set(callKey, { + hash: outputHash, + count: 1, + }); + } + } else { + noProgressByCall.delete(callKey); + } + return output; }, } as typeof toolDef; @@ -398,7 +486,7 @@ export async function runAgent(options: { system: systemPrompt, messages, tools, - stopWhen: stepCountIs(15), // Allow up to 15 tool call rounds + stopWhen: stepCountIs(MAX_TOOL_STEPS_PER_TURN), temperature: settings.chatModel.temperature ?? 0.7, maxOutputTokens: settings.chatModel.maxTokens ?? 4096, onFinish: async (event) => { @@ -528,7 +616,7 @@ export async function runAgentText(options: { system: systemPrompt, messages, tools, - stopWhen: stepCountIs(15), + stopWhen: stepCountIs(MAX_TOOL_STEPS_PER_TURN), temperature: settings.chatModel.temperature ?? 0.7, maxOutputTokens: settings.chatModel.maxTokens ?? 4096, }); @@ -659,7 +747,7 @@ export async function runSubordinateAgent(options: { system: systemPrompt, messages, tools, - stopWhen: stepCountIs(10), + stopWhen: stepCountIs(MAX_TOOL_STEPS_SUBORDINATE), temperature: settings.chatModel.temperature ?? 0.7, maxOutputTokens: settings.chatModel.maxTokens ?? 
4096, }); diff --git a/src/lib/storage/settings-store.ts b/src/lib/storage/settings-store.ts index 5866489..016f636 100644 --- a/src/lib/storage/settings-store.ts +++ b/src/lib/storage/settings-store.ts @@ -34,8 +34,8 @@ export const DEFAULT_SETTINGS: AppSettings = { }, codeExecution: { enabled: true, - timeout: 180, - maxOutputLength: 50000, + timeout: 600, + maxOutputLength: 120000, }, memory: { enabled: true, diff --git a/src/lib/tools/code-execution.ts b/src/lib/tools/code-execution.ts index 7f4e6e2..a0f2496 100644 --- a/src/lib/tools/code-execution.ts +++ b/src/lib/tools/code-execution.ts @@ -1,56 +1,856 @@ -import { spawn, type ChildProcess } from "child_process"; +import fs from "fs"; +import path from "path"; +import { execFileSync, spawn, type ChildProcess } from "child_process"; import type { AppSettings } from "@/lib/types"; -// Persistent shell sessions -const sessions: Map = new Map(); +type ExecutionRuntime = "python" | "nodejs" | "terminal"; + +type TerminalSessionState = { + cwd: string; +}; + +type CommandResult = { + stdout: string; + stderr: string; + stdoutTail: string; + stderrTail: string; + exitCode: number | null; + timedOut: boolean; + spawnError?: string; +}; + +export type ExecuteCodeOptions = { + background?: boolean; + yieldMs?: number; +}; + +export type ManagedProcessStatus = "running" | "completed" | "failed" | "killed"; + +export type ManagedProcessSummary = { + sessionId: string; + runtime: ExecutionRuntime; + commandPreview: string; + status: ManagedProcessStatus; + startedAt: number; + updatedAt: number; + finishedAt?: number; + pid?: number; + cwd: string; + exitCode?: number | null; + timedOut: boolean; + truncated: boolean; + tail: string; +}; + +export type ManagedProcessPollResult = { + success: boolean; + status: ManagedProcessStatus | "not_found"; + sessionId: string; + output: string; + retryInMs?: number; + exitCode?: number | null; + timedOut?: boolean; + startedAt?: number; + finishedAt?: number; +}; + +export type 
ManagedProcessLogResult = { + success: boolean; + status: ManagedProcessStatus | "not_found"; + sessionId: string; + output: string; + totalLines?: number; + truncated?: boolean; +}; + +export type ManagedProcessKillResult = { + success: boolean; + status: "killed" | "already_finished" | "not_found"; + sessionId: string; + message: string; +}; + +type ManagedProcessSession = { + id: string; + runtime: ExecutionRuntime; + commandPreview: string; + startedAt: number; + updatedAt: number; + finishedAt?: number; + cwd: string; + pid?: number; + status: ManagedProcessStatus; + exitCode: number | null; + timedOut: boolean; + killedByUser: boolean; + spawnError?: string; + stdout: string; + stderr: string; + combined: string; + truncated: boolean; + process?: ChildProcess; + completion: Promise; + resolveCompletion: () => void; + version: number; + lastPolledVersion: number; + noProgressPollCount: number; + terminalMarker?: string; + terminalState?: TerminalSessionState; +}; + +type PreparedExecution = { + runtime: ExecutionRuntime; + command: string; + args: string[]; + cwd: string; + env: NodeJS.ProcessEnv; + commandPreview: string; + terminalMarker?: string; + terminalState?: TerminalSessionState; +}; + +const OUTPUT_TRUNCATED_MARKER = "[output truncated]"; +const OUTPUT_TAIL_CHARS = 8192; +const DEFAULT_YIELD_MS = 10_000; +const MIN_YIELD_MS = 10; +const MAX_YIELD_MS = 120_000; +const PROCESS_LOG_DEFAULT_TAIL_LINES = 200; +const PROCESS_SESSION_TTL_MS = 30 * 60_000; +const POLL_BACKOFF_SCHEDULE_MS = [5_000, 10_000, 30_000, 60_000] as const; + +const terminalSessions: Map = new Map(); +const runningProcessSessions: Map = new Map(); +const finishedProcessSessions: Map = new Map(); +let cachedLoginShellPath: string | null | undefined; + +const sweeper = setInterval(() => { + pruneFinishedProcessSessions(); +}, 60_000); +sweeper.unref?.(); /** * Execute code in a specified runtime */ export async function executeCode( - runtime: "python" | "nodejs" | "terminal", + 
runtime: ExecutionRuntime, code: string, - _sessionId: number, + sessionId: number, config: AppSettings["codeExecution"], - cwd?: string + cwd?: string, + options?: ExecuteCodeOptions ): Promise { - const timeout = (config.timeout || 180) * 1000; - const maxOutput = config.maxOutputLength || 50000; + const timeoutMs = toPositiveInteger(config.timeout, 180) * 1000; + const maxOutput = toPositiveInteger(config.maxOutputLength, 50000); + const baseCwd = cwd || process.cwd(); + const runInBackground = options?.background === true; + const yieldMs = + typeof options?.yieldMs === "number" && Number.isFinite(options.yieldMs) + ? Math.min(Math.max(Math.floor(options.yieldMs), MIN_YIELD_MS), MAX_YIELD_MS) + : null; try { - let command: string; - let args: string[]; + const prepared = prepareExecution({ + runtime, + code, + sessionId, + cwd: baseCwd, + }); - switch (runtime) { - case "python": - command = "python3"; - args = ["-c", code]; - break; - case "nodejs": - command = "node"; - args = ["-e", code]; - break; - case "terminal": - command = process.env.SHELL?.trim() || "sh"; - args = ["-c", rewriteAptCommandsWithSudo(code)]; - break; - default: - return `Error: Unknown runtime '${runtime}'`; + if (!runInBackground && yieldMs === null) { + const result = await runCommand(prepared.command, prepared.args, { + timeout: timeoutMs, + maxOutput, + cwd: prepared.cwd, + env: prepared.env, + }); + applyTerminalMarkerIfNeeded(prepared, result); + return formatCommandResult(result); } - const result = await runCommand(command, args, timeout, maxOutput, cwd); - return result; + const managed = startManagedExecution({ + prepared, + timeoutMs, + maxOutput, + }); + + if (runInBackground) { + return formatManagedSessionRunning(managed, true); + } + + const waitMs = yieldMs ?? 
DEFAULT_YIELD_MS; + const completedBeforeYield = await waitForManagedCompletion(managed, waitMs); + if (!completedBeforeYield) { + return formatManagedSessionRunning(managed, false); + } + + return formatManagedSessionResult(managed); } catch (error) { return `Execution error: ${error instanceof Error ? error.message : String(error)}`; } } +export function listManagedProcessSessions(): ManagedProcessSummary[] { + const summaries = [ + ...Array.from(runningProcessSessions.values()), + ...Array.from(finishedProcessSessions.values()), + ].map(toManagedSummary); + + summaries.sort((a, b) => b.startedAt - a.startedAt); + return summaries; +} + +export async function pollManagedProcessSession( + sessionId: string, + timeoutMs?: number +): Promise { + const id = sessionId.trim(); + if (!id) { + return { + success: false, + status: "not_found", + sessionId, + output: "session_id is required", + }; + } + + const session = getAnyProcessSession(id); + if (!session) { + return { + success: false, + status: "not_found", + sessionId: id, + output: `No session found for ${id}`, + }; + } + + const wait = + typeof timeoutMs === "number" && Number.isFinite(timeoutMs) + ? Math.max(0, Math.min(Math.floor(timeoutMs), 120_000)) + : 0; + + if (session.status === "running" && wait > 0) { + const startVersion = session.version; + const deadline = Date.now() + wait; + while (session.status === "running" && session.version === startVersion && Date.now() < deadline) { + await sleep(Math.min(250, deadline - Date.now())); + } + } + + const hasNewOutput = session.version !== session.lastPolledVersion; + session.lastPolledVersion = session.version; + if (session.status === "running") { + session.noProgressPollCount = hasNewOutput ? 0 : session.noProgressPollCount + 1; + } else { + session.noProgressPollCount = 0; + } + + const retryInMs = + session.status === "running" + ? 
POLL_BACKOFF_SCHEDULE_MS[Math.min(session.noProgressPollCount, POLL_BACKOFF_SCHEDULE_MS.length - 1)] + : undefined; + + return { + success: true, + status: session.status, + sessionId: session.id, + output: formatManagedSessionOutput(session), + retryInMs, + exitCode: session.status === "running" ? undefined : session.exitCode, + timedOut: session.timedOut, + startedAt: session.startedAt, + finishedAt: session.finishedAt, + }; +} + +export function readManagedProcessSessionLog( + sessionId: string, + offset?: number, + limit?: number +): ManagedProcessLogResult { + const id = sessionId.trim(); + if (!id) { + return { + success: false, + status: "not_found", + sessionId, + output: "session_id is required", + }; + } + + const session = getAnyProcessSession(id); + if (!session) { + return { + success: false, + status: "not_found", + sessionId: id, + output: `No session found for ${id}`, + }; + } + + const lines = session.combined.replace(/\r\n/g, "\n").split("\n"); + const usingDefaultTail = offset === undefined && limit === undefined; + const effectiveLimit = + typeof limit === "number" && Number.isFinite(limit) + ? Math.max(1, Math.floor(limit)) + : usingDefaultTail + ? PROCESS_LOG_DEFAULT_TAIL_LINES + : lines.length; + const effectiveOffset = + typeof offset === "number" && Number.isFinite(offset) + ? 
Math.max(0, Math.floor(offset)) + : Math.max(0, lines.length - effectiveLimit); + const slice = lines.slice(effectiveOffset, effectiveOffset + effectiveLimit).join("\n").trim(); + + return { + success: true, + status: session.status, + sessionId: session.id, + output: slice || "(no output)", + totalLines: lines.length, + truncated: session.truncated, + }; +} + +export function killManagedProcessSession(sessionId: string): ManagedProcessKillResult { + const id = sessionId.trim(); + if (!id) { + return { + success: false, + status: "not_found", + sessionId, + message: "session_id is required", + }; + } + + const running = runningProcessSessions.get(id); + if (!running) { + const existing = finishedProcessSessions.get(id); + if (existing) { + return { + success: true, + status: "already_finished", + sessionId: id, + message: `Session ${id} has already finished with status ${existing.status}.`, + }; + } + return { + success: false, + status: "not_found", + sessionId: id, + message: `No session found for ${id}`, + }; + } + + running.killedByUser = true; + running.updatedAt = Date.now(); + if (running.process) { + terminateProcess(running.process); + } + + return { + success: true, + status: "killed", + sessionId: id, + message: `Sent termination signal to session ${id}.`, + }; +} + +export function clearFinishedManagedProcessSessions(): { removed: number } { + const removed = finishedProcessSessions.size; + finishedProcessSessions.clear(); + return { removed }; +} + +export function removeManagedProcessSession(sessionId: string): { removed: boolean } { + const id = sessionId.trim(); + if (!id) { + return { removed: false }; + } + return { removed: finishedProcessSessions.delete(id) }; +} + +/** + * Clean up all sessions + */ +export function cleanupSessions(): void { + terminalSessions.clear(); + for (const session of runningProcessSessions.values()) { + session.killedByUser = true; + if (session.process) { + terminateProcess(session.process); + } + } + 
runningProcessSessions.clear(); + finishedProcessSessions.clear(); +} + +function prepareExecution(params: { + runtime: ExecutionRuntime; + code: string; + sessionId: number; + cwd: string; +}): PreparedExecution { + if (params.runtime === "python") { + return { + runtime: "python", + command: "python3", + args: ["-c", params.code], + cwd: params.cwd, + env: { ...process.env, PYTHONUNBUFFERED: "1" }, + commandPreview: `python3 -c ${previewText(params.code)}`, + }; + } + + if (params.runtime === "nodejs") { + return { + runtime: "nodejs", + command: "node", + args: ["-e", params.code], + cwd: params.cwd, + env: { ...process.env, PYTHONUNBUFFERED: "1" }, + commandPreview: `node -e ${previewText(params.code)}`, + }; + } + + const shell = process.env.SHELL?.trim() || "sh"; + const normalizedSessionId = + Number.isFinite(params.sessionId) && params.sessionId >= 0 ? Math.floor(params.sessionId) : 0; + const terminalState = terminalSessions.get(normalizedSessionId) ?? { cwd: params.cwd }; + terminalSessions.set(normalizedSessionId, terminalState); + + const marker = `__EGGENT_SESSION_RESULT_${Date.now().toString(36)}_${Math.random() + .toString(36) + .slice(2, 10)}`; + const rewrittenCode = rewriteAptCommandsWithSudo(params.code); + const wrapped = [ + "set +e", + rewrittenCode, + "__eggent_exit=$?", + '__eggent_pwd="$(pwd)"', + `printf "\\n${marker}\\t%s\\t%s\\n" "$__eggent_exit" "$__eggent_pwd"`, + "exit $__eggent_exit", + ].join("\n"); + + return { + runtime: "terminal", + command: shell, + args: ["-lc", wrapped], + cwd: terminalState.cwd || params.cwd, + env: buildTerminalEnv(shell), + commandPreview: previewText(params.code), + terminalMarker: marker, + terminalState, + }; +} + +function startManagedExecution(params: { + prepared: PreparedExecution; + timeoutMs: number; + maxOutput: number; +}): ManagedProcessSession { + const id = createManagedProcessId(); + let resolveCompletion = () => { + // replaced below + }; + const completion = new Promise((resolve) => { + 
resolveCompletion = resolve; + }); + + const session: ManagedProcessSession = { + id, + runtime: params.prepared.runtime, + commandPreview: params.prepared.commandPreview, + startedAt: Date.now(), + updatedAt: Date.now(), + cwd: params.prepared.cwd, + status: "running", + exitCode: null, + timedOut: false, + killedByUser: false, + stdout: "", + stderr: "", + combined: "", + truncated: false, + completion, + resolveCompletion, + version: 0, + lastPolledVersion: 0, + noProgressPollCount: 0, + terminalMarker: params.prepared.terminalMarker, + terminalState: params.prepared.terminalState, + }; + + const proc = spawn(params.prepared.command, params.prepared.args, { + stdio: ["pipe", "pipe", "pipe"], + env: params.prepared.env, + cwd: params.prepared.cwd, + }); + + session.process = proc; + session.pid = proc.pid ?? undefined; + + let finalized = false; + const timeoutHandle = setTimeout(() => { + session.timedOut = true; + session.updatedAt = Date.now(); + terminateProcess(proc); + }, params.timeoutMs); + + const finalize = () => { + if (finalized) { + return; + } + finalized = true; + clearTimeout(timeoutHandle); + + applyTerminalMarkerToSession(session); + + if (session.killedByUser) { + session.status = "killed"; + } else if (session.timedOut) { + session.status = "failed"; + } else if (session.spawnError) { + session.status = "failed"; + } else { + const code = session.exitCode ?? 0; + session.status = code === 0 ? 
"completed" : "failed"; + } + + session.finishedAt = Date.now(); + session.updatedAt = session.finishedAt; + session.version += 1; + + runningProcessSessions.delete(session.id); + finishedProcessSessions.set(session.id, session); + session.resolveCompletion(); + }; + + proc.stdout?.on("data", (data: Buffer) => { + appendManagedOutput(session, "stdout", data.toString(), params.maxOutput); + }); + + proc.stderr?.on("data", (data: Buffer) => { + appendManagedOutput(session, "stderr", data.toString(), params.maxOutput); + }); + + proc.on("close", (code) => { + session.exitCode = code; + finalize(); + }); + + proc.on("error", (error) => { + session.spawnError = error.message; + finalize(); + }); + + runningProcessSessions.set(session.id, session); + return session; +} + +function appendManagedOutput( + session: ManagedProcessSession, + stream: "stdout" | "stderr", + chunk: string, + maxOutput: number +): void { + const streamBuffer = stream === "stdout" ? session.stdout : session.stderr; + const streamCapped = appendTailWithCap(streamBuffer, chunk, maxOutput); + if (stream === "stdout") { + session.stdout = streamCapped.text; + } else { + session.stderr = streamCapped.text; + } + + const combinedCapped = appendTailWithCap(session.combined, chunk, maxOutput); + session.combined = combinedCapped.text; + session.truncated = session.truncated || streamCapped.truncated || combinedCapped.truncated; + session.updatedAt = Date.now(); + session.version += 1; +} + +function appendTailWithCap( + current: string, + chunk: string, + maxOutput: number +): { text: string; truncated: boolean } { + const combined = current + chunk; + if (combined.length <= maxOutput) { + return { text: combined, truncated: false }; + } + + const marker = `${OUTPUT_TRUNCATED_MARKER}\n`; + const keepChars = Math.max(0, maxOutput - marker.length); + const tail = combined.slice(combined.length - keepChars); + return { + text: `${marker}${tail}`, + truncated: true, + }; +} + +function 
applyTerminalMarkerIfNeeded(prepared: PreparedExecution, result: CommandResult): void { + if (prepared.runtime !== "terminal" || !prepared.terminalMarker) { + return; + } + + const parsed = parseSessionMarker(prepared.terminalMarker, result.stdout, result.stdoutTail); + result.stdout = parsed.cleanedStdout; + if (parsed.exitCode !== null) { + result.exitCode = parsed.exitCode; + } + if (prepared.terminalState && parsed.cwd && path.isAbsolute(parsed.cwd)) { + prepared.terminalState.cwd = parsed.cwd; + } +} + +function applyTerminalMarkerToSession(session: ManagedProcessSession): void { + if (session.runtime !== "terminal" || !session.terminalMarker) { + return; + } + + const parsed = parseSessionMarker(session.terminalMarker, session.stdout, session.stdout); + session.stdout = parsed.cleanedStdout; + if (parsed.exitCode !== null) { + session.exitCode = parsed.exitCode; + } + if (session.terminalState && parsed.cwd && path.isAbsolute(parsed.cwd)) { + session.terminalState.cwd = parsed.cwd; + } +} + +function parseSessionMarker( + marker: string, + stdout: string, + stdoutTail: string +): { + exitCode: number | null; + cwd: string | null; + cleanedStdout: string; +} { + const escapedMarker = escapeRegExp(marker); + const markerRegex = new RegExp(`${escapedMarker}\\t(-?\\d+)\\t([^\\r\\n]*)`); + const markerRemovalRegex = new RegExp( + `(?:\\r?\\n)?${escapedMarker}\\t-?\\d+\\t[^\\r\\n]*(?:\\r?\\n)?`, + "g" + ); + + const scanText = `${stdout}\n${stdoutTail}`; + const match = scanText.match(markerRegex); + const exitCode = match && typeof match[1] === "string" ? Number.parseInt(match[1], 10) : null; + const cwd = match && typeof match[2] === "string" ? match[2].trim() : null; + const cleanedStdout = stdout.replace(markerRemovalRegex, "\n").trimEnd(); + + return { + exitCode: Number.isFinite(exitCode) ? 
exitCode : null, + cwd: cwd || null, + cleanedStdout, + }; +} + +async function waitForManagedCompletion( + session: ManagedProcessSession, + waitMs: number +): Promise { + const boundedWait = Math.max(MIN_YIELD_MS, Math.min(waitMs, MAX_YIELD_MS)); + const timedOut = await Promise.race([ + session.completion.then(() => false), + sleep(boundedWait).then(() => true), + ]); + return !timedOut; +} + +function toManagedSummary(session: ManagedProcessSession): ManagedProcessSummary { + return { + sessionId: session.id, + runtime: session.runtime, + commandPreview: session.commandPreview, + status: session.status, + startedAt: session.startedAt, + updatedAt: session.updatedAt, + finishedAt: session.finishedAt, + pid: session.pid, + cwd: session.cwd, + exitCode: session.status === "running" ? undefined : session.exitCode, + timedOut: session.timedOut, + truncated: session.truncated, + tail: trimTail(session.combined, 2000), + }; +} + +function formatManagedSessionRunning(session: ManagedProcessSession, immediate: boolean): string { + const header = immediate + ? "[Background execution started]" + : "[Execution yielded to background]"; + return ( + `${header}\n` + + `Session ID: ${session.id}\n` + + `Runtime: ${session.runtime}\n` + + `PID: ${session.pid ?? 
"n/a"}\n` + + `Use process tool (action=\"poll\") with session_id=\"${session.id}\" to continue.` + ); +} + +function formatManagedSessionResult(session: ManagedProcessSession): string { + const output = formatManagedSessionOutput(session); + const parts: string[] = [output]; + + if (session.timedOut) { + parts.push("[Process killed after timeout]"); + } + if (session.spawnError) { + parts.push(`Process error: ${session.spawnError}`); + } + if (session.status !== "running" && session.exitCode !== null && session.exitCode !== 0) { + parts.push(`Exit code: ${session.exitCode}`); + } + + return parts.join("\n\n").trim() || "(no output)"; +} + +function formatManagedSessionOutput(session: ManagedProcessSession): string { + const parts: string[] = []; + if (session.stdout.trim()) { + parts.push(`STDOUT:\n${session.stdout.trim()}`); + } + if (session.stderr.trim()) { + parts.push(`STDERR:\n${session.stderr.trim()}`); + } + if (session.truncated) { + parts.push(OUTPUT_TRUNCATED_MARKER); + } + if (parts.length === 0) { + return session.status === "running" ? "(no output yet)" : "(no output)"; + } + return parts.join("\n\n"); +} + +function getAnyProcessSession(sessionId: string): ManagedProcessSession | null { + return runningProcessSessions.get(sessionId) ?? finishedProcessSessions.get(sessionId) ?? null; +} + +function pruneFinishedProcessSessions(): void { + const cutoff = Date.now() - PROCESS_SESSION_TTL_MS; + for (const [sessionId, session] of finishedProcessSessions.entries()) { + if ((session.finishedAt ?? 
0) < cutoff) { + finishedProcessSessions.delete(sessionId); + } + } +} + +/** + * Run a shell command with timeout and output limits + */ +function runCommand( + command: string, + args: string[], + options: { + timeout: number; + maxOutput: number; + cwd: string; + env: NodeJS.ProcessEnv; + } +): Promise { + return new Promise((resolve) => { + let stdout = ""; + let stderr = ""; + let stdoutTail = ""; + let stderrTail = ""; + let timedOut = false; + + const proc = spawn(command, args, { + stdio: ["pipe", "pipe", "pipe"], + env: options.env, + cwd: options.cwd, + }); + + proc.stdout?.on("data", (data: Buffer) => { + const chunk = data.toString(); + stdoutTail = trimTail(stdoutTail + chunk, OUTPUT_TAIL_CHARS); + stdout = appendWithLimit(stdout, chunk, options.maxOutput); + }); + + proc.stderr?.on("data", (data: Buffer) => { + const chunk = data.toString(); + stderrTail = trimTail(stderrTail + chunk, OUTPUT_TAIL_CHARS); + stderr = appendWithLimit(stderr, chunk, options.maxOutput); + }); + + const timer = setTimeout(() => { + timedOut = true; + terminateProcess(proc); + }, options.timeout); + + proc.on("close", (code) => { + clearTimeout(timer); + resolve({ + stdout, + stderr, + stdoutTail, + stderrTail, + exitCode: code, + timedOut, + }); + }); + + proc.on("error", (err) => { + clearTimeout(timer); + resolve({ + stdout, + stderr, + stdoutTail, + stderrTail, + exitCode: null, + timedOut, + spawnError: err.message, + }); + }); + }); +} + +function terminateProcess(proc: ChildProcess): void { + try { + proc.kill("SIGTERM"); + } catch { + // ignore + } + setTimeout(() => { + try { + proc.kill("SIGKILL"); + } catch { + // ignore + } + }, 2000); +} + +function formatCommandResult(result: CommandResult): string { + const parts: string[] = []; + if (result.stdout.trim()) { + parts.push(`STDOUT:\n${result.stdout.trim()}`); + } + if (result.stderr.trim()) { + parts.push(`STDERR:\n${result.stderr.trim()}`); + } + if (result.spawnError) { + parts.push(`Process error: 
${result.spawnError}`); + } + if (result.timedOut) { + parts.push("[Process killed after timeout]"); + } + if (result.exitCode !== null && result.exitCode !== 0) { + parts.push(`Exit code: ${result.exitCode}`); + } + return parts.length > 0 ? parts.join("\n\n") : "(no output)"; +} + function rewriteAptCommandsWithSudo(code: string): string { + const isRoot = typeof process.getuid === "function" && process.getuid() === 0; + if (isRoot || !hasCommandInPath("sudo", process.env.PATH)) { + return code; + } + const lines = code.split("\n"); let changed = false; const rewritten = lines.map((line) => { - // Preserve comments and empty lines. if (!line.trim() || line.trim().startsWith("#")) { return line; } @@ -67,97 +867,139 @@ function rewriteAptCommandsWithSudo(code: string): string { return code; } - return [ - 'echo "[eggent] Auto-added sudo for apt/apt-get command(s)"', - ...rewritten, - ].join("\n"); + return ['echo "[eggent] Auto-added sudo for apt/apt-get command(s)"', ...rewritten].join("\n"); } -/** - * Run a shell command with timeout and output limits - */ -function runCommand( - command: string, - args: string[], - timeout: number, - maxOutput: number, - cwd?: string -): Promise { - return new Promise((resolve) => { - let stdout = ""; - let stderr = ""; - let killed = false; +function buildTerminalEnv(shell: string): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { + ...process.env, + PYTHONUNBUFFERED: "1", + }; - const proc = spawn(command, args, { - timeout, - stdio: ["pipe", "pipe", "pipe"], - env: { ...process.env, PYTHONUNBUFFERED: "1" }, - cwd: cwd || process.cwd(), - }); - - proc.stdout?.on("data", (data: Buffer) => { - const chunk = data.toString(); - if (stdout.length + chunk.length < maxOutput) { - stdout += chunk; - } else if (!stdout.endsWith("\n[output truncated]")) { - stdout += "\n[output truncated]"; - } - }); - - proc.stderr?.on("data", (data: Buffer) => { - const chunk = data.toString(); - if (stderr.length + chunk.length < maxOutput) { 
- stderr += chunk; - } - }); - - const timer = setTimeout(() => { - killed = true; - proc.kill("SIGTERM"); - setTimeout(() => proc.kill("SIGKILL"), 2000); - }, timeout); - - proc.on("close", (code) => { - clearTimeout(timer); - - const parts: string[] = []; - if (stdout.trim()) { - parts.push(`STDOUT:\n${stdout.trim()}`); - } - if (stderr.trim()) { - parts.push(`STDERR:\n${stderr.trim()}`); - } - if (killed) { - parts.push(`[Process killed after timeout]`); - } - if (code !== null && code !== 0) { - parts.push(`Exit code: ${code}`); - } - - resolve( - parts.length > 0 - ? parts.join("\n\n") - : "(no output)" - ); - }); - - proc.on("error", (err) => { - clearTimeout(timer); - resolve(`Process error: ${err.message}`); - }); - }); -} - -/** - * Clean up all sessions - */ -export function cleanupSessions(): void { - for (const [id, session] of sessions) { - try { - session.process.kill("SIGTERM"); - } catch { - // ignore - } - sessions.delete(id); + const loginShellPath = getLoginShellPath(shell); + if (loginShellPath) { + env.PATH = mergePath(loginShellPath, env.PATH); } + + return env; +} + +function mergePath(primary: string, secondary?: string): string { + const delimiter = path.delimiter; + const entries = [...primary.split(delimiter), ...(secondary ? 
secondary.split(delimiter) : [])] + .map((entry) => entry.trim()) + .filter(Boolean); + const seen = new Set(); + const merged: string[] = []; + for (const entry of entries) { + if (!seen.has(entry)) { + seen.add(entry); + merged.push(entry); + } + } + return merged.join(delimiter); +} + +function getLoginShellPath(shell: string): string | null { + if (cachedLoginShellPath !== undefined) { + return cachedLoginShellPath; + } + if (process.platform === "win32") { + cachedLoginShellPath = null; + return cachedLoginShellPath; + } + + try { + const raw = execFileSync(shell, ["-lc", 'printf "%s" "$PATH"'], { + encoding: "utf8", + timeout: 10_000, + maxBuffer: 2 * 1024 * 1024, + env: process.env, + stdio: ["ignore", "pipe", "ignore"], + }); + const value = raw + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .at(-1); + cachedLoginShellPath = value && value.length > 0 ? value : null; + } catch { + cachedLoginShellPath = null; + } + + return cachedLoginShellPath; +} + +function hasCommandInPath(command: string, envPath?: string): boolean { + const rawPath = envPath ?? 
process.env.PATH; + if (!rawPath) { + return false; + } + for (const dir of rawPath.split(path.delimiter)) { + const trimmed = dir.trim(); + if (!trimmed) { + continue; + } + const candidate = path.join(trimmed, command); + if (fs.existsSync(candidate)) { + return true; + } + } + return false; +} + +function appendWithLimit(current: string, chunk: string, maxOutput: number): string { + if (current.length >= maxOutput) { + if (!current.includes(OUTPUT_TRUNCATED_MARKER)) { + return `${current}\n${OUTPUT_TRUNCATED_MARKER}`; + } + return current; + } + + const remaining = maxOutput - current.length; + if (chunk.length <= remaining) { + return current + chunk; + } + + const base = current + chunk.slice(0, Math.max(0, remaining)); + if (base.includes(OUTPUT_TRUNCATED_MARKER)) { + return base; + } + return `${base}\n${OUTPUT_TRUNCATED_MARKER}`; +} + +function trimTail(text: string, maxChars: number): string { + if (text.length <= maxChars) { + return text; + } + return text.slice(text.length - maxChars); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function toPositiveInteger(value: unknown, fallback: number): number { + return typeof value === "number" && Number.isFinite(value) && value > 0 + ? 
Math.floor(value)
+    : fallback;
+}
+
+function createManagedProcessId(): string {
+  return `proc-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
+}
+
+function previewText(text: string, maxChars = 160): string {
+  const normalized = text.replace(/\s+/g, " ").trim();
+  if (!normalized) {
+    return "(empty)";
+  }
+  if (normalized.length <= maxChars) {
+    return normalized;
+  }
+  return `${normalized.slice(0, Math.max(0, maxChars - 1))}…`;
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, Math.max(0, ms)));
 }
diff --git a/src/lib/tools/install-orchestrator.ts b/src/lib/tools/install-orchestrator.ts
new file mode 100644
index 0000000..3bb4268
--- /dev/null
+++ b/src/lib/tools/install-orchestrator.ts
@@ -0,0 +1,624 @@
+import fs from "fs";
+import path from "path";
+import { spawn } from "child_process";
+
+const DEFAULT_TIMEOUT_MS = 10 * 60_000;
+const OUTPUT_CAP = 120_000;
+
+export type InstallKind = "auto" | "node" | "python" | "go" | "uv" | "apt";
+
+export type InstallAttempt = {
+  command: string;
+  manager: string;
+  code: number | null;
+  durationMs: number;
+  stdout: string;
+  stderr: string;
+  timedOut: boolean;
+  success: boolean;
+  skipped: boolean;
+  reason?: string;
+};
+
+export type InstallPackagesParams = {
+  kind: InstallKind;
+  packages: string[];
+  preferManager?: string;
+  global?: boolean;
+  cwd: string;
+  timeoutMs?: number;
+};
+
+export type InstallPackagesResult = {
+  success: boolean;
+  kind: InstallKind;
+  resolvedKind: Exclude<InstallKind, "auto">;
+  manager: string | null;
+  message: string;
+  attempts: InstallAttempt[];
+};
+
+type InstallStep = {
+  argv: string[];
+  manager: string;
+  cwd?: string;
+};
+
+type InstallPlan = {
+  manager: string;
+  steps: InstallStep[];
+};
+
+type CommandRunResult = {
+  code: number | null;
+  stdout: string;
+  stderr: string;
+  timedOut: boolean;
+  durationMs: number;
+};
+
+/**
+ * Install packages by trying each candidate installer plan in order until one succeeds.
+ *
+ * @param params - Ecosystem kind, package specifiers, working directory, and the optional
+ *   preferred manager, global flag and per-command timeout.
+ * @returns The outcome, the manager that succeeded (or `null`), and every attempt made.
+ */
+export async function installPackages(params: InstallPackagesParams): Promise<InstallPackagesResult> {
+  const packages = uniqueNonEmpty(params.packages);
+  if (packages.length === 0) {
+    return {
+      success: false,
+      kind: params.kind,
+      resolvedKind: resolveAutoKind(params.kind, params.preferManager),
+      manager: null,
+      message: "No packages specified.",
+      attempts: [],
+    };
+  }
+
+  const timeoutMs = clampTimeout(params.timeoutMs ?? DEFAULT_TIMEOUT_MS);
+  const resolvedKind = resolveAutoKind(params.kind, params.preferManager);
+  const attempts: InstallAttempt[] = [];
+  const plans = await buildInstallPlans({
+    kind: resolvedKind,
+    packages,
+    preferManager: params.preferManager,
+    global: params.global === true,
+    cwd: params.cwd,
+    timeoutMs,
+  });
+
+  if (plans.length === 0) {
+    return {
+      success: false,
+      kind: params.kind,
+      resolvedKind,
+      manager: null,
+      message: `No compatible installer found for kind=${resolvedKind}.`,
+      attempts,
+    };
+  }
+
+  for (const plan of plans) {
+    const planResult = await executePlan(plan, timeoutMs);
+    attempts.push(...planResult.attempts);
+
+    if (planResult.success) {
+      return {
+        success: true,
+        kind: params.kind,
+        resolvedKind,
+        manager: plan.manager,
+        message: `Installed successfully using ${plan.manager}.`,
+        attempts,
+      };
+    }
+  }
+
+  // Report the last attempt that actually ran and wrote to stderr. A trailing skipped
+  // attempt (manager missing from PATH) has empty stderr and would hide the real error.
+  const lastFailure = attempts
+    .slice()
+    .reverse()
+    .find((attempt) => !attempt.skipped && attempt.stderr.trim().length > 0);
+  const message =
+    lastFailure?.stderr.trim() ||
+    `Failed to install package(s) with available ${resolvedKind} installers.`;
+
+  return {
+    success: false,
+    kind: params.kind,
+    resolvedKind,
+    manager: null,
+    message,
+    attempts,
+  };
+}
+
+async function buildInstallPlans(params: {
+  kind: Exclude<InstallKind, "auto">;
+  packages: string[];
+  preferManager?: string;
+  global: boolean;
+  cwd: string;
+  timeoutMs: number;
+}): Promise<InstallPlan[]> {
+  switch (params.kind) {
+    case "node":
+      return buildNodePlans(params);
+    case "python":
+      return buildPythonPlans(params);
+    case "uv":
+      return buildUvPlans(params);
+    case "go":
+      return buildGoPlans(params);
+    case "apt":
+      return await buildAptPlans(params);
+  }
+}
+
+function buildNodePlans(params: {
+  packages: string[];
+  preferManager?: string;
+  global: boolean;
+  cwd: string;
+}): InstallPlan[] {
+  const order = orderedManagers(
+    ["pnpm", "npm", "yarn", "bun"],
+    normalizeManager(params.preferManager)
+  );
+
+  const plans: InstallPlan[] = [];
+  for (const manager of order) {
+    if (!commandExists(manager)) {
+      plans.push({
+        manager,
+        steps: [],
+      });
+      continue;
+    }
+
+    let argv: string[];
+    if (manager === "pnpm") {
+      argv = params.global
+        ? ["pnpm", "add", "-g", "--ignore-scripts", ...params.packages]
+        : ["pnpm", "add", ...params.packages];
+    } else if (manager === "npm") {
+      argv = params.global
+        ? ["npm", "install", "-g", "--ignore-scripts", ...params.packages]
+        : ["npm", "install", ...params.packages];
+    } else if (manager === "yarn") {
+      argv = params.global
+        ? ["yarn", "global", "add", ...params.packages]
+        : ["yarn", "add", ...params.packages];
+    } else {
+      argv = params.global
+        ? ["bun", "add", "-g", "--ignore-scripts", ...params.packages]
+        : ["bun", "add", ...params.packages];
+    }
+
+    plans.push({
+      manager,
+      steps: [
+        {
+          manager,
+          argv,
+          cwd: params.cwd,
+        },
+      ],
+    });
+  }
+
+  return plans;
+}
+
+function buildPythonPlans(params: {
+  packages: string[];
+  preferManager?: string;
+  cwd: string;
+}): InstallPlan[] {
+  const normalized = normalizeManager(params.preferManager);
+  const plans: InstallPlan[] = [];
+
+  const uvPreferred = normalized === "uv";
+  if (uvPreferred && commandExists("uv")) {
+    plans.push({
+      manager: "uv",
+      steps: [
+        {
+          manager: "uv",
+          argv: ["uv", "pip", "install", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    });
+  }
+
+  if (commandExists("python3")) {
+    plans.push({
+      manager: "pip",
+      steps: [
+        {
+          manager: "pip",
+          argv: ["python3", "-m", "pip", "install", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    });
+  } else if (commandExists("python")) {
+    plans.push({
+      manager: "pip",
+      steps: [
+        {
+          manager: "pip",
+          argv: ["python", "-m", "pip", "install", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    });
+  }
+
+  if (!uvPreferred && commandExists("uv")) {
+    plans.push({
+      manager: "uv",
+      steps: [
+        {
+          manager: "uv",
+          argv: ["uv", "pip", "install", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    });
+  }
+
+  return plans;
+}
+
+function buildUvPlans(params: { packages: string[]; cwd: string }): InstallPlan[] {
+  if (!commandExists("uv")) {
+    return [];
+  }
+  return [
+    {
+      manager: "uv",
+      steps: [
+        {
+          manager: "uv",
+          argv: ["uv", "tool", "install", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    },
+  ];
+}
+
+function buildGoPlans(params: { packages: string[]; cwd: string }): InstallPlan[] {
+  if (!commandExists("go")) {
+    return [];
+  }
+
+  const argv = ["go", "install", ...params.packages];
+  return [
+    {
+      manager: "go",
+      steps: [
+        {
+          manager: "go",
+          argv,
+          cwd: params.cwd,
+        },
+      ],
+    },
+  ];
+}
+
+async function buildAptPlans(params: {
+  packages: string[];
+  cwd: string;
+  timeoutMs: number;
+}): Promise<InstallPlan[]> {
+  if (!commandExists("apt-get")) {
+    return [];
+  }
+
+  const isRoot = typeof process.getuid === "function" && process.getuid() === 0;
+  if (isRoot) {
+    return [
+      {
+        manager: "apt-get",
+        steps: [
+          { manager: "apt-get", argv: ["apt-get", "update", "-qq"], cwd: params.cwd },
+          {
+            manager: "apt-get",
+            argv: ["apt-get", "install", "-y", ...params.packages],
+            cwd: params.cwd,
+          },
+        ],
+      },
+    ];
+  }
+
+  if (!commandExists("sudo")) {
+    return [];
+  }
+
+  const sudoCheck = await runCommand(["sudo", "-n", "true"], {
+    timeoutMs: Math.min(params.timeoutMs, 10_000),
+    cwd: params.cwd,
+  });
+  if (sudoCheck.code !== 0) {
+    return [];
+  }
+
+  return [
+    {
+      manager: "sudo-apt-get",
+      steps: [
+        {
+          manager: "sudo-apt-get",
+          argv: ["sudo", "apt-get", "update", "-qq"],
+          cwd: params.cwd,
+        },
+        {
+          manager: "sudo-apt-get",
+          argv: ["sudo", "apt-get", "install", "-y", ...params.packages],
+          cwd: params.cwd,
+        },
+      ],
+    },
+  ];
+}
+
+async function executePlan(
+  plan: InstallPlan,
+  timeoutMs: number
+): Promise<{ success: boolean; attempts: InstallAttempt[] }> {
+  if (plan.steps.length === 0) {
+    return {
+      success: false,
+      attempts: [
+        {
+          command: plan.manager,
+          manager: plan.manager,
+          code: null,
+          durationMs: 0,
+          stdout: "",
+          stderr: "",
+          timedOut: false,
+          success: false,
+          skipped: true,
+          reason: `Manager \"${plan.manager}\" is not available in PATH.`,
+        },
+      ],
+    };
+  }
+
+  const attempts: InstallAttempt[] = [];
+  for (const step of plan.steps) {
+    const run = await runCommand(step.argv, {
+      timeoutMs,
+      cwd: step.cwd,
+    });
+
+    const attempt: InstallAttempt = {
+      command: formatCommand(step.argv),
+      manager: step.manager,
+      code: run.code,
+      durationMs: run.durationMs,
+      stdout: run.stdout,
+      stderr: run.stderr,
+      timedOut: run.timedOut,
+      success: run.code === 0,
+      skipped: false,
+    };
+
+    attempts.push(attempt);
+    if (run.code !== 0) {
+      return { success: false, attempts };
+    }
+  }
+
+  return { success: true, attempts };
+}
+
+/**
+ * Spawn `argv` without a shell and capture stdout/stderr, each capped at OUTPUT_CAP.
+ *
+ * Never rejects: a spawn failure resolves with `code: null` and the error message appended
+ * to `stderr`. On timeout the child gets SIGTERM, then SIGKILL two seconds later.
+ */
+async function runCommand(
+  argv: string[],
+  options: {
+    timeoutMs: number;
+    cwd?: string;
+  }
+): Promise<CommandRunResult> {
+  const startedAt = Date.now();
+
+  return new Promise((resolve) => {
+    const command = argv[0];
+    const args = argv.slice(1);
+    let stdout = "";
+    let stderr = "";
+    let timedOut = false;
+    let settled = false;
+    let killTimer: ReturnType<typeof setTimeout> | undefined;
+
+    const child = spawn(command, args, {
+      cwd: options.cwd,
+      env: process.env,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    // Decode at the stream level so a multi-byte UTF-8 character split across two chunks
+    // is not turned into replacement characters by per-chunk Buffer#toString().
+    child.stdout?.setEncoding("utf8");
+    child.stderr?.setEncoding("utf8");
+
+    child.stdout?.on("data", (chunk: string) => {
+      stdout = appendWithCap(stdout, chunk, OUTPUT_CAP);
+    });
+
+    child.stderr?.on("data", (chunk: string) => {
+      stderr = appendWithCap(stderr, chunk, OUTPUT_CAP);
+    });
+
+    const timer = setTimeout(() => {
+      timedOut = true;
+      try {
+        child.kill("SIGTERM");
+      } catch {
+        // ignore
+      }
+      killTimer = setTimeout(() => {
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          // ignore
+        }
+      }, 2000);
+    }, options.timeoutMs);
+
+    // "error" and "close" can both fire for the same child; only the first one settles.
+    const finish = (code: number | null, errorMessage?: string): void => {
+      if (settled) {
+        return;
+      }
+      settled = true;
+      clearTimeout(timer);
+      resolve({
+        code,
+        stdout,
+        stderr: errorMessage ? appendWithCap(stderr, errorMessage, OUTPUT_CAP) : stderr,
+        timedOut,
+        durationMs: Date.now() - startedAt,
+      });
+    };
+
+    child.on("close", (code) => {
+      // The process is gone, so a pending SIGKILL escalation has nothing left to do.
+      if (killTimer) {
+        clearTimeout(killTimer);
+      }
+      finish(code);
+    });
+
+    child.on("error", (error) => finish(null, error.message));
+  });
+}
+
+function appendWithCap(current: string, chunk: string, cap: number): string {
+  if (current.length >= cap) {
+    return current;
+  }
+  const remaining = cap - current.length;
+  if (chunk.length <= remaining) {
+    return current + chunk;
+  }
+  return current + chunk.slice(0, Math.max(0, remaining));
+}
+
+function resolveAutoKind(kind: InstallKind, preferManager?: string): Exclude<InstallKind, "auto"> {
+  if (kind !== "auto") {
+    return kind;
+  }
+
+  const manager = normalizeManager(preferManager);
+  if (manager === "go") return "go";
+  if (manager === "uv") return "uv";
+  if (manager === "pip" || manager === "python") return "python";
+  if (manager === "apt" || manager === "apt-get") return "apt";
+  return "node";
+}
+
+function orderedManagers(base: string[], preferred?: string): string[] {
+  if (!preferred) {
+    return base;
+  }
+  const normalized = preferred.toLowerCase();
+  const filtered = base.filter((value) => value !== normalized);
+  return base.includes(normalized) ? [normalized, ...filtered] : base;
+}
+
+/**
+ * Check whether `command` resolves to a regular file in one of the PATH directories.
+ *
+ * On Windows each PATHEXT extension is tried as well (defaulting to .COM/.EXE/.BAT/.CMD),
+ * so native binaries such as `go.exe` are found, not only `.cmd` shims.
+ *
+ * @param command - Bare executable name, e.g. `"npm"`.
+ * @param envPath - PATH value to search; defaults to `process.env.PATH`.
+ */
+function commandExists(command: string, envPath?: string): boolean {
+  const rawPath = envPath ?? process.env.PATH;
+  if (!rawPath) {
+    return false;
+  }
+
+  const extensions =
+    process.platform === "win32"
+      ? ["", ...(process.env.PATHEXT || ".COM;.EXE;.BAT;.CMD").split(";").filter(Boolean)]
+      : [""];
+
+  for (const dir of rawPath.split(path.delimiter)) {
+    const trimmed = dir.trim();
+    if (!trimmed) {
+      continue;
+    }
+    for (const extension of extensions) {
+      if (isRegularFile(path.join(trimmed, `${command}${extension}`))) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/** True when `candidate` exists and is a regular file; a same-named directory is not a command. */
+function isRegularFile(candidate: string): boolean {
+  try {
+    return fs.statSync(candidate).isFile();
+  } catch {
+    return false;
+  }
+}
+
+function uniqueNonEmpty(values: string[]): string[] {
+  const result: string[] = [];
+  const seen = new Set<string>();
+  for (const raw of values) {
+    const value = raw.trim();
+    if (!value || seen.has(value)) {
+      continue;
+    }
+    seen.add(value);
+    result.push(value);
+  }
+  return result;
+}
+
+function normalizeManager(value?: string): string | undefined {
+  const normalized = value?.trim().toLowerCase();
+  return normalized || undefined;
+}
+
+function formatCommand(argv: string[]): string {
+  return argv.map(quoteArg).join(" ");
+}
+
+function quoteArg(arg: string): string {
+  if (/^[a-zA-Z0-9_./:@%+-]+$/.test(arg)) {
+    return arg;
+  }
+  return JSON.stringify(arg);
+}
+
+function clampTimeout(value: number): number {
+  if (!Number.isFinite(value) || value <= 0) {
+    return DEFAULT_TIMEOUT_MS;
+  }
+  return Math.min(Math.max(Math.floor(value), 1_000), 30 * 60_000);
+}
diff --git a/src/lib/tools/tool.ts b/src/lib/tools/tool.ts
index 7a0c2c5..b03ba4b 100644
--- a/src/lib/tools/tool.ts
+++ b/src/lib/tools/tool.ts
@@ -6,12 +6,21 @@ import { constants as fsConstants } from "fs";
 import path from "path";
 import type { AgentContext } from "@/lib/agent/types";
 import type { AppSettings, McpServerConfig } from "@/lib/types";
-import { executeCode } from "@/lib/tools/code-execution";
+import {
+  clearFinishedManagedProcessSessions,
+  executeCode,
+  killManagedProcessSession,
+  listManagedProcessSessions,
+  pollManagedProcessSession,
+  readManagedProcessSessionLog,
+  removeManagedProcessSession,
+} from
"@/lib/tools/code-execution";
 import { memorySave, memoryLoad, memoryDelete } from "@/lib/tools/memory-tools";
 import { knowledgeQuery } from "@/lib/tools/knowledge-query";
 import { searchWeb } from "@/lib/tools/search-engine";
 import { callSubordinate } from "@/lib/tools/call-subordinate";
 import { createCronTool } from "@/lib/tools/cron-tool";
+import { installPackages } from "@/lib/tools/install-orchestrator";
 import { loadPdf } from "@/lib/memory/loaders/pdf-loader";
 import {
   getAllProjects,
@@ -618,7 +627,7 @@ export function createAgentTools(
   if (settings.codeExecution.enabled) {
     tools.code_execution = tool({
       description:
-        "Execute code in Python, Node.js, or Shell terminal. Use this to run scripts, install packages, manipulate files, perform calculations, or any task that requires code execution. The code runs in a persistent shell session.",
+        "Execute code in Python, Node.js, or Shell terminal. Use this to run scripts, install packages, manipulate files, perform calculations, or any task that requires code execution. For terminal runtime, session IDs preserve working directory continuity across calls.",
       inputSchema: z.object({
         runtime: z
           .enum(["python", "nodejs", "terminal"])
@@ -632,10 +641,25 @@ export function createAgentTools(
           .number()
           .default(0)
           .describe(
-            "Session ID (0-9). Use different sessions for parallel tasks. Default is 0."
+            "Session ID (0-9). Reuse a session to keep terminal working-directory state between calls. Use different sessions for independent tasks."
+          ),
+        background: z
+          .boolean()
+          .default(false)
+          .describe(
+            "Run execution in background and return immediately with a managed process session id."
+          ),
+        yield_ms: z
+          .number()
+          .int()
+          .min(10)
+          .max(120000)
+          .optional()
+          .describe(
+            "Optional milliseconds to wait before yielding a still-running command to background process management."
           ),
       }),
-      execute: async ({ runtime, code, session }) => {
+      execute: async ({ runtime, code, session, background, yield_ms }) => {
         const normalizedCode = code.replace(/\r\n/g, "\n");
         const sanitizedCode = normalizedCode.replace(/\s+$/, "");
         const lineCount = sanitizedCode.length === 0 ? 0 : sanitizedCode.split("\n").length;
@@ -649,7 +673,113 @@ export function createAgentTools(
           return `[Preflight error] Code payload has too many lines (${lineCount}). Limit is ${CODE_EXEC_MAX_LINES}. Split the task into smaller executions.`;
         }
         const cwd = resolveContextCwd(context);
-        return executeCode(runtime, sanitizedCode, session, settings.codeExecution, cwd);
+        return executeCode(runtime, sanitizedCode, session, settings.codeExecution, cwd, {
+          background,
+          yieldMs: typeof yield_ms === "number" ? yield_ms : undefined,
+        });
+      },
+    });
+
+    tools.install_packages = tool({
+      description:
+        "Install dependencies with installer fallback logic. Supports node (npm/pnpm/yarn/bun), python (pip/uv), go, uv, and apt. Use this when package installation via code_execution is flaky.",
+      inputSchema: z.object({
+        kind: z
+          .enum(["auto", "node", "python", "go", "uv", "apt"])
+          .default("auto")
+          .describe("Dependency ecosystem to install for."),
+        packages: z
+          .array(z.string())
+          .min(1)
+          .describe("List of package names/specifiers to install."),
+        prefer_manager: z
+          .string()
+          .optional()
+          .describe("Optional preferred manager (e.g. pnpm, npm, pip, uv, go, apt-get)."),
+        global: z
+          .boolean()
+          .default(false)
+          .describe("Whether to install globally when supported (mainly node ecosystem)."),
+        timeout_seconds: z
+          .number()
+          .int()
+          .min(1)
+          .max(1800)
+          .default(600)
+          .describe("Timeout per installer attempt in seconds."),
+      }),
+      execute: async ({ kind, packages, prefer_manager, global, timeout_seconds }) => {
+        const cwd = resolveContextCwd(context);
+        return installPackages({
+          kind,
+          packages,
+          preferManager: prefer_manager,
+          global,
+          cwd,
+          timeoutMs: timeout_seconds * 1000,
+        });
+      },
+    });
+
+    tools.process = tool({
+      description:
+        "Manage code_execution background sessions (list, poll, log, kill, clear, remove). Use this after code_execution returns a managed session id.",
+      inputSchema: z.object({
+        action: z
+          .enum(["list", "poll", "log", "kill", "clear", "remove"])
+          .describe("Process management action."),
+        session_id: z
+          .string()
+          .optional()
+          .describe("Managed process session id for poll/log/kill/remove."),
+        timeout_ms: z
+          .number()
+          .int()
+          .min(0)
+          .max(120000)
+          .optional()
+          .describe("Optional wait timeout for poll action."),
+        offset: z
+          .number()
+          .int()
+          .min(0)
+          .optional()
+          .describe("Optional line offset for log action."),
+        limit: z
+          .number()
+          .int()
+          .min(1)
+          .max(5000)
+          .optional()
+          .describe("Optional line count for log action."),
+      }),
+      execute: async ({ action, session_id, timeout_ms, offset, limit }) => {
+        if (action === "list") {
+          return {
+            success: true,
+            sessions: listManagedProcessSessions(),
+          };
+        }
+        if (action === "clear") {
+          return clearFinishedManagedProcessSessions();
+        }
+
+        // poll/log/kill/remove address one session: trim the id once and pass the trimmed
+        // value on, so an id with stray whitespace passes validation and still resolves.
+        const sessionId = session_id?.trim();
+        if (!sessionId) {
+          return { success: false, error: `session_id is required for ${action}.` };
+        }
+        if (action === "poll") {
+          return pollManagedProcessSession(sessionId, timeout_ms);
+        }
+        if (action === "log") {
+          return readManagedProcessSessionLog(sessionId, offset, limit);
+        }
+        if (action === "kill") {
+          return killManagedProcessSession(sessionId);
+        }
+        return removeManagedProcessSession(sessionId);
       },
     });
   }
diff --git a/src/prompts/system.md b/src/prompts/system.md
index 3a60efb..6c63013 100644
--- a/src/prompts/system.md
+++ b/src/prompts/system.md
@@ -4,12 +4,13 @@ You are a powerful AI agent with access to tools that allow you to interact with
 
 ## Core Capabilities
 
-1. **Code Execution** - Execute Python, Node.js, and Shell commands in persistent terminal sessions
+1. **Code Execution** - Execute Python, Node.js, and Shell commands with session-scoped continuity
 2. **Persistent Memory** - Save and retrieve information across conversations using vector-based semantic memory
 3. **Knowledge Base** - Query uploaded documents using semantic search (RAG)
 4. **Web Search** - Search the internet for current information
 5. **Multi-Agent Delegation** - Delegate complex subtasks to subordinate agents
 6. **Cron Scheduling** - Create, update, run, and inspect scheduled jobs
+7.
**Process Management** - Inspect and control background code execution sessions ## Guidelines @@ -25,9 +26,11 @@ You are a powerful AI agent with access to tools that allow you to interact with - Choose the appropriate runtime: `python` for data processing and scripting, `nodejs` for web/JS tasks, `terminal` for shell commands - Always handle errors and edge cases in your code - If Python fails with `ModuleNotFoundError`, install the missing dependency with `python3 -m pip install <package>` using `terminal`, then retry -- For OS-level packages, use `sudo apt-get update && sudo apt-get install -y <package>` +- For OS-level packages on Debian/Ubuntu, use `apt-get`/`apt` and add `sudo` only when needed and available - For file operations, prefer dedicated file tools (`read_text_file`, `read_pdf_file`, `write_text_file`, `copy_file`) over code execution - Use `code_execution` for file operations only as a fallback when dedicated tools cannot complete the task +- For long-running commands, use `code_execution` with background/yield and continue via the `process` tool +- For dependency setup, prefer `install_packages` over ad-hoc install retries in shell - Break complex tasks into smaller executable steps - Check output after each execution before proceeding - Do not use `sleep`, `at`, or background shell loops as a substitute for scheduled reminders/tasks; use the **cron** tool for scheduling diff --git a/src/prompts/tool-code_execution.md b/src/prompts/tool-code_execution.md index ad14c9a..f2ff021 100644 --- a/src/prompts/tool-code_execution.md +++ b/src/prompts/tool-code_execution.md @@ -14,10 +14,11 @@ Execute code in a specified runtime environment. The code runs on the user's mac 2. **Print outputs explicitly** — always `print()` or `console.log()` results you want to see 3. **Handle errors** — wrap risky operations in try/except or try/catch 4. **Check prerequisites** — verify packages are installed before importing -5. 
**Use sessions wisely** — session 0 is the default; use different sessions for parallel tasks +5. **Use sessions wisely** — session 0 is the default; reuse the same session to keep terminal working-directory state between calls 6. **Prefer dedicated file tools first** — use `read_text_file`, `read_pdf_file`, `write_text_file`, and `copy_file` for common file tasks; use `code_execution` only when those tools are insufficient 7. **Auto-resolve missing Python deps** — if you see `ModuleNotFoundError`, run `python3 -m pip install <package>` in `terminal`, then rerun Python code -8. **Install system packages with sudo** — use `sudo apt-get update && sudo apt-get install -y <package>` +8. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available +9. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill ## Examples @@ -26,7 +27,7 @@ First execution: `python3 -m pip install requests` (runtime: terminal) Second execution: `import requests; r = requests.get('...'); print(r.json())` (runtime: python) ### Install a system package -Use: `sudo apt-get update && sudo apt-get install -y ffmpeg` (runtime: terminal) +Use: `apt-get update && apt-get install -y ffmpeg` (runtime: terminal). If permission is denied and `sudo` exists, retry with `sudo`. ### File operations (fallback) ```python @@ -42,9 +43,12 @@ print(content) uname -a && python3 --version && node --version ``` +### Long running command +Use `background=true` (or `yield_ms`) and then follow up with `process` tool actions (`poll`, `log`). 
+ ## Limitations -- Execution timeout: configurable (default 180 seconds) +- Execution timeout: configurable (default 600 seconds) - Output is truncated at configurable max length - No GUI applications — terminal only - Network access depends on system configuration diff --git a/src/prompts/tool-install_packages.md b/src/prompts/tool-install_packages.md new file mode 100644 index 0000000..700976b --- /dev/null +++ b/src/prompts/tool-install_packages.md @@ -0,0 +1,24 @@ +# Install Packages Tool + +Install dependencies with fallback manager selection. + +## When to Use + +- Package installs in `code_execution` are failing or flaky. +- You need controlled install attempts with structured diagnostics. + +## Supported Kinds + +- `node` — tries node managers (pnpm/npm/yarn/bun) +- `python` — pip/uv flow +- `go` — `go install` +- `uv` — `uv tool install` +- `apt` — apt-get flow with root/sudo checks +- `auto` — chooses a default flow from inputs + +## Guidelines + +1. Pass explicit `kind` when known to avoid ambiguous installs. +2. Use `prefer_manager` if project policy requires a specific manager. +3. Review `attempts` in tool output to understand fallback behavior and errors. +4. If install fails repeatedly, report exact failing command and stderr to user. diff --git a/src/prompts/tool-process.md b/src/prompts/tool-process.md new file mode 100644 index 0000000..32bb280 --- /dev/null +++ b/src/prompts/tool-process.md @@ -0,0 +1,19 @@ +# Process Tool + +Manage background sessions created by `code_execution`. + +## Actions + +- `list` — list running and finished managed sessions. +- `poll` — check current status/output of one session (optionally wait with `timeout_ms`). +- `log` — read session log output with optional line window (`offset`, `limit`). +- `kill` — terminate a running session. +- `remove` — remove one finished session from history. +- `clear` — clear all finished sessions. + +## Usage Rules + +1. 
Use `process` only after `code_execution` returned a managed session id. +2. For `poll`/`log`/`kill`/`remove`, always pass `session_id`. +3. If `poll` returns `retryInMs`, wait roughly that long before the next poll. +4. If status is `completed`/`failed`/`killed`, stop polling and report outcome.