feat: improve tool execution reliability and package installs

This commit is contained in:
ilya-bov
2026-02-27 15:08:31 +03:00
parent 8ccac132e1
commit dcb1eabb4e
9 changed files with 1825 additions and 133 deletions

View File

@@ -17,6 +17,10 @@ import type { ChatMessage } from "@/lib/types";
import { publishUiSyncEvent } from "@/lib/realtime/event-bus";
const LLM_LOG_BORDER = "═".repeat(60);
/** Step limit handed to `stepCountIs` for the main agent entry points (`runAgent`, `runAgentText`). */
const MAX_TOOL_STEPS_PER_TURN = 30;
/** Step limit handed to `stepCountIs` for `runSubordinateAgent`. */
const MAX_TOOL_STEPS_SUBORDINATE = 15;
/**
 * Number of consecutive identical results for the same poll-like call after
 * which the loop guard stops executing the tool and returns a
 * "[Loop guard]" message instead.
 */
const POLL_NO_PROGRESS_BLOCK_THRESHOLD = 16;
/**
 * Suggested back-off delays (ms) quoted in the loop-guard message. The index is
 * `count - POLL_NO_PROGRESS_BLOCK_THRESHOLD`, clamped to the last entry.
 * NOTE(review): the blocked branch returns without changing the stored count,
 * so the index looks like it stays at 0 once blocking starts — confirm.
 */
const POLL_BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000] as const;
function asRecord(value: unknown): Record<string, unknown> | null {
if (value == null || typeof value !== "object" || Array.isArray(value)) {
@@ -90,6 +94,9 @@ function extractDeterministicFailureSignature(output: unknown): string | null {
trimmed.startsWith("[MCP tool error]") ||
trimmed.startsWith("[Preflight error]") ||
trimmed.startsWith("[Loop guard]") ||
trimmed.includes("Process error:") ||
trimmed.includes("[Process killed after timeout]") ||
/Exit code:\s*-?[1-9]\d*/.test(trimmed) ||
/^Failed\b/i.test(trimmed) ||
/^Skill ".+" not found\./i.test(trimmed) ||
(/\bnot found\b/i.test(trimmed) &&
@@ -102,8 +109,52 @@ function extractDeterministicFailureSignature(output: unknown): string | null {
return trimmed.length > 400 ? `${trimmed.slice(0, 400)}...` : trimmed;
}
/**
 * Reports whether a tool invocation is a status check on a managed process.
 *
 * Only the `process` tool qualifies, and only when its input is a plain
 * record whose `action` is `"poll"` or `"log"`.
 *
 * @param toolName Name of the tool being invoked.
 * @param input Raw tool input as supplied by the model.
 * @returns `true` for `process` poll/log calls, `false` for everything else.
 */
function isPollLikeCall(toolName: string, input: unknown): boolean {
  // Non-"process" tools are rejected before the input is inspected at all.
  const payload = toolName === "process" ? asRecord(input) : null;
  if (!payload) {
    return false;
  }
  const requested = typeof payload.action === "string" ? payload.action : "";
  return ["poll", "log"].includes(requested);
}
/**
 * Produces a bounded copy of a tool result for "did anything change?" comparisons.
 *
 * - Strings are trimmed and clipped to 1000 characters (plus `...`).
 * - Arrays keep their first 8 items, each normalized recursively.
 * - Records are copied key by key: a string `output` is clipped without
 *   trimming, an `attempts` array keeps its first 3 items, and every other
 *   value is normalized recursively.
 * - Anything else is returned unchanged.
 *
 * @param value Arbitrary tool output.
 * @returns The normalized value.
 */
function normalizeNoProgressValue(value: unknown): unknown {
  const clip = (text: string): string =>
    text.length > 1000 ? `${text.slice(0, 1000)}...` : text;

  if (typeof value === "string") {
    return clip(value.trim());
  }
  if (Array.isArray(value)) {
    return value.slice(0, 8).map((entry) => normalizeNoProgressValue(entry));
  }

  const fields = asRecord(value);
  if (!fields) {
    return value;
  }

  const normalized: Record<string, unknown> = {};
  Object.entries(fields).forEach(([name, raw]) => {
    if (name === "output" && typeof raw === "string") {
      // Raw process output is clipped as-is; it is deliberately not trimmed.
      normalized[name] = clip(raw);
    } else if (name === "attempts" && Array.isArray(raw)) {
      normalized[name] = raw.slice(0, 3).map((entry) => normalizeNoProgressValue(entry));
    } else {
      normalized[name] = normalizeNoProgressValue(raw);
    }
  });
  return normalized;
}
function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
const deterministicFailureByCall = new Map<string, string>();
const noProgressByCall = new Map<string, { hash: string; count: number }>();
const wrappedTools: ToolSet = {};
for (const [toolName, toolDef] of Object.entries(tools)) {
@@ -116,6 +167,24 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
...toolDef,
execute: async (input: unknown, options: ToolExecutionOptions) => {
const callKey = `${toolName}:${stableSerialize(input)}`;
const previousNoProgress = noProgressByCall.get(callKey);
if (
previousNoProgress &&
previousNoProgress.count >= POLL_NO_PROGRESS_BLOCK_THRESHOLD &&
isPollLikeCall(toolName, input)
) {
const scheduleIdx = Math.min(
previousNoProgress.count - POLL_NO_PROGRESS_BLOCK_THRESHOLD,
POLL_BACKOFF_SCHEDULE_MS.length - 1
);
const retryInMs = POLL_BACKOFF_SCHEDULE_MS[scheduleIdx] ?? 60000;
return (
`[Loop guard] Detected no-progress polling loop for "${toolName}".\n` +
`Repeated identical result ${previousNoProgress.count} times.\n` +
`Back off for ~${retryInMs}ms or report the background task as stuck.`
);
}
const previousFailure = deterministicFailureByCall.get(callKey);
if (previousFailure) {
return (
@@ -132,6 +201,25 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
} else {
deterministicFailureByCall.delete(callKey);
}
if (isPollLikeCall(toolName, input)) {
const outputHash = stableSerialize(normalizeNoProgressValue(output));
const previous = noProgressByCall.get(callKey);
if (previous && previous.hash === outputHash) {
noProgressByCall.set(callKey, {
hash: outputHash,
count: previous.count + 1,
});
} else {
noProgressByCall.set(callKey, {
hash: outputHash,
count: 1,
});
}
} else {
noProgressByCall.delete(callKey);
}
return output;
},
} as typeof toolDef;
@@ -398,7 +486,7 @@ export async function runAgent(options: {
system: systemPrompt,
messages,
tools,
stopWhen: stepCountIs(15), // Allow up to 15 tool call rounds
stopWhen: stepCountIs(MAX_TOOL_STEPS_PER_TURN),
temperature: settings.chatModel.temperature ?? 0.7,
maxOutputTokens: settings.chatModel.maxTokens ?? 4096,
onFinish: async (event) => {
@@ -528,7 +616,7 @@ export async function runAgentText(options: {
system: systemPrompt,
messages,
tools,
stopWhen: stepCountIs(15),
stopWhen: stepCountIs(MAX_TOOL_STEPS_PER_TURN),
temperature: settings.chatModel.temperature ?? 0.7,
maxOutputTokens: settings.chatModel.maxTokens ?? 4096,
});
@@ -659,7 +747,7 @@ export async function runSubordinateAgent(options: {
system: systemPrompt,
messages,
tools,
stopWhen: stepCountIs(10),
stopWhen: stepCountIs(MAX_TOOL_STEPS_SUBORDINATE),
temperature: settings.chatModel.temperature ?? 0.7,
maxOutputTokens: settings.chatModel.maxTokens ?? 4096,
});

View File

@@ -34,8 +34,8 @@ export const DEFAULT_SETTINGS: AppSettings = {
},
codeExecution: {
enabled: true,
timeout: 180,
maxOutputLength: 50000,
timeout: 600,
maxOutputLength: 120000,
},
memory: {
enabled: true,

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,577 @@
import fs from "fs";
import path from "path";
import { spawn } from "child_process";
/** Per-command timeout (10 minutes) used when the caller gives none or an invalid one. */
const DEFAULT_TIMEOUT_MS = 10 * 60_000;
/** Maximum number of characters retained for each of stdout and stderr of a command. */
const OUTPUT_CAP = 120_000;
/**
 * Ecosystem an install request targets. `"auto"` is resolved to a concrete
 * kind from the preferred manager, falling back to `"node"`.
 */
export type InstallKind = "auto" | "node" | "python" | "go" | "uv" | "apt";
/** Record of one installer command that was run, or skipped, while installing. */
export type InstallAttempt = {
// Display form of the command line; for skipped attempts this is just the manager name.
command: string;
// Manager label of the step that produced this attempt.
manager: string;
// Exit code; null when none is available (e.g. spawn error or skipped attempt).
code: number | null;
// Wall-clock duration of the command in milliseconds (0 for skipped attempts).
durationMs: number;
// Captured output streams, each capped in length.
stdout: string;
stderr: string;
// True when the timeout fired and the process was signalled.
timedOut: boolean;
// True only when the exit code was 0.
success: boolean;
// True when nothing was run because the plan had no steps.
skipped: boolean;
// Explanation attached to skipped attempts.
reason?: string;
};
/** Input accepted by `installPackages`. */
export type InstallPackagesParams = {
// Requested ecosystem; "auto" is resolved using preferManager.
kind: InstallKind;
// Package names/specifiers; entries are trimmed, and blanks and duplicates are dropped.
packages: string[];
// Optional manager to try first (also steers "auto" resolution).
preferManager?: string;
// Only an explicit `true` enables global installs; consulted by the node plans.
global?: boolean;
// Working directory for every spawned command.
cwd: string;
// Per-command timeout; defaults to 10 minutes and is clamped to 1 s .. 30 min.
timeoutMs?: number;
};
/** Outcome returned by `installPackages`. */
export type InstallPackagesResult = {
// True when one plan ran all of its steps with exit code 0.
success: boolean;
// The kind exactly as requested by the caller.
kind: InstallKind;
// The concrete kind actually used after resolving "auto".
resolvedKind: Exclude<InstallKind, "auto">;
// Manager of the plan that succeeded; null on failure.
manager: string | null;
// Human-readable summary or error text.
message: string;
// Every attempt made, in execution order, across all plans tried.
attempts: InstallAttempt[];
};
/** One command within an install plan. */
type InstallStep = {
// argv[0] is the executable; the remaining entries are passed as its arguments.
argv: string[];
// Manager label copied onto the resulting attempt.
manager: string;
// Working directory for the command.
cwd?: string;
};
/** Ordered list of commands that together install packages with one manager. */
type InstallPlan = {
// Manager name reported in the result when this plan succeeds.
manager: string;
// Commands run in order; an empty list marks the manager as unavailable (attempt is recorded as skipped).
steps: InstallStep[];
};
/** Raw result of running a single command via `runCommand`. */
type CommandRunResult = {
// Exit code reported on close; null when the process emitted an error instead.
code: number | null;
// Captured output streams, each capped in length.
stdout: string;
stderr: string;
// True when the timeout fired before the process finished.
timedOut: boolean;
// Milliseconds elapsed between the start of the call and its resolution.
durationMs: number;
};
/**
 * Installs packages by trying each compatible installer plan in turn until one succeeds.
 *
 * The package list is trimmed and de-duplicated, `"auto"` is resolved to a
 * concrete kind, and the timeout is clamped before any command runs. Every
 * attempt (including managers skipped because they are not in PATH) is
 * returned in `attempts`.
 *
 * On failure the message is the stderr of the most recent attempt that
 * actually executed. Trailing skipped attempts have empty stderr, so reading
 * only the very last attempt would hide the real installer error behind the
 * generic fallback text.
 *
 * @param params Install request (kind, packages, preferred manager, cwd, timeout).
 * @returns A result object; this function reports failures in the result rather than throwing.
 */
export async function installPackages(params: InstallPackagesParams): Promise<InstallPackagesResult> {
  const resolvedKind = resolveAutoKind(params.kind, params.preferManager);
  const attempts: InstallAttempt[] = [];

  const failure = (message: string): InstallPackagesResult => ({
    success: false,
    kind: params.kind,
    resolvedKind,
    manager: null,
    message,
    attempts,
  });

  const packages = uniqueNonEmpty(params.packages);
  if (packages.length === 0) {
    return failure("No packages specified.");
  }

  const timeoutMs = clampTimeout(params.timeoutMs ?? DEFAULT_TIMEOUT_MS);
  const plans = await buildInstallPlans({
    kind: resolvedKind,
    packages,
    preferManager: params.preferManager,
    global: params.global === true,
    cwd: params.cwd,
    timeoutMs,
  });
  if (plans.length === 0) {
    return failure(`No compatible installer found for kind=${resolvedKind}.`);
  }

  for (const plan of plans) {
    const planResult = await executePlan(plan, timeoutMs);
    attempts.push(...planResult.attempts);
    if (planResult.success) {
      return {
        success: true,
        kind: params.kind,
        resolvedKind,
        manager: plan.manager,
        message: `Installed successfully using ${plan.manager}.`,
        attempts,
      };
    }
  }

  // Skipped attempts never ran a command, so they carry no diagnostics.
  const lastExecuted = attempts.filter((attempt) => !attempt.skipped).pop();
  return failure(
    lastExecuted?.stderr.trim() ||
      `Failed to install package(s) with available ${resolvedKind} installers.`
  );
}
/**
 * Dispatches to the plan builder for the requested (already resolved) install kind.
 *
 * @param params Resolved kind plus the package list and install options.
 * @returns The candidate plans, in the order they should be tried.
 */
async function buildInstallPlans(params: {
  kind: Exclude<InstallKind, "auto">;
  packages: string[];
  preferManager?: string;
  global: boolean;
  cwd: string;
  timeoutMs: number;
}): Promise<InstallPlan[]> {
  const { kind } = params;
  switch (kind) {
    case "apt": {
      // The only builder that is asynchronous (it probes sudo).
      const aptPlans = await buildAptPlans(params);
      return aptPlans;
    }
    case "go": {
      return buildGoPlans(params);
    }
    case "uv": {
      return buildUvPlans(params);
    }
    case "python": {
      return buildPythonPlans(params);
    }
    case "node": {
      return buildNodePlans(params);
    }
  }
}
/**
 * Builds one plan per Node package manager (pnpm, npm, yarn, bun), with the
 * preferred manager moved to the front when it is one of those four.
 *
 * A manager that is not found in PATH still yields a plan, but with no steps.
 *
 * @param params Packages, optional preferred manager, global flag and working directory.
 * @returns Plans in the order they should be attempted.
 */
function buildNodePlans(params: {
  packages: string[];
  preferManager?: string;
  global: boolean;
  cwd: string;
}): InstallPlan[] {
  const { packages, cwd } = params;
  const installGlobally = params.global;

  const argvFor = (manager: string): string[] => {
    switch (manager) {
      case "pnpm":
        return installGlobally
          ? ["pnpm", "add", "-g", "--ignore-scripts", ...packages]
          : ["pnpm", "add", ...packages];
      case "npm":
        return installGlobally
          ? ["npm", "install", "-g", "--ignore-scripts", ...packages]
          : ["npm", "install", ...packages];
      case "yarn":
        return installGlobally
          ? ["yarn", "global", "add", ...packages]
          : ["yarn", "add", ...packages];
      default:
        return installGlobally
          ? ["bun", "add", "-g", "--ignore-scripts", ...packages]
          : ["bun", "add", ...packages];
    }
  };

  const candidates = orderedManagers(
    ["pnpm", "npm", "yarn", "bun"],
    normalizeManager(params.preferManager)
  );

  return candidates.map((manager): InstallPlan => {
    if (!commandExists(manager)) {
      // Empty steps mark the manager as unavailable.
      return { manager, steps: [] };
    }
    return { manager, steps: [{ manager, argv: argvFor(manager), cwd }] };
  });
}
/**
 * Builds the Python install plans: `pip` through the first available
 * interpreter (`python3`, then `python`) and `uv pip install` when uv exists.
 *
 * The uv plan goes first only when the preferred manager is `uv`; otherwise it
 * is appended after the pip plan.
 *
 * @param params Packages, optional preferred manager and working directory.
 * @returns Plans in the order they should be attempted (possibly empty).
 */
function buildPythonPlans(params: {
  packages: string[];
  preferManager?: string;
  cwd: string;
}): InstallPlan[] {
  const singleStepPlan = (manager: string, launcher: string[]): InstallPlan => ({
    manager,
    steps: [
      {
        manager,
        argv: [...launcher, "install", ...params.packages],
        cwd: params.cwd,
      },
    ],
  });
  const uvPlan = (): InstallPlan => singleStepPlan("uv", ["uv", "pip"]);

  const prefersUv = normalizeManager(params.preferManager) === "uv";
  const result: InstallPlan[] = [];

  if (prefersUv && commandExists("uv")) {
    result.push(uvPlan());
  }

  // python3 wins over python; only one pip plan is ever produced.
  const interpreter = ["python3", "python"].find((binary) => commandExists(binary));
  if (interpreter) {
    result.push(singleStepPlan("pip", [interpreter, "-m", "pip"]));
  }

  if (!prefersUv && commandExists("uv")) {
    result.push(uvPlan());
  }

  return result;
}
/**
 * Builds the single `uv tool install` plan, or no plan when uv is not in PATH.
 *
 * @param params Packages and working directory.
 * @returns A one-element plan list, or an empty list.
 */
function buildUvPlans(params: { packages: string[]; cwd: string }): InstallPlan[] {
  const plans: InstallPlan[] = [];
  if (commandExists("uv")) {
    const step: InstallStep = {
      manager: "uv",
      argv: ["uv", "tool", "install", ...params.packages],
      cwd: params.cwd,
    };
    plans.push({ manager: "uv", steps: [step] });
  }
  return plans;
}
/**
 * Builds the single `go install` plan, or no plan when go is not in PATH.
 *
 * @param params Packages and working directory.
 * @returns A one-element plan list, or an empty list.
 */
function buildGoPlans(params: { packages: string[]; cwd: string }): InstallPlan[] {
  const plans: InstallPlan[] = [];
  if (commandExists("go")) {
    const step: InstallStep = {
      manager: "go",
      argv: ["go", "install", ...params.packages],
      cwd: params.cwd,
    };
    plans.push({ manager: "go", steps: [step] });
  }
  return plans;
}
/**
 * Builds the apt-get plan (`update -qq` followed by `install -y`).
 *
 * Commands run directly when the process uid is 0. Otherwise they are prefixed
 * with `sudo`, but only if `sudo -n true` exits with 0 within at most 10 seconds.
 * No plan is produced when apt-get is missing, sudo is missing, or the sudo
 * probe fails.
 *
 * @param params Packages, working directory and the per-command timeout.
 * @returns A one-element plan list, or an empty list.
 */
async function buildAptPlans(params: {
  packages: string[];
  cwd: string;
  timeoutMs: number;
}): Promise<InstallPlan[]> {
  const { packages, cwd } = params;

  const updateThenInstall = (manager: string, prefix: string[]): InstallPlan[] => [
    {
      manager,
      steps: [
        { manager, argv: [...prefix, "apt-get", "update", "-qq"], cwd },
        { manager, argv: [...prefix, "apt-get", "install", "-y", ...packages], cwd },
      ],
    },
  ];

  if (!commandExists("apt-get")) {
    return [];
  }

  const runningAsRoot = typeof process.getuid === "function" && process.getuid() === 0;
  if (runningAsRoot) {
    return updateThenInstall("apt-get", []);
  }

  if (!commandExists("sudo")) {
    return [];
  }

  // `-n` makes sudo fail instead of prompting for a password.
  const probe = await runCommand(["sudo", "-n", "true"], {
    timeoutMs: Math.min(params.timeoutMs, 10_000),
    cwd,
  });
  return probe.code === 0 ? updateThenInstall("sudo-apt-get", ["sudo"]) : [];
}
/**
 * Runs the steps of one install plan in order, stopping at the first step
 * whose exit code is not 0.
 *
 * A plan without steps is reported as a single skipped attempt explaining
 * that the manager is not available in PATH.
 *
 * @param plan Plan to execute.
 * @param timeoutMs Timeout applied to each step individually.
 * @returns Whether every step succeeded, plus the attempts that were made.
 */
async function executePlan(
  plan: InstallPlan,
  timeoutMs: number
): Promise<{ success: boolean; attempts: InstallAttempt[] }> {
  const { manager: planManager, steps } = plan;

  if (steps.length === 0) {
    const skippedAttempt: InstallAttempt = {
      command: planManager,
      manager: planManager,
      code: null,
      durationMs: 0,
      stdout: "",
      stderr: "",
      timedOut: false,
      success: false,
      skipped: true,
      reason: `Manager "${planManager}" is not available in PATH.`,
    };
    return { success: false, attempts: [skippedAttempt] };
  }

  const attempts: InstallAttempt[] = [];
  let everyStepPassed = true;
  for (const { argv, manager, cwd } of steps) {
    const outcome = await runCommand(argv, { timeoutMs, cwd });
    const passed = outcome.code === 0;
    attempts.push({
      command: formatCommand(argv),
      manager,
      code: outcome.code,
      durationMs: outcome.durationMs,
      stdout: outcome.stdout,
      stderr: outcome.stderr,
      timedOut: outcome.timedOut,
      success: passed,
      skipped: false,
    });
    if (!passed) {
      // Later steps depend on earlier ones, so do not continue after a failure.
      everyStepPassed = false;
      break;
    }
  }
  return { success: everyStepPassed, attempts };
}
/**
 * Spawns a command and collects its capped stdout/stderr.
 *
 * The returned promise never rejects: spawn failures (including an empty argv
 * or a synchronous `spawn` throw) resolve with `code: null` and the error text
 * appended to stderr.
 *
 * When the timeout elapses the child receives SIGTERM, then SIGKILL 2 seconds
 * later if it still has not closed.
 *
 * @param argv Executable followed by its arguments.
 * @param options Timeout in milliseconds and optional working directory.
 * @returns Exit code, captured output, timeout flag and elapsed time.
 */
async function runCommand(
  argv: string[],
  options: {
    timeoutMs: number;
    cwd?: string;
  }
): Promise<CommandRunResult> {
  const startedAt = Date.now();
  return new Promise((resolve) => {
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    let settled = false;
    let killTimer: ReturnType<typeof setTimeout> | undefined;

    // "error" and "close" can both fire for one child; only the first one reports.
    const settle = (code: number | null, errorText?: string): void => {
      if (settled) {
        return;
      }
      settled = true;
      resolve({
        code,
        stdout,
        stderr:
          errorText === undefined ? stderr : appendWithCap(stderr, errorText, OUTPUT_CAP),
        timedOut,
        durationMs: Date.now() - startedAt,
      });
    };

    const [command, ...args] = argv;
    if (!command) {
      settle(null, "No command specified.");
      return;
    }

    let child: ReturnType<typeof spawn>;
    try {
      child = spawn(command, args, {
        cwd: options.cwd,
        env: process.env,
        stdio: ["ignore", "pipe", "pipe"],
      });
    } catch (error: unknown) {
      // spawn throws synchronously for invalid arguments (e.g. a NUL byte in an argument).
      settle(null, error instanceof Error ? error.message : String(error));
      return;
    }

    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is not turned into replacement characters.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (chunk: string) => {
      stdout = appendWithCap(stdout, chunk, OUTPUT_CAP);
    });
    child.stderr?.on("data", (chunk: string) => {
      stderr = appendWithCap(stderr, chunk, OUTPUT_CAP);
    });

    const timer = setTimeout(() => {
      timedOut = true;
      try {
        child.kill("SIGTERM");
      } catch {
        // best effort: the process may already be gone
      }
      killTimer = setTimeout(() => {
        try {
          child.kill("SIGKILL");
        } catch {
          // best effort: the process may already be gone
        }
      }, 2000);
    }, options.timeoutMs);

    child.on("close", (code) => {
      clearTimeout(timer);
      // The process has ended, so the pending SIGKILL escalation is pointless.
      if (killTimer !== undefined) {
        clearTimeout(killTimer);
      }
      settle(code);
    });
    child.on("error", (error) => {
      // The escalation timer is left alone here: an "error" does not prove the
      // process has exited (it is also emitted when a kill attempt fails).
      clearTimeout(timer);
      settle(null, error.message);
    });
  });
}
/**
 * Appends `chunk` to `current` without letting the result exceed `cap` characters.
 *
 * @param current Text accumulated so far (returned unchanged once it reaches the cap).
 * @param chunk New text to append; only the part that still fits is kept.
 * @param cap Maximum length of the returned string.
 * @returns The combined, possibly truncated, text.
 */
function appendWithCap(current: string, chunk: string, cap: number): string {
  const room = cap - current.length;
  if (room <= 0) {
    return current;
  }
  const fitsEntirely = room >= chunk.length;
  return fitsEntirely ? current + chunk : current + chunk.slice(0, room);
}
/**
 * Turns the requested kind into a concrete one.
 *
 * Explicit kinds pass through untouched. `"auto"` is derived from the
 * preferred manager (go, uv, pip/python, apt/apt-get) and falls back to
 * `"node"` when the manager is absent or unrecognized.
 *
 * @param kind Kind requested by the caller.
 * @param preferManager Optional manager hint; compared case-insensitively after trimming.
 * @returns The concrete install kind.
 */
function resolveAutoKind(kind: InstallKind, preferManager?: string): Exclude<InstallKind, "auto"> {
  if (kind !== "auto") {
    return kind;
  }
  switch (normalizeManager(preferManager)) {
    case "go":
      return "go";
    case "uv":
      return "uv";
    case "pip":
    case "python":
      return "python";
    case "apt":
    case "apt-get":
      return "apt";
    default:
      return "node";
  }
}
/**
 * Returns the manager list with the preferred manager moved to the front.
 *
 * The input array itself is returned when no preference is given or the
 * preference (lower-cased) is not in the list; it is never mutated.
 *
 * @param base Managers in default priority order.
 * @param preferred Optional manager to try first.
 * @returns The managers in the order they should be attempted.
 */
function orderedManagers(base: string[], preferred?: string): string[] {
  const wanted = preferred ? preferred.toLowerCase() : "";
  if (wanted === "" || !base.includes(wanted)) {
    return base;
  }
  const others = base.filter((manager) => manager !== wanted);
  return [wanted, ...others];
}
/**
 * Checks whether `command` resolves to a runnable file in one of the PATH directories.
 *
 * A candidate must be a regular file, so a directory that happens to share the
 * command's name (or an empty command name, which joins to the directory
 * itself) is not reported as an available command. On non-Windows platforms
 * the file must also be executable by the current user. On Windows the bare
 * name is tried first, then every PATHEXT extension; `.cmd` is always included.
 *
 * @param command Executable name to look for (not a path).
 * @param envPath PATH-style string to search; defaults to `process.env.PATH`.
 * @returns `true` when a matching runnable file is found.
 */
function commandExists(command: string, envPath?: string): boolean {
  const rawPath = envPath ?? process.env.PATH;
  if (!rawPath || !command) {
    return false;
  }

  const onWindows = process.platform === "win32";
  const suffixes = [""];
  if (onWindows) {
    const pathExt = (process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD")
      .split(";")
      .map((ext) => ext.trim())
      .filter((ext) => ext.length > 0);
    for (const ext of [...pathExt, ".cmd"]) {
      if (!suffixes.some((known) => known.toLowerCase() === ext.toLowerCase())) {
        suffixes.push(ext);
      }
    }
  }

  const isRunnableFile = (candidate: string): boolean => {
    try {
      if (!fs.statSync(candidate).isFile()) {
        return false;
      }
      if (!onWindows) {
        // Throws when the current user lacks execute permission.
        fs.accessSync(candidate, fs.constants.X_OK);
      }
      return true;
    } catch {
      // Missing entry, unreadable directory, or not executable.
      return false;
    }
  };

  for (const dir of rawPath.split(path.delimiter)) {
    const trimmed = dir.trim();
    if (!trimmed) {
      continue;
    }
    for (const suffix of suffixes) {
      if (isRunnableFile(path.join(trimmed, `${command}${suffix}`))) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Trims every entry, drops the empty ones and removes duplicates while
 * keeping the order of first appearance.
 *
 * @param values Raw strings (e.g. package specifiers).
 * @returns A new array of distinct, non-empty, trimmed strings.
 */
function uniqueNonEmpty(values: string[]): string[] {
  const cleaned = values.map((entry) => entry.trim()).filter((entry) => entry.length > 0);
  // A Set iterates in insertion order, so the first occurrence wins.
  return Array.from(new Set(cleaned));
}
/**
 * Canonicalizes a manager name: trimmed and lower-cased.
 *
 * @param value Manager name as supplied by the caller, if any.
 * @returns The canonical name, or `undefined` when the input is missing or blank.
 */
function normalizeManager(value?: string): string | undefined {
  if (value == null) {
    return undefined;
  }
  const canonical = value.trim().toLowerCase();
  return canonical.length > 0 ? canonical : undefined;
}
/**
 * Renders an argv array as a single space-separated display string, quoting
 * each argument that needs it.
 *
 * @param argv Executable followed by its arguments.
 * @returns The display form of the command line.
 */
function formatCommand(argv: string[]): string {
  const rendered: string[] = [];
  for (const part of argv) {
    rendered.push(quoteArg(part));
  }
  return rendered.join(" ");
}
/**
 * Quotes one argument for display.
 *
 * Arguments made only of letters, digits and `_ . / : @ % + -` are returned
 * unchanged; anything else (including the empty string) is rendered with
 * `JSON.stringify`.
 *
 * @param arg Argument to render.
 * @returns The argument, quoted when necessary.
 */
function quoteArg(arg: string): string {
  const needsNoQuoting = /^[a-zA-Z0-9_./:@%+-]+$/.test(arg);
  return needsNoQuoting ? arg : JSON.stringify(arg);
}
/**
 * Sanitizes a timeout in milliseconds.
 *
 * Non-finite, zero or negative values fall back to the default timeout. Valid
 * values are floored and limited to the range 1 second .. 30 minutes.
 *
 * @param value Requested timeout in milliseconds.
 * @returns A usable timeout in milliseconds.
 */
function clampTimeout(value: number): number {
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) {
    return DEFAULT_TIMEOUT_MS;
  }
  const lowerBound = 1_000;
  const upperBound = 30 * 60_000;
  const whole = Math.floor(value);
  if (whole < lowerBound) {
    return lowerBound;
  }
  return whole > upperBound ? upperBound : whole;
}

View File

@@ -6,12 +6,21 @@ import { constants as fsConstants } from "fs";
import path from "path";
import type { AgentContext } from "@/lib/agent/types";
import type { AppSettings, McpServerConfig } from "@/lib/types";
import { executeCode } from "@/lib/tools/code-execution";
import {
clearFinishedManagedProcessSessions,
executeCode,
killManagedProcessSession,
listManagedProcessSessions,
pollManagedProcessSession,
readManagedProcessSessionLog,
removeManagedProcessSession,
} from "@/lib/tools/code-execution";
import { memorySave, memoryLoad, memoryDelete } from "@/lib/tools/memory-tools";
import { knowledgeQuery } from "@/lib/tools/knowledge-query";
import { searchWeb } from "@/lib/tools/search-engine";
import { callSubordinate } from "@/lib/tools/call-subordinate";
import { createCronTool } from "@/lib/tools/cron-tool";
import { installPackages } from "@/lib/tools/install-orchestrator";
import { loadPdf } from "@/lib/memory/loaders/pdf-loader";
import {
getAllProjects,
@@ -618,7 +627,7 @@ export function createAgentTools(
if (settings.codeExecution.enabled) {
tools.code_execution = tool({
description:
"Execute code in Python, Node.js, or Shell terminal. Use this to run scripts, install packages, manipulate files, perform calculations, or any task that requires code execution. The code runs in a persistent shell session.",
"Execute code in Python, Node.js, or Shell terminal. Use this to run scripts, install packages, manipulate files, perform calculations, or any task that requires code execution. For terminal runtime, session IDs preserve working directory continuity across calls.",
inputSchema: z.object({
runtime: z
.enum(["python", "nodejs", "terminal"])
@@ -632,10 +641,25 @@ export function createAgentTools(
.number()
.default(0)
.describe(
"Session ID (0-9). Use different sessions for parallel tasks. Default is 0."
"Session ID (0-9). Reuse a session to keep terminal working-directory state between calls. Use different sessions for independent tasks."
),
background: z
.boolean()
.default(false)
.describe(
"Run execution in background and return immediately with a managed process session id."
),
yield_ms: z
.number()
.int()
.min(10)
.max(120000)
.optional()
.describe(
"Optional milliseconds to wait before yielding a still-running command to background process management."
),
}),
execute: async ({ runtime, code, session }) => {
execute: async ({ runtime, code, session, background, yield_ms }) => {
const normalizedCode = code.replace(/\r\n/g, "\n");
const sanitizedCode = normalizedCode.replace(/\s+$/, "");
const lineCount = sanitizedCode.length === 0 ? 0 : sanitizedCode.split("\n").length;
@@ -649,7 +673,118 @@ export function createAgentTools(
return `[Preflight error] Code payload has too many lines (${lineCount}). Limit is ${CODE_EXEC_MAX_LINES}. Split the task into smaller executions.`;
}
const cwd = resolveContextCwd(context);
return executeCode(runtime, sanitizedCode, session, settings.codeExecution, cwd);
return executeCode(runtime, sanitizedCode, session, settings.codeExecution, cwd, {
background,
yieldMs: typeof yield_ms === "number" ? yield_ms : undefined,
});
},
});
tools.install_packages = tool({
description:
"Install dependencies with installer fallback logic. Supports node (npm/pnpm/yarn/bun), python (pip/uv), go, uv, and apt. Use this when package installation via code_execution is flaky.",
inputSchema: z.object({
kind: z
.enum(["auto", "node", "python", "go", "uv", "apt"])
.default("auto")
.describe("Dependency ecosystem to install for."),
packages: z
.array(z.string())
.min(1)
.describe("List of package names/specifiers to install."),
prefer_manager: z
.string()
.optional()
.describe("Optional preferred manager (e.g. pnpm, npm, pip, uv, go, apt-get)."),
global: z
.boolean()
.default(false)
.describe("Whether to install globally when supported (mainly node ecosystem)."),
timeout_seconds: z
.number()
.int()
.min(1)
.max(1800)
.default(600)
.describe("Timeout per installer attempt in seconds."),
}),
execute: async ({ kind, packages, prefer_manager, global, timeout_seconds }) => {
const cwd = resolveContextCwd(context);
return installPackages({
kind,
packages,
preferManager: prefer_manager,
global,
cwd,
timeoutMs: timeout_seconds * 1000,
});
},
});
tools.process = tool({
description:
"Manage code_execution background sessions (list, poll, log, kill, clear, remove). Use this after code_execution returns a managed session id.",
inputSchema: z.object({
action: z
.enum(["list", "poll", "log", "kill", "clear", "remove"])
.describe("Process management action."),
session_id: z
.string()
.optional()
.describe("Managed process session id for poll/log/kill/remove."),
timeout_ms: z
.number()
.int()
.min(0)
.max(120000)
.optional()
.describe("Optional wait timeout for poll action."),
offset: z
.number()
.int()
.min(0)
.optional()
.describe("Optional line offset for log action."),
limit: z
.number()
.int()
.min(1)
.max(5000)
.optional()
.describe("Optional line count for log action."),
}),
execute: async ({ action, session_id, timeout_ms, offset, limit }) => {
if (action === "list") {
return {
success: true,
sessions: listManagedProcessSessions(),
};
}
if (action === "poll") {
if (!session_id?.trim()) {
return { success: false, error: "session_id is required for poll." };
}
return pollManagedProcessSession(session_id, timeout_ms);
}
if (action === "log") {
if (!session_id?.trim()) {
return { success: false, error: "session_id is required for log." };
}
return readManagedProcessSessionLog(session_id, offset, limit);
}
if (action === "kill") {
if (!session_id?.trim()) {
return { success: false, error: "session_id is required for kill." };
}
return killManagedProcessSession(session_id);
}
if (action === "remove") {
if (!session_id?.trim()) {
return { success: false, error: "session_id is required for remove." };
}
return removeManagedProcessSession(session_id);
}
return clearFinishedManagedProcessSessions();
},
});
}

View File

@@ -4,12 +4,13 @@ You are a powerful AI agent with access to tools that allow you to interact with
## Core Capabilities
1. **Code Execution** - Execute Python, Node.js, and Shell commands in persistent terminal sessions
1. **Code Execution** - Execute Python, Node.js, and Shell commands with session-scoped continuity
2. **Persistent Memory** - Save and retrieve information across conversations using vector-based semantic memory
3. **Knowledge Base** - Query uploaded documents using semantic search (RAG)
4. **Web Search** - Search the internet for current information
5. **Multi-Agent Delegation** - Delegate complex subtasks to subordinate agents
6. **Cron Scheduling** - Create, update, run, and inspect scheduled jobs
7. **Process Management** - Inspect and control background code execution sessions
## Guidelines
@@ -25,9 +26,11 @@ You are a powerful AI agent with access to tools that allow you to interact with
- Choose the appropriate runtime: `python` for data processing and scripting, `nodejs` for web/JS tasks, `terminal` for shell commands
- Always handle errors and edge cases in your code
- If Python fails with `ModuleNotFoundError`, install the missing dependency with `python3 -m pip install <package>` using `terminal`, then retry
- For OS-level packages, use `sudo apt-get update && sudo apt-get install -y <package>`
- For OS-level packages on Debian/Ubuntu, use `apt-get`/`apt` and add `sudo` only when needed and available
- For file operations, prefer dedicated file tools (`read_text_file`, `read_pdf_file`, `write_text_file`, `copy_file`) over code execution
- Use `code_execution` for file operations only as a fallback when dedicated tools cannot complete the task
- For long-running commands, use `code_execution` with background/yield and continue via the `process` tool
- For dependency setup, prefer `install_packages` over ad-hoc install retries in shell
- Break complex tasks into smaller executable steps
- Check output after each execution before proceeding
- Do not use `sleep`, `at`, or background shell loops as a substitute for scheduled reminders/tasks; use the **cron** tool for scheduling

View File

@@ -14,10 +14,11 @@ Execute code in a specified runtime environment. The code runs on the user's mac
2. **Print outputs explicitly** — always `print()` or `console.log()` results you want to see
3. **Handle errors** — wrap risky operations in try/except or try/catch
4. **Check prerequisites** — verify packages are installed before importing
5. **Use sessions wisely** — session 0 is the default; use different sessions for parallel tasks
5. **Use sessions wisely** — session 0 is the default; reuse the same session to keep terminal working-directory state between calls
6. **Prefer dedicated file tools first** — use `read_text_file`, `read_pdf_file`, `write_text_file`, and `copy_file` for common file tasks; use `code_execution` only when those tools are insufficient
7. **Auto-resolve missing Python deps** — if you see `ModuleNotFoundError`, run `python3 -m pip install <package>` in `terminal`, then rerun Python code
8. **Install system packages with sudo** use `sudo apt-get update && sudo apt-get install -y <package>`
8. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
9. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
## Examples
@@ -26,7 +27,7 @@ First execution: `python3 -m pip install requests` (runtime: terminal)
Second execution: `import requests; r = requests.get('...'); print(r.json())` (runtime: python)
### Install a system package
Use: `sudo apt-get update && sudo apt-get install -y ffmpeg` (runtime: terminal)
Use: `apt-get update && apt-get install -y ffmpeg` (runtime: terminal). If permission is denied and `sudo` exists, retry with `sudo`.
### File operations (fallback)
```python
@@ -42,9 +43,12 @@ print(content)
uname -a && python3 --version && node --version
```
### Long running command
Use `background=true` (or `yield_ms`) and then follow up with `process` tool actions (`poll`, `log`).
## Limitations
- Execution timeout: configurable (default 180 seconds)
- Execution timeout: configurable (default 600 seconds)
- Output is truncated at configurable max length
- No GUI applications — terminal only
- Network access depends on system configuration

View File

@@ -0,0 +1,24 @@
# Install Packages Tool
Install dependencies with fallback manager selection.
## When to Use
- Package installs in `code_execution` are failing or flaky.
- You need controlled install attempts with structured diagnostics.
## Supported Kinds
- `node` — tries node managers (pnpm/npm/yarn/bun)
- `python` — pip/uv flow
- `go` — `go install`
- `uv` — `uv tool install`
- `apt` — apt-get flow with root/sudo checks
- `auto` — chooses a default flow from inputs
## Guidelines
1. Pass explicit `kind` when known to avoid ambiguous installs.
2. Use `prefer_manager` if project policy requires a specific manager.
3. Review `attempts` in tool output to understand fallback behavior and errors.
4. If install fails repeatedly, report exact failing command and stderr to user.

View File

@@ -0,0 +1,19 @@
# Process Tool
Manage background sessions created by `code_execution`.
## Actions
- `list` — list running and finished managed sessions.
- `poll` — check current status/output of one session (optionally wait with `timeout_ms`).
- `log` — read session log output with optional line window (`offset`, `limit`).
- `kill` — terminate a running session.
- `remove` — remove one finished session from history.
- `clear` — clear all finished sessions.
## Usage Rules
1. Use `process` only after `code_execution` returned a managed session id.
2. For `poll`/`log`/`kill`/`remove`, always pass `session_id`.
3. If `poll` returns `retryInMs`, wait roughly that long before the next poll.
4. If status is `completed`/`failed`/`killed`, stop polling and report outcome.