Harden tool execution recovery and fix multi-tab sync hangs

2026-03-07 01:53:08 +00:00 · 2026-02-27 17:06:17 +03:00
parent dcb1eabb4e
commit ce362a836b
8 changed files with 291 additions and 34 deletions
--- a/16
+++ b/16
@@ -33,6 +33,22 @@ RUN apt-get update \
    curl \
    git \
    jq \
+    libasound2 \
+    libatk1.0-0 \
+    libatspi2.0-0 \
+    libdbus-1-3 \
+    libgbm1 \
+    libglib2.0-0 \
+    libnspr4 \
+    libnss3 \
+    libx11-6 \
+    libxcb1 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxext6 \
+    libxfixes3 \
+    libxkbcommon0 \
+    libxrandr2 \
    python3 \
    python3-requests \
    python3-venv \
--- a/bundled-skills/playwright-cli/SKILL.md
+++ b/bundled-skills/playwright-cli/SKILL.md
@@ -216,11 +216,11 @@ playwright-cli kill-all

 ## Local installation

-In some cases user might want to install playwright-cli locally. If running globally available `playwright-cli` binary fails, use `npx playwright-cli` to run the commands. For example:
+In some cases the user might want to install playwright-cli locally. If running the globally available `playwright-cli` binary fails, use the non-interactive `npx -y @playwright/cli` form to run the commands. (The `playwright-cli` npm package is deprecated for execution.) For example:

 ```bash
-npx playwright-cli open https://example.com
-npx playwright-cli click e1
+npx -y @playwright/cli open https://example.com
+npx -y @playwright/cli click e1
 ```

 ## Example: Form submission
--- a/src/hooks/use-background-sync.ts
+++ b/src/hooks/use-background-sync.ts
@@ -10,6 +10,60 @@ interface BackgroundSyncOptions {
  fallbackIntervalMs?: number;
 }

+// Callback invoked with each successfully parsed "sync" event.
+type SyncSubscriber = (event: UiSyncEvent) => void;
+
+// Module-level state: a single EventSource and a single "sync" listener are
+// shared by every registered subscriber.
+let sharedEventSource: EventSource | null = null;
+let sharedSyncListener: ((event: MessageEvent<string>) => void) | null = null;
+// Incrementing counter used as the key for entries in `syncSubscribers`.
+let nextSubscriberId = 1;
+const syncSubscribers = new Map<number, SyncSubscriber>();
+
+/**
+ * Opens the shared `/api/events` EventSource if needed and attaches the single
+ * "sync" listener that fans parsed events out to every subscriber.
+ *
+ * A source that is still connecting or open is reused. A source whose
+ * `readyState` is CLOSED does not reconnect by itself, so it is torn down and
+ * replaced here; otherwise every current and future subscriber would stay
+ * silent. This check only runs when this function is called (on subscribe).
+ */
+function ensureSharedEventSource(): void {
+  if (sharedEventSource) {
+    if (sharedEventSource.readyState !== EventSource.CLOSED) {
+      return;
+    }
+    // Dead connection: detach the old listener before replacing the source.
+    if (sharedSyncListener) {
+      sharedEventSource.removeEventListener(
+        "sync",
+        sharedSyncListener as EventListener
+      );
+    }
+    sharedEventSource.close();
+    sharedEventSource = null;
+    sharedSyncListener = null;
+  }
+
+  sharedEventSource = new EventSource("/api/events");
+  sharedSyncListener = (event: MessageEvent<string>) => {
+    let parsed: UiSyncEvent | null = null;
+    try {
+      parsed = JSON.parse(event.data) as UiSyncEvent;
+    } catch {
+      // Ignore malformed SSE event payloads.
+      return;
+    }
+
+    for (const subscriber of syncSubscribers.values()) {
+      try {
+        subscriber(parsed);
+      } catch {
+        // Keep fan-out resilient to individual listener failures.
+      }
+    }
+  };
+
+  sharedEventSource.addEventListener("sync", sharedSyncListener as EventListener);
+}
+
+/**
+ * Registers `subscriber` for parsed "sync" events, making sure the shared
+ * connection exists first.
+ *
+ * @returns A cleanup function that removes the subscriber. When it removes the
+ * last one, the shared listener is detached and the EventSource is closed.
+ */
+function subscribeSharedSync(subscriber: SyncSubscriber): () => void {
+  ensureSharedEventSource();
+  const id = nextSubscriberId++;
+  syncSubscribers.set(id, subscriber);
+
+  const unsubscribe = (): void => {
+    syncSubscribers.delete(id);
+    if (syncSubscribers.size !== 0 || !sharedEventSource) {
+      return;
+    }
+
+    // Last subscriber is gone: release the shared connection.
+    if (sharedSyncListener) {
+      sharedEventSource.removeEventListener(
+        "sync",
+        sharedSyncListener as EventListener
+      );
+    }
+    sharedEventSource.close();
+    sharedEventSource = null;
+    sharedSyncListener = null;
+  };
+
+  return unsubscribe;
+}
+
 function matchesScope(
  event: UiSyncEvent,
  options: BackgroundSyncOptions
@@ -49,7 +103,6 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
  const [tick, setTick] = useState(0);

  useEffect(() => {
-    let eventSource: EventSource | null = null;
    const scope: BackgroundSyncOptions = {
      topics: topicsKey
        ? (topicsKey.split(",").filter(Boolean) as UiSyncTopic[])
@@ -63,24 +116,14 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
      setTick((value) => value + 1);
    };

-    const onSync = (event: MessageEvent<string>) => {
-      try {
-        const parsed = JSON.parse(event.data) as UiSyncEvent;
-        if (!matchesScope(parsed, scope)) {
-          return;
-        }
-        bump();
-      } catch {
-        // Ignore malformed SSE event payloads.
+    const onSync = (parsed: UiSyncEvent) => {
+      if (!matchesScope(parsed, scope)) {
+        return;
      }
+      bump();
    };

-    const connect = () => {
-      eventSource = new EventSource("/api/events");
-      eventSource.addEventListener("sync", onSync as EventListener);
-    };
-
-    connect();
+    const unsubscribeSync = subscribeSharedSync(onSync);

    const fallbackTimer =
      fallbackIntervalMs > 0 ? window.setInterval(bump, fallbackIntervalMs) : null;
@@ -101,10 +144,7 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
      if (fallbackTimer) {
        window.clearInterval(fallbackTimer);
      }
-      if (eventSource) {
-        eventSource.removeEventListener("sync", onSync as EventListener);
-        eventSource.close();
-      }
+      unsubscribeSync();
      document.removeEventListener("visibilitychange", onVisibilityChange);
      window.removeEventListener("focus", onWindowFocus);
    };
--- a/src/lib/agent/agent.ts
+++ b/src/lib/agent/agent.ts
@@ -66,6 +66,147 @@ function parseJsonObject(text: string): Record<string, unknown> | null {
  }
 }

+/**
+ * Flattens a tool result into plain text for blocker detection.
+ *
+ * A string is returned unchanged. For a value that `asRecord` accepts, the
+ * string-valued `output` and `error` fields are joined with a newline, skipping
+ * empty ones. Anything else yields "".
+ */
+function getOutputTextForRecovery(output: unknown): string {
+  if (typeof output === "string") return output;
+
+  const record = asRecord(output);
+  if (!record) return "";
+
+  const parts: string[] = [];
+  for (const field of [record.output, record.error]) {
+    if (typeof field === "string" && field) {
+      parts.push(field);
+    }
+  }
+  return parts.join("\n");
+}
+
+/**
+ * Extracts the installable npm package name from a Node.js
+ * "Cannot find module '<x>'" / "Cannot find package '<x>'" error.
+ *
+ * Returns null when there is no such error, or when the missing specifier is a
+ * relative/absolute file path or a scheme-prefixed specifier (e.g. `node:fs`):
+ * those cannot be fixed by installing a package. Subpath imports are reduced to
+ * the package itself (`lodash/fp` -> `lodash`, `@scope/pkg/sub` -> `@scope/pkg`).
+ */
+function extractNodeMissingModule(text: string): string | null {
+  const match = text.match(/Cannot find (?:module|package) ['"]([^'"\n]+)['"]/i);
+  const specifier = match?.[1]?.trim();
+  if (!specifier) {
+    return null;
+  }
+
+  // "./x", "../x", "/abs/x", "C:\x", "node:fs", "file:///x" are not package names.
+  if (/^(?:\.{1,2}(?:[\\/]|$)|[\\/]|[a-zA-Z][a-zA-Z0-9+.-]*:)/.test(specifier)) {
+    return null;
+  }
+
+  const segments = specifier.split("/");
+  // Scoped packages keep two segments; everything else keeps only the first.
+  const packageName = specifier.startsWith("@")
+    ? segments.slice(0, 2).join("/")
+    : segments[0];
+  return packageName ? packageName : null;
+}
+
+/**
+ * Pulls the module name out of a Python
+ * `ModuleNotFoundError: No module named '<x>'` message.
+ *
+ * Returns the trimmed name, or null when the message is absent or the quoted
+ * name is blank.
+ */
+function extractPythonMissingModule(text: string): string | null {
+  const pattern = /ModuleNotFoundError:\s*No module named ['"]([^'"\n]+)['"]/i;
+  const captured = pattern.exec(text)?.[1];
+  if (captured === undefined) {
+    return null;
+  }
+  const name = captured.trim();
+  return name.length > 0 ? name : null;
+}
+
+/**
+ * Extracts the name of a command the shell or `spawn` could not find.
+ *
+ * Recognised forms (each on its own line for the shell variants):
+ * - `foo: not found`, `/bin/sh: 1: foo: not found`, `sh: 1: foo: not found`
+ * - `bash: foo: command not found`, `bash: line 1: foo: command not found`
+ * - `zsh: command not found: foo`
+ * - `spawn /path/to/foo ENOENT` (only the basename is returned)
+ *
+ * Lines may end in `\n` or `\r\n`. Returns null when nothing matches or when
+ * the extracted name would be empty.
+ */
+function extractMissingCommand(text: string): string | null {
+  // Optional "<shell>: [line ]N: " prefix, then "<cmd>: [command ]not found".
+  const shellMatch = text.match(
+    /(?:^|\n)(?:[\w./-]*sh:\s*(?:(?:line\s+)?\d+:\s*)?)?([a-zA-Z0-9._-]+):\s*(?:command\s+)?not found\r?(?:\n|$)/i
+  );
+  if (shellMatch?.[1]) {
+    return shellMatch[1];
+  }
+
+  // zsh puts the command name after the message.
+  const zshMatch = text.match(
+    /(?:^|\n)[\w./-]*sh:\s*command not found:\s*([a-zA-Z0-9._-]+)/i
+  );
+  if (zshMatch?.[1]) {
+    return zshMatch[1];
+  }
+
+  const spawnMatch = text.match(/spawn\s+([a-zA-Z0-9._/-]+)\s+ENOENT/i);
+  if (spawnMatch?.[1]) {
+    const command = spawnMatch[1].split("/").pop();
+    // A path ending in "/" has an empty basename; report that as "no command".
+    return command ? command : null;
+  }
+  return null;
+}
+
+/**
+ * Builds a multi-line recovery instruction for well-known, fixable failures in
+ * `code_execution` / `process` tool output.
+ *
+ * Checks run in a fixed order and the first match wins: interactive npx prompt,
+ * deprecated `playwright-cli` package, missing Playwright system dependencies,
+ * missing Node module, missing Python module, `playwright-cli` not found, and
+ * finally any other missing command (except `node` and `python3`).
+ *
+ * @returns The hint text, or null for other tools, empty output, or no match.
+ */
+function buildAutoRecoveryHint(toolName: string, output: unknown): string | null {
+  const isExecutionTool = toolName === "code_execution" || toolName === "process";
+  if (!isExecutionTool) {
+    return null;
+  }
+
+  const text = getOutputTextForRecovery(output);
+  if (!text) {
+    return null;
+  }
+
+  const compose = (...lines: string[]): string => lines.join("\n");
+
+  const npxPromptPending =
+    /Need to install the following packages/i.test(text) &&
+    /Ok to proceed\?/i.test(text);
+  if (npxPromptPending) {
+    return compose(
+      "Recoverable blocker detected: interactive npx prompt is waiting for confirmation.",
+      "Next action: rerun with non-interactive form using `npx -y ...`, then continue polling/retrying in this turn.",
+      "Do not stop on this blocker."
+    );
+  }
+
+  const deprecatedPlaywrightPackage =
+    /npm error could not determine executable to run/i.test(text) &&
+    /playwright-cli/i.test(text);
+  if (deprecatedPlaywrightPackage) {
+    return compose(
+      "Recoverable blocker detected: deprecated `playwright-cli` npm package does not expose an executable.",
+      "Next action: run the command with `npx -y @playwright/cli ...` (or install `@playwright/cli` via install_packages and retry).",
+      "Do not stop on this blocker."
+    );
+  }
+
+  if (text.includes("Host system is missing dependencies to run browsers")) {
+    return compose(
+      "Recoverable blocker detected: Playwright browser system dependencies are missing.",
+      "Next action: run install_packages with kind=\"apt\" for the required libs (or run `npx playwright install-deps` in terminal runtime), then retry the same Playwright command in this turn.",
+      "Do not stop and do not ask the user to run commands manually unless installation keeps failing after corrected retries."
+    );
+  }
+
+  const nodeModule = extractNodeMissingModule(text);
+  if (nodeModule) {
+    return compose(
+      `Recoverable blocker detected: missing Node module "${nodeModule}".`,
+      `Next action: call install_packages with kind="node" and packages=["${nodeModule}"], then retry the same command in this turn.`,
+      "Do not stop after this error."
+    );
+  }
+
+  const pythonModule = extractPythonMissingModule(text);
+  if (pythonModule) {
+    return compose(
+      `Recoverable blocker detected: missing Python module "${pythonModule}".`,
+      `Next action: call install_packages with kind="python" and packages=["${pythonModule}"], then retry the same command in this turn.`,
+      "Do not stop after this error."
+    );
+  }
+
+  if (/playwright-cli:\s*not found/i.test(text)) {
+    return compose(
+      "Recoverable blocker detected: playwright-cli is not installed/in PATH.",
+      "Next action: first try running the same command via `npx -y @playwright/cli ...`.",
+      "If npx path is unavailable, call install_packages with kind=\"node\" and packages=[\"@playwright/cli\"], then retry in this turn.",
+      "Do not end the turn on this error."
+    );
+  }
+
+  const command = extractMissingCommand(text);
+  // `node` and `python3` are excluded from the generic missing-command hint.
+  const isRuntimeBinary = command === "node" || command === "python3";
+  if (command && !isRuntimeBinary) {
+    return compose(
+      `Recoverable blocker detected: command "${command}" is missing.`,
+      `Next action: install it via install_packages (kind depends on ecosystem, e.g. apt for system commands), then retry the original command in this turn.`,
+      "Only report blocker after corrected install attempts fail."
+    );
+  }
+
+  return null;
+}
+
+/**
+ * Attaches an auto-recovery hint to a tool result.
+ *
+ * - No hint: the output is returned untouched.
+ * - String output: the hint block is appended to the string.
+ * - Value accepted by `asRecord`: a copy is returned with `recoverable: true`
+ *   and `recoveryHint` set. The hint is also written into `output` when that
+ *   field is a string (appended) or missing/null (used as the whole value).
+ *   A structured `output` (object, array, number, ...) is left as-is so the
+ *   tool's real result is not discarded.
+ * - Anything else: returned untouched.
+ */
+function appendRecoveryHint(output: unknown, hint: string | null): unknown {
+  if (!hint) {
+    return output;
+  }
+
+  const block = `\n\n[Auto-recovery hint]\n${hint}`;
+  if (typeof output === "string") {
+    return `${output}${block}`;
+  }
+
+  const record = asRecord(output);
+  if (!record) {
+    return output;
+  }
+
+  const current = record.output;
+  if (current !== undefined && current !== null && typeof current !== "string") {
+    // Structured output: keep it and expose the hint via `recoveryHint` only.
+    return {
+      ...record,
+      recoverable: true,
+      recoveryHint: hint,
+    };
+  }
+
+  return {
+    ...record,
+    output: current ? `${current}${block}` : block.trim(),
+    recoverable: true,
+    recoveryHint: hint,
+  };
+}
+
 function extractDeterministicFailureSignature(output: unknown): string | null {
  const outputRecord = asRecord(output);
  if (outputRecord && outputRecord.success === false) {
@@ -153,7 +294,7 @@ function normalizeNoProgressValue(value: unknown): unknown {
 }

 function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
-  const deterministicFailureByCall = new Map<string, string>();
+  let lastDeterministicFailure: { callKey: string; signature: string } | null = null;
  const noProgressByCall = new Map<string, { hash: string; count: number }>();
  const wrappedTools: ToolSet = {};

@@ -185,25 +326,29 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
          );
        }

-        const previousFailure = deterministicFailureByCall.get(callKey);
-        if (previousFailure) {
+        if (lastDeterministicFailure?.callKey === callKey) {
          return (
            `[Loop guard] Blocked repeated tool call "${toolName}" with identical arguments.\n` +
-            `Previous deterministic error: ${previousFailure}\n` +
+            `Previous deterministic error: ${lastDeterministicFailure.signature}\n` +
            "Change arguments based on the tool error before retrying."
          );
        }

        const output = await toolDef.execute(input as never, options as never);
-        const failureSignature = extractDeterministicFailureSignature(output);
+        const recoveryHint = buildAutoRecoveryHint(toolName, output);
+        const outputWithHint = appendRecoveryHint(output, recoveryHint);
+        const failureSignature = extractDeterministicFailureSignature(outputWithHint);
        if (failureSignature) {
-          deterministicFailureByCall.set(callKey, failureSignature);
+          lastDeterministicFailure = {
+            callKey,
+            signature: failureSignature,
+          };
        } else {
-          deterministicFailureByCall.delete(callKey);
+          lastDeterministicFailure = null;
        }

        if (isPollLikeCall(toolName, input)) {
-          const outputHash = stableSerialize(normalizeNoProgressValue(output));
+          const outputHash = stableSerialize(normalizeNoProgressValue(outputWithHint));
          const previous = noProgressByCall.get(callKey);
          if (previous && previous.hash === outputHash) {
            noProgressByCall.set(callKey, {
@@ -220,7 +365,7 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
          noProgressByCall.delete(callKey);
        }

-        return output;
+        return outputWithHint;
      },
    } as typeof toolDef;
  }
--- a/src/lib/tools/tool.ts
+++ b/src/lib/tools/tool.ts
@@ -51,6 +51,27 @@ interface TelegramRuntimeData {
  chatId: string | number;
 }

+/**
+ * Reads `context.data.currentUserMessage` and returns it trimmed.
+ * Yields "" when the value is missing or is not a string.
+ */
+function getCurrentUserMessageText(context: AgentContext): string {
+  const message = context.data?.currentUserMessage;
+  if (typeof message !== "string") {
+    return "";
+  }
+  return message.trim();
+}
+
+/**
+ * Decides whether the current user message explicitly asks to stop a process.
+ *
+ * Returns false for an empty message, or when a negation ("do not", "don't",
+ * "dont", "не") is followed within 20 characters by a stop word. Otherwise
+ * returns true if any English or Russian stop keyword appears as a whole word.
+ *
+ * Word edges use Unicode-aware lookarounds instead of `\b`: `\b` only treats
+ * ASCII letters, digits and `_` as word characters, so it never matches next
+ * to Cyrillic letters and the Russian keywords could not match at all.
+ * Russian negated targets are stems (prefixes), so they carry no end boundary.
+ */
+function userExplicitlyRequestedProcessKill(context: AgentContext): boolean {
+  const text = getCurrentUserMessageText(context);
+  if (!text) return false;
+
+  const wordStart = "(?<![\\p{L}\\p{N}_])";
+  const wordEnd = "(?![\\p{L}\\p{N}_])";
+  const englishStopWords = "stop|terminate|kill|cancel|abort|end";
+
+  const killIntent = new RegExp(
+    `${wordStart}(?:${englishStopWords}|прервать|прерви|остановить|останови|убить|убей|завершить|заверши|отменить|отмени)${wordEnd}`,
+    "iu"
+  );
+  // Every English stop word (including "end") can be negated.
+  const negatedIntent = new RegExp(
+    `${wordStart}(?:do not|don't|dont|не)${wordEnd}.{0,20}${wordStart}(?:(?:${englishStopWords})${wordEnd}|прерв|останов|убива|заверш|отмен)`,
+    "iu"
+  );
+
+  if (negatedIntent.test(text)) {
+    return false;
+  }
+
+  return killIntent.test(text);
+}
+
 function getTelegramRuntimeData(context: AgentContext): TelegramRuntimeData | null {
  const raw = context.data?.telegram;
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return null;
@@ -776,6 +797,13 @@ export function createAgentTools(
          if (!session_id?.trim()) {
            return { success: false, error: "session_id is required for kill." };
          }
+          if (!userExplicitlyRequestedProcessKill(context)) {
+            return {
+              success: false,
+              error:
+                "Kill blocked by policy: only stop a background process when the user explicitly asks to stop/cancel it. Continue with poll/log or wait for completion.",
+            };
+          }
          return killManagedProcessSession(session_id);
        }
        if (action === "remove") {
--- a/src/prompts/system.md
+++ b/src/prompts/system.md
@@ -26,6 +26,7 @@ You are a powerful AI agent with access to tools that allow you to interact with
 - Choose the appropriate runtime: `python` for data processing and scripting, `nodejs` for web/JS tasks, `terminal` for shell commands
 - Always handle errors and edge cases in your code
 - If Python fails with `ModuleNotFoundError`, install the missing dependency with `python3 -m pip install <package>` using `terminal`, then retry
+- If Node.js fails with `Cannot find module '<name>'`, install the missing package via `install_packages` (`kind=node`) or the project's package manager, then retry once
 - For OS-level packages on Debian/Ubuntu, use `apt-get`/`apt` and add `sudo` only when needed and available
 - For file operations, prefer dedicated file tools (`read_text_file`, `read_pdf_file`, `write_text_file`, `copy_file`) over code execution
 - Use `code_execution` for file operations only as a fallback when dedicated tools cannot complete the task
@@ -54,6 +55,12 @@ You are a powerful AI agent with access to tools that allow you to interact with
 - If a task is too complex, delegate parts to subordinate agents
 - Always verify the final result before responding

+### Blocker Recovery Protocol
+- Treat common environment/setup failures as recoverable blockers, not final outcomes
+- When a blocker is recoverable, do the fix immediately using tools (`install_packages`, `code_execution`) and retry in the same turn
+- Do not stop at the first recoverable error and do not ask the user to run install commands manually unless corrected retries already failed
+- Typical recoverable blockers: `Cannot find module ...`, `ModuleNotFoundError`, `...: not found`, Playwright missing browser dependencies
+
 ## Important Rules

 1. **Always respond using the response tool** — this is how your answer gets to the user
--- a/src/prompts/tool-code_execution.md
+++ b/src/prompts/tool-code_execution.md
@@ -17,8 +17,28 @@ Execute code in a specified runtime environment. The code runs on the user's mac
 5. **Use sessions wisely** — session 0 is the default; reuse the same session to keep terminal working-directory state between calls
 6. **Prefer dedicated file tools first** — use `read_text_file`, `read_pdf_file`, `write_text_file`, and `copy_file` for common file tasks; use `code_execution` only when those tools are insufficient
 7. **Auto-resolve missing Python deps** — if you see `ModuleNotFoundError`, run `python3 -m pip install <package>` in `terminal`, then rerun Python code
-8. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
-9. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
+8. **Auto-resolve missing Node deps** — if you see `Cannot find module '<name>'`, install it via `install_packages` (`kind=node`, package `<name>`) or package manager command, then rerun Node code once
+9. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
+10. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
+11. **Auto-resolve Playwright Linux deps** — if stderr contains `Host system is missing dependencies to run browsers`, install required OS deps via `install_packages` (`kind=apt`) or `npx playwright install-deps`, then rerun the original Playwright command once
+12. **Use non-interactive npx** — in this environment prefer `npx -y <package> ...` to avoid hanging on `Ok to proceed?` prompts
+13. **Use the correct Playwright CLI package** — npm package `playwright-cli` is deprecated for execution; use `npx -y @playwright/cli ...` (or install `@playwright/cli`)
+14. **Do not swallow execution failures** — if you run commands from Node/Python wrappers, propagate failure with non-zero exit code (e.g., `process.exit(1)` in catch)
+
+## Blocker Handling Policy
+
+If execution fails with a recoverable blocker, you must continue autonomously in this turn:
+- Install/fix prerequisites using available tools.
+- Retry the original command after the fix.
+- Repeat for up to 2 corrected retries before reporting failure.
+
+Do not stop after first failure for these classes:
+- Missing command (`...: not found`, `spawn ... ENOENT`)
+- Missing Node module (`Cannot find module ...`)
+- Missing Python module (`ModuleNotFoundError`)
+- Playwright browser dependency errors on Linux
+- Interactive package prompts (`Need to install the following packages` / `Ok to proceed?`)
+- `npm error could not determine executable to run` for `playwright-cli` (switch to `@playwright/cli`)

 ## Examples

--- a/src/prompts/tool-process.md
+++ b/src/prompts/tool-process.md
@@ -17,3 +17,4 @@ Manage background sessions created by `code_execution`.
 2. For `poll`/`log`/`kill`/`remove`, always pass `session_id`.
 3. If `poll` returns `retryInMs`, wait roughly that long before the next poll.
 4. If status is `completed`/`failed`/`killed`, stop polling and report outcome.
+5. Do not call `kill` unless the user explicitly asked to stop/cancel/terminate the running process.