Harden tool execution recovery and fix multi-tab sync hangs

This commit is contained in:
ilya-bov
2026-02-27 17:06:17 +03:00
parent dcb1eabb4e
commit ce362a836b
8 changed files with 291 additions and 34 deletions

View File

@@ -33,6 +33,22 @@ RUN apt-get update \
curl \
git \
jq \
libasound2 \
libatk1.0-0 \
libatspi2.0-0 \
libdbus-1-3 \
libgbm1 \
libglib2.0-0 \
libnspr4 \
libnss3 \
libx11-6 \
libxcb1 \
libxcomposite1 \
libxdamage1 \
libxext6 \
libxfixes3 \
libxkbcommon0 \
libxrandr2 \
python3 \
python3-requests \
python3-venv \

View File

@@ -216,11 +216,11 @@ playwright-cli kill-all
## Local installation
In some cases user might want to install playwright-cli locally. If running globally available `playwright-cli` binary fails, use `npx playwright-cli` to run the commands. For example:
In some cases the user might want to install playwright-cli locally. If running the globally available `playwright-cli` binary fails, use the non-interactive `npx -y @playwright/cli` form to run the commands. (The `playwright-cli` npm package is deprecated for execution.) For example:
```bash
npx playwright-cli open https://example.com
npx playwright-cli click e1
npx -y @playwright/cli open https://example.com
npx -y @playwright/cli click e1
```
## Example: Form submission

View File

@@ -10,6 +10,60 @@ interface BackgroundSyncOptions {
fallbackIntervalMs?: number;
}
// Callback that receives each parsed UI sync event fanned out from the shared stream.
type SyncSubscriber = (event: UiSyncEvent) => void;
// The single EventSource shared by all subscribers; null while no connection is open.
let sharedEventSource: EventSource | null = null;
// The "sync" listener attached to the shared EventSource, kept so it can be detached on teardown.
let sharedSyncListener: ((event: MessageEvent<string>) => void) | null = null;
// Id assigned to the next subscriber; incremented on every subscription.
let nextSubscriberId = 1;
// Currently registered subscribers, keyed by the id they were given when subscribing.
const syncSubscribers = new Map<number, SyncSubscriber>();
/**
 * Opens the shared `/api/events` EventSource on first use and wires a single
 * "sync" listener that fans each parsed event out to every registered subscriber.
 * Does nothing when a shared connection already exists.
 */
function ensureSharedEventSource(): void {
  if (sharedEventSource !== null) {
    return;
  }

  const dispatchSync = (message: MessageEvent<string>): void => {
    let payload: UiSyncEvent;
    try {
      payload = JSON.parse(message.data) as UiSyncEvent;
    } catch {
      // Malformed payloads are dropped without notifying anyone.
      return;
    }

    for (const [, notify] of syncSubscribers) {
      try {
        notify(payload);
      } catch {
        // Keep fan-out resilient to individual listener failures.
      }
    }
  };

  sharedEventSource = new EventSource("/api/events");
  sharedSyncListener = dispatchSync;
  sharedEventSource.addEventListener("sync", dispatchSync as EventListener);
}
/**
 * Registers `subscriber` on the shared sync stream, opening the stream if needed.
 *
 * @param subscriber - Callback invoked with every parsed sync event.
 * @returns An unsubscribe function. When the last subscriber leaves, the shared
 *   EventSource has its listener removed, is closed, and the shared state is reset.
 */
function subscribeSharedSync(subscriber: SyncSubscriber): () => void {
  ensureSharedEventSource();

  const id = nextSubscriberId;
  nextSubscriberId += 1;
  syncSubscribers.set(id, subscriber);

  return () => {
    syncSubscribers.delete(id);

    const source = sharedEventSource;
    if (syncSubscribers.size !== 0 || !source) {
      // Other subscribers still need the stream, or it is already torn down.
      return;
    }

    const listener = sharedSyncListener;
    if (listener) {
      source.removeEventListener("sync", listener as EventListener);
    }
    source.close();
    sharedEventSource = null;
    sharedSyncListener = null;
  };
}
function matchesScope(
event: UiSyncEvent,
options: BackgroundSyncOptions
@@ -49,7 +103,6 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
const [tick, setTick] = useState(0);
useEffect(() => {
let eventSource: EventSource | null = null;
const scope: BackgroundSyncOptions = {
topics: topicsKey
? (topicsKey.split(",").filter(Boolean) as UiSyncTopic[])
@@ -63,24 +116,14 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
setTick((value) => value + 1);
};
const onSync = (event: MessageEvent<string>) => {
try {
const parsed = JSON.parse(event.data) as UiSyncEvent;
if (!matchesScope(parsed, scope)) {
return;
}
bump();
} catch {
// Ignore malformed SSE event payloads.
const onSync = (parsed: UiSyncEvent) => {
if (!matchesScope(parsed, scope)) {
return;
}
bump();
};
const connect = () => {
eventSource = new EventSource("/api/events");
eventSource.addEventListener("sync", onSync as EventListener);
};
connect();
const unsubscribeSync = subscribeSharedSync(onSync);
const fallbackTimer =
fallbackIntervalMs > 0 ? window.setInterval(bump, fallbackIntervalMs) : null;
@@ -101,10 +144,7 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
if (fallbackTimer) {
window.clearInterval(fallbackTimer);
}
if (eventSource) {
eventSource.removeEventListener("sync", onSync as EventListener);
eventSource.close();
}
unsubscribeSync();
document.removeEventListener("visibilitychange", onVisibilityChange);
window.removeEventListener("focus", onWindowFocus);
};

View File

@@ -66,6 +66,147 @@ function parseJsonObject(text: string): Record<string, unknown> | null {
}
}
/**
 * Flattens a tool result into the text that recovery heuristics should scan.
 *
 * @param output - Raw tool output: either a string or a record-like value.
 * @returns The string itself, or the record's non-empty string `output` and
 *   `error` fields joined by a newline; `""` when nothing usable is present.
 */
function getOutputTextForRecovery(output: unknown): string {
  if (typeof output === "string") {
    return output;
  }

  const fields = asRecord(output);
  if (!fields) {
    return "";
  }

  const parts: string[] = [];
  const stdoutText = fields.output;
  const errorText = fields.error;
  if (typeof stdoutText === "string" && stdoutText) {
    parts.push(stdoutText);
  }
  if (typeof errorText === "string" && errorText) {
    parts.push(errorText);
  }
  return parts.join("\n");
}
/**
 * Extracts the installable npm package name from a Node.js
 * `Cannot find module '<specifier>'` error.
 *
 * Node reports the same error for missing local files (for example
 * `Cannot find module '/app/script.js'`), which cannot be fixed by installing a
 * package. Such specifiers (relative/absolute paths, Windows drive paths,
 * `file:`/`node:` URLs, `#`/`~`/`@/` aliases) yield `null` so callers do not
 * suggest a bogus install. Subpath imports are reduced to the package that owns
 * them (`lodash/fp` -> `lodash`, `@scope/pkg/sub` -> `@scope/pkg`).
 *
 * @param text - Combined stdout/stderr text to scan.
 * @returns The package name to install, or `null` when no installable module is identified.
 */
function extractNodeMissingModule(text: string): string | null {
  const match = /Cannot find module ['"]([^'"\n]+)['"]/i.exec(text);
  const specifier = match?.[1]?.trim();
  if (!specifier) {
    return null;
  }

  // Paths, URLs and alias prefixes point at project files, not registry packages.
  const isLocalOrBuiltin =
    /^(?:\.{1,2}(?:[\\/]|$)|[\\/]|[a-zA-Z]:[\\/]|file:|node:|[#~])/.test(specifier);
  if (isLocalOrBuiltin) {
    return null;
  }

  const [first, second] = specifier.split("/");
  if (!first) {
    return null;
  }
  if (first.startsWith("@")) {
    // Scoped packages are "@scope/name"; a bare "@" segment is a bundler alias.
    return first.length > 1 && second ? `${first}/${second}` : null;
  }
  return first;
}
/**
 * Pulls the module name out of a Python
 * `ModuleNotFoundError: No module named '<name>'` message.
 *
 * @param text - Combined stdout/stderr text to scan.
 * @returns The trimmed module name, or `null` when the message is absent or the name is blank.
 */
function extractPythonMissingModule(text: string): string | null {
  const pattern = /ModuleNotFoundError:\s*No module named ['"]([^'"\n]+)['"]/i;
  const captured = pattern.exec(text)?.[1];
  if (captured === undefined) {
    return null;
  }
  const name = captured.trim();
  return name.length > 0 ? name : null;
}
/**
 * POSIX-shell style line `<cmd>: not found` / `<cmd>: command not found`,
 * optionally prefixed by the reporting shell (`/bin/sh: 1:`, `sh: 1:`,
 * `bash: line 1:`, `bash:`). Tolerates CRLF line endings.
 */
const SHELL_COMMAND_NOT_FOUND =
  /(?:^|\n)(?:(?:\/[\w.\/-]*\/)?(?:ba|da|z|k)?sh:\s*(?:line\s+)?(?:\d+:\s*)?)?([a-zA-Z0-9._-]+):\s*(?:command\s+)?not found\r?(?:\n|$)/i;

/** Node child_process failure: `spawn <path-or-name> ENOENT`. */
const SPAWN_ENOENT = /spawn\s+([a-zA-Z0-9._\/-]+)\s+ENOENT/i;

/** zsh style line `zsh: command not found: <cmd>`. */
const ZSH_COMMAND_NOT_FOUND =
  /(?:^|\n)(?:[\w.\/-]*sh:\s*)?command not found:\s*([a-zA-Z0-9._-]+)/i;

/**
 * Identifies the executable a failed command could not find.
 *
 * Recognizes dash/sh (`sh: 1: jq: not found`), bash
 * (`bash: line 1: jq: command not found`), zsh (`zsh: command not found: jq`),
 * bare `<cmd>: not found` lines, and Node's `spawn <cmd> ENOENT`.
 *
 * @param text - Combined stdout/stderr text to scan.
 * @returns The bare command name (no directory part), or `null` when none is found.
 */
function extractMissingCommand(text: string): string | null {
  const shellCommand = SHELL_COMMAND_NOT_FOUND.exec(text)?.[1];
  if (shellCommand) {
    return shellCommand;
  }

  const spawnTarget = SPAWN_ENOENT.exec(text)?.[1];
  if (spawnTarget) {
    // A path ending in "/" has an empty basename; report that as "no command".
    const basename = spawnTarget.split("/").pop();
    return basename || null;
  }

  return ZSH_COMMAND_NOT_FOUND.exec(text)?.[1] ?? null;
}
/**
 * Produces a recovery instruction for well-known, fixable failures in the output
 * of the `code_execution` and `process` tools.
 *
 * Checks run in a fixed order and the first match wins: interactive npx prompt,
 * deprecated `playwright-cli` package, missing Playwright system dependencies,
 * missing Node module, missing Python module, `playwright-cli` not on PATH, and
 * finally any other missing command (except `node` and `python3`).
 *
 * @param toolName - Name of the tool that produced `output`.
 * @param output - Raw tool output (string or record).
 * @returns A multi-line hint, or `null` when the tool is out of scope or nothing matches.
 */
function buildAutoRecoveryHint(toolName: string, output: unknown): string | null {
  const coveredTools = ["code_execution", "process"];
  if (!coveredTools.includes(toolName)) {
    return null;
  }

  const text = getOutputTextForRecovery(output);
  if (text.length === 0) {
    return null;
  }

  const asHint = (...lines: string[]): string => lines.join("\n");

  const npxPromptPending =
    /Need to install the following packages/i.test(text) && /Ok to proceed\?/i.test(text);
  if (npxPromptPending) {
    return asHint(
      "Recoverable blocker detected: interactive npx prompt is waiting for confirmation.",
      "Next action: rerun with non-interactive form using `npx -y ...`, then continue polling/retrying in this turn.",
      "Do not stop on this blocker."
    );
  }

  const deprecatedPlaywrightCli =
    /npm error could not determine executable to run/i.test(text) && /playwright-cli/i.test(text);
  if (deprecatedPlaywrightCli) {
    return asHint(
      "Recoverable blocker detected: deprecated `playwright-cli` npm package does not expose an executable.",
      "Next action: run the command with `npx -y @playwright/cli ...` (or install `@playwright/cli` via install_packages and retry).",
      "Do not stop on this blocker."
    );
  }

  if (text.includes("Host system is missing dependencies to run browsers")) {
    return asHint(
      "Recoverable blocker detected: Playwright browser system dependencies are missing.",
      'Next action: run install_packages with kind="apt" for the required libs (or run `npx playwright install-deps` in terminal runtime), then retry the same Playwright command in this turn.',
      "Do not stop and do not ask the user to run commands manually unless installation keeps failing after corrected retries."
    );
  }

  const nodeModule = extractNodeMissingModule(text);
  if (nodeModule) {
    return asHint(
      `Recoverable blocker detected: missing Node module "${nodeModule}".`,
      `Next action: call install_packages with kind="node" and packages=["${nodeModule}"], then retry the same command in this turn.`,
      "Do not stop after this error."
    );
  }

  const pythonModule = extractPythonMissingModule(text);
  if (pythonModule) {
    return asHint(
      `Recoverable blocker detected: missing Python module "${pythonModule}".`,
      `Next action: call install_packages with kind="python" and packages=["${pythonModule}"], then retry the same command in this turn.`,
      "Do not stop after this error."
    );
  }

  // Checked before the generic missing-command case so the Playwright-specific advice wins.
  if (/playwright-cli:\s*not found/i.test(text)) {
    return asHint(
      "Recoverable blocker detected: playwright-cli is not installed/in PATH.",
      "Next action: first try running the same command via `npx -y @playwright/cli ...`.",
      'If npx path is unavailable, call install_packages with kind="node" and packages=["@playwright/cli"], then retry in this turn.',
      "Do not end the turn on this error."
    );
  }

  const command = extractMissingCommand(text);
  const runtimesWithoutHint = ["node", "python3"];
  if (command && !runtimesWithoutHint.includes(command)) {
    return asHint(
      `Recoverable blocker detected: command "${command}" is missing.`,
      "Next action: install it via install_packages (kind depends on ecosystem, e.g. apt for system commands), then retry the original command in this turn.",
      "Only report blocker after corrected install attempts fail."
    );
  }

  return null;
}
/**
 * Attaches a recovery hint to a tool result.
 *
 * @param output - Raw tool output (string, record, or anything else).
 * @param hint - Hint text, or `null`/empty when there is nothing to add.
 * @returns `output` untouched when there is no hint or the value is neither a
 *   string nor a record. A string gets the hint block appended. A record is
 *   copied with the hint block appended to (or used as) its `output` field, plus
 *   `recoverable: true` and `recoveryHint`.
 */
function appendRecoveryHint(output: unknown, hint: string | null): unknown {
  if (!hint) {
    return output;
  }

  const suffix = "\n\n[Auto-recovery hint]\n" + hint;
  if (typeof output === "string") {
    return output + suffix;
  }

  const fields = asRecord(output);
  if (!fields) {
    return output;
  }

  const existing = typeof fields.output === "string" ? fields.output : "";
  // With no prior text the leading blank lines of the block are dropped.
  const merged = existing.length > 0 ? existing + suffix : suffix.trim();
  return Object.assign({}, fields, {
    output: merged,
    recoverable: true,
    recoveryHint: hint,
  });
}
function extractDeterministicFailureSignature(output: unknown): string | null {
const outputRecord = asRecord(output);
if (outputRecord && outputRecord.success === false) {
@@ -153,7 +294,7 @@ function normalizeNoProgressValue(value: unknown): unknown {
}
function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
const deterministicFailureByCall = new Map<string, string>();
let lastDeterministicFailure: { callKey: string; signature: string } | null = null;
const noProgressByCall = new Map<string, { hash: string; count: number }>();
const wrappedTools: ToolSet = {};
@@ -185,25 +326,29 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
);
}
const previousFailure = deterministicFailureByCall.get(callKey);
if (previousFailure) {
if (lastDeterministicFailure?.callKey === callKey) {
return (
`[Loop guard] Blocked repeated tool call "${toolName}" with identical arguments.\n` +
`Previous deterministic error: ${previousFailure}\n` +
`Previous deterministic error: ${lastDeterministicFailure.signature}\n` +
"Change arguments based on the tool error before retrying."
);
}
const output = await toolDef.execute(input as never, options as never);
const failureSignature = extractDeterministicFailureSignature(output);
const recoveryHint = buildAutoRecoveryHint(toolName, output);
const outputWithHint = appendRecoveryHint(output, recoveryHint);
const failureSignature = extractDeterministicFailureSignature(outputWithHint);
if (failureSignature) {
deterministicFailureByCall.set(callKey, failureSignature);
lastDeterministicFailure = {
callKey,
signature: failureSignature,
};
} else {
deterministicFailureByCall.delete(callKey);
lastDeterministicFailure = null;
}
if (isPollLikeCall(toolName, input)) {
const outputHash = stableSerialize(normalizeNoProgressValue(output));
const outputHash = stableSerialize(normalizeNoProgressValue(outputWithHint));
const previous = noProgressByCall.get(callKey);
if (previous && previous.hash === outputHash) {
noProgressByCall.set(callKey, {
@@ -220,7 +365,7 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
noProgressByCall.delete(callKey);
}
return output;
return outputWithHint;
},
} as typeof toolDef;
}

View File

@@ -51,6 +51,27 @@ interface TelegramRuntimeData {
chatId: string | number;
}
/**
 * Reads the current user message from the agent context.
 *
 * @param context - Agent context whose `data.currentUserMessage` may hold the message.
 * @returns The trimmed message, or `""` when it is missing or not a string.
 */
function getCurrentUserMessageText(context: AgentContext): string {
  const raw = context.data?.currentUserMessage;
  if (typeof raw !== "string") {
    return "";
  }
  return raw.trim();
}
/**
 * Stop/cancel verbs in English and Russian, matched as whole words.
 *
 * JavaScript's `\b` only understands ASCII word characters, so it never fires
 * around Cyrillic words. Unicode-aware lookarounds on letters/digits/underscore
 * are used instead (requires the `u` flag).
 */
const KILL_INTENT_PATTERN =
  /(?<![\p{L}\p{N}_])(?:stop|terminate|kill|cancel|abort|end|прервать|прерви|остановить|останови|убить|убей|завершить|заверши|отменить|отмени)(?![\p{L}\p{N}_])/iu;

/**
 * A negation ("do not", "don't", "dont", "не") followed within 20 characters by
 * a stop/cancel verb. English verbs are whole words; Russian entries are stems
 * so that inflected forms ("не останавливай", "не отменяй") are covered.
 */
const NEGATED_KILL_INTENT_PATTERN =
  /(?<![\p{L}\p{N}_])(?:do not|don['’]?t|не)(?![\p{L}\p{N}_]).{0,20}(?<![\p{L}\p{N}_])(?:(?:stop|terminate|kill|cancel|abort|end)(?![\p{L}\p{N}_])|прерв|прерыв|останов|останавл|убива|убей|убить|заверш|отмен)/iu;

/**
 * Decides whether the current user message explicitly asks to stop a process.
 *
 * @param context - Agent context carrying the current user message.
 * @returns `true` only when the message contains a stop/cancel verb and no
 *   nearby negation of such a verb; `false` for empty messages.
 */
function userExplicitlyRequestedProcessKill(context: AgentContext): boolean {
  const text = getCurrentUserMessageText(context);
  if (!text) return false;

  // A negated request ("don't stop", "не останавливай") overrides any keyword hit.
  if (NEGATED_KILL_INTENT_PATTERN.test(text)) {
    return false;
  }
  return KILL_INTENT_PATTERN.test(text);
}
function getTelegramRuntimeData(context: AgentContext): TelegramRuntimeData | null {
const raw = context.data?.telegram;
if (!raw || typeof raw !== "object" || Array.isArray(raw)) return null;
@@ -776,6 +797,13 @@ export function createAgentTools(
if (!session_id?.trim()) {
return { success: false, error: "session_id is required for kill." };
}
if (!userExplicitlyRequestedProcessKill(context)) {
return {
success: false,
error:
"Kill blocked by policy: only stop a background process when the user explicitly asks to stop/cancel it. Continue with poll/log or wait for completion.",
};
}
return killManagedProcessSession(session_id);
}
if (action === "remove") {

View File

@@ -26,6 +26,7 @@ You are a powerful AI agent with access to tools that allow you to interact with
- Choose the appropriate runtime: `python` for data processing and scripting, `nodejs` for web/JS tasks, `terminal` for shell commands
- Always handle errors and edge cases in your code
- If Python fails with `ModuleNotFoundError`, install the missing dependency with `python3 -m pip install <package>` using `terminal`, then retry
- If Node.js fails with `Cannot find module '<name>'`, install the missing package via `install_packages` (`kind=node`) or the project's package manager, then retry once
- For OS-level packages on Debian/Ubuntu, use `apt-get`/`apt` and add `sudo` only when needed and available
- For file operations, prefer dedicated file tools (`read_text_file`, `read_pdf_file`, `write_text_file`, `copy_file`) over code execution
- Use `code_execution` for file operations only as a fallback when dedicated tools cannot complete the task
@@ -54,6 +55,12 @@ You are a powerful AI agent with access to tools that allow you to interact with
- If a task is too complex, delegate parts to subordinate agents
- Always verify the final result before responding
### Blocker Recovery Protocol
- Treat common environment/setup failures as recoverable blockers, not final outcomes
- When a blocker is recoverable, do the fix immediately using tools (`install_packages`, `code_execution`) and retry in the same turn
- Do not stop at the first recoverable error and do not ask the user to run install commands manually unless corrected retries already failed
- Typical recoverable blockers: `Cannot find module ...`, `ModuleNotFoundError`, `...: not found`, Playwright missing browser dependencies
## Important Rules
1. **Always respond using the response tool** — this is how your answer gets to the user

View File

@@ -17,8 +17,28 @@ Execute code in a specified runtime environment. The code runs on the user's mac
5. **Use sessions wisely** — session 0 is the default; reuse the same session to keep terminal working-directory state between calls
6. **Prefer dedicated file tools first** — use `read_text_file`, `read_pdf_file`, `write_text_file`, and `copy_file` for common file tasks; use `code_execution` only when those tools are insufficient
7. **Auto-resolve missing Python deps** — if you see `ModuleNotFoundError`, run `python3 -m pip install <package>` in `terminal`, then rerun Python code
8. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
9. **Use background mode for long jobs**set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
8. **Auto-resolve missing Node deps** — if you see `Cannot find module '<name>'`, install it via `install_packages` (`kind=node`, package `<name>`) or package manager command, then rerun Node code once
9. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
10. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
11. **Auto-resolve Playwright Linux deps** — if stderr contains `Host system is missing dependencies to run browsers`, install required OS deps via `install_packages` (`kind=apt`) or `npx playwright install-deps`, then rerun the original Playwright command once
12. **Use non-interactive npx** — in this environment prefer `npx -y <package> ...` to avoid hanging on `Ok to proceed?` prompts
13. **Use the correct Playwright CLI package** — npm package `playwright-cli` is deprecated for execution; use `npx -y @playwright/cli ...` (or install `@playwright/cli`)
14. **Do not swallow execution failures** — if you run commands from Node/Python wrappers, propagate failure with non-zero exit code (e.g., `process.exit(1)` in catch)
## Blocker Handling Policy
If execution fails with a recoverable blocker, you must continue autonomously in this turn:
- Install/fix prerequisites using available tools.
- Retry the original command after the fix.
- Repeat for up to 2 corrected retries before reporting failure.
Do not stop after the first failure for these classes of blockers:
- Missing command (`...: not found`, `spawn ... ENOENT`)
- Missing Node module (`Cannot find module ...`)
- Missing Python module (`ModuleNotFoundError`)
- Playwright browser dependency errors on Linux
- Interactive package prompts (`Need to install the following packages` / `Ok to proceed?`)
- `npm error could not determine executable to run` for `playwright-cli` (switch to `@playwright/cli`)
## Examples

View File

@@ -17,3 +17,4 @@ Manage background sessions created by `code_execution`.
2. For `poll`/`log`/`kill`/`remove`, always pass `session_id`.
3. If `poll` returns `retryInMs`, wait roughly that long before the next poll.
4. If status is `completed`/`failed`/`killed`, stop polling and report outcome.
5. Do not call `kill` unless the user explicitly asked to stop/cancel/terminate the running process.