mirror of
https://github.com/eggent-ai/eggent.git
synced 2026-03-07 01:53:08 +00:00
Harden tool execution recovery and fix multi-tab sync hangs
This commit is contained in:
16
Dockerfile
16
Dockerfile
@@ -33,6 +33,22 @@ RUN apt-get update \
|
||||
curl \
|
||||
git \
|
||||
jq \
|
||||
libasound2 \
|
||||
libatk1.0-0 \
|
||||
libatspi2.0-0 \
|
||||
libdbus-1-3 \
|
||||
libgbm1 \
|
||||
libglib2.0-0 \
|
||||
libnspr4 \
|
||||
libnss3 \
|
||||
libx11-6 \
|
||||
libxcb1 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxkbcommon0 \
|
||||
libxrandr2 \
|
||||
python3 \
|
||||
python3-requests \
|
||||
python3-venv \
|
||||
|
||||
@@ -216,11 +216,11 @@ playwright-cli kill-all
|
||||
|
||||
## Local installation
|
||||
|
||||
In some cases user might want to install playwright-cli locally. If running globally available `playwright-cli` binary fails, use `npx playwright-cli` to run the commands. For example:
|
||||
In some cases the user might want to install playwright-cli locally. If running the globally available `playwright-cli` binary fails, use the non-interactive `npx -y @playwright/cli` form to run the commands. (The `playwright-cli` npm package is deprecated for execution.) For example:
|
||||
|
||||
```bash
|
||||
npx playwright-cli open https://example.com
|
||||
npx playwright-cli click e1
|
||||
npx -y @playwright/cli open https://example.com
|
||||
npx -y @playwright/cli click e1
|
||||
```
|
||||
|
||||
## Example: Form submission
|
||||
|
||||
@@ -10,6 +10,60 @@ interface BackgroundSyncOptions {
|
||||
fallbackIntervalMs?: number;
|
||||
}
|
||||
|
||||
type SyncSubscriber = (event: UiSyncEvent) => void;
|
||||
|
||||
let sharedEventSource: EventSource | null = null;
|
||||
let sharedSyncListener: ((event: MessageEvent<string>) => void) | null = null;
|
||||
let nextSubscriberId = 1;
|
||||
const syncSubscribers = new Map<number, SyncSubscriber>();
|
||||
|
||||
/**
 * Lazily opens the single `/api/events` stream shared by every subscriber.
 *
 * Does nothing when a stream is already open. Otherwise it creates the
 * EventSource, remembers the "sync" listener so it can be detached later,
 * and fans each successfully parsed event out to all registered subscribers.
 */
function ensureSharedEventSource(): void {
  if (sharedEventSource !== null) {
    return;
  }

  const source = new EventSource("/api/events");
  sharedEventSource = source;

  const dispatch = (message: MessageEvent<string>): void => {
    let payload: UiSyncEvent;
    try {
      payload = JSON.parse(message.data) as UiSyncEvent;
    } catch {
      // Malformed payloads are dropped without notifying anyone.
      return;
    }

    for (const [, notify] of syncSubscribers) {
      try {
        notify(payload);
      } catch {
        // One failing subscriber must not prevent delivery to the others.
      }
    }
  };
  sharedSyncListener = dispatch;

  source.addEventListener("sync", dispatch as EventListener);
}
|
||||
|
||||
/**
 * Registers `subscriber` on the shared sync stream, opening it if needed.
 *
 * @param subscriber Callback invoked with every parsed sync event.
 * @returns An unsubscribe function. When the last subscriber leaves, the
 *   shared listener is detached, the stream is closed, and the module-level
 *   handles are reset so a later subscription reopens it.
 */
function subscribeSharedSync(subscriber: SyncSubscriber): () => void {
  ensureSharedEventSource();

  const id = nextSubscriberId;
  nextSubscriberId += 1;
  syncSubscribers.set(id, subscriber);

  return function unsubscribe(): void {
    syncSubscribers.delete(id);

    // Keep the stream while anyone is still listening (or if it is already gone).
    if (syncSubscribers.size !== 0 || !sharedEventSource) {
      return;
    }

    const source = sharedEventSource;
    const listener = sharedSyncListener;
    if (listener) {
      source.removeEventListener("sync", listener as EventListener);
    }
    source.close();
    sharedEventSource = null;
    sharedSyncListener = null;
  };
}
|
||||
|
||||
function matchesScope(
|
||||
event: UiSyncEvent,
|
||||
options: BackgroundSyncOptions
|
||||
@@ -49,7 +103,6 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
|
||||
const [tick, setTick] = useState(0);
|
||||
|
||||
useEffect(() => {
|
||||
let eventSource: EventSource | null = null;
|
||||
const scope: BackgroundSyncOptions = {
|
||||
topics: topicsKey
|
||||
? (topicsKey.split(",").filter(Boolean) as UiSyncTopic[])
|
||||
@@ -63,24 +116,14 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
|
||||
setTick((value) => value + 1);
|
||||
};
|
||||
|
||||
const onSync = (event: MessageEvent<string>) => {
|
||||
try {
|
||||
const parsed = JSON.parse(event.data) as UiSyncEvent;
|
||||
if (!matchesScope(parsed, scope)) {
|
||||
return;
|
||||
}
|
||||
bump();
|
||||
} catch {
|
||||
// Ignore malformed SSE event payloads.
|
||||
const onSync = (parsed: UiSyncEvent) => {
|
||||
if (!matchesScope(parsed, scope)) {
|
||||
return;
|
||||
}
|
||||
bump();
|
||||
};
|
||||
|
||||
const connect = () => {
|
||||
eventSource = new EventSource("/api/events");
|
||||
eventSource.addEventListener("sync", onSync as EventListener);
|
||||
};
|
||||
|
||||
connect();
|
||||
const unsubscribeSync = subscribeSharedSync(onSync);
|
||||
|
||||
const fallbackTimer =
|
||||
fallbackIntervalMs > 0 ? window.setInterval(bump, fallbackIntervalMs) : null;
|
||||
@@ -101,10 +144,7 @@ export function useBackgroundSync(options: BackgroundSyncOptions = {}): number {
|
||||
if (fallbackTimer) {
|
||||
window.clearInterval(fallbackTimer);
|
||||
}
|
||||
if (eventSource) {
|
||||
eventSource.removeEventListener("sync", onSync as EventListener);
|
||||
eventSource.close();
|
||||
}
|
||||
unsubscribeSync();
|
||||
document.removeEventListener("visibilitychange", onVisibilityChange);
|
||||
window.removeEventListener("focus", onWindowFocus);
|
||||
};
|
||||
|
||||
@@ -66,6 +66,147 @@ function parseJsonObject(text: string): Record<string, unknown> | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Flattens a tool result into the text that recovery heuristics scan.
 *
 * @param output Raw tool output: a string, or a value `asRecord` can view as a record.
 * @returns The string itself; or the record's non-empty string `output` and
 *   `error` fields joined by a newline; or "" when neither form applies.
 */
function getOutputTextForRecovery(output: unknown): string {
  if (typeof output === "string") {
    return output;
  }

  const fields = asRecord(output);
  if (!fields) {
    return "";
  }

  const parts: string[] = [];
  for (const candidate of [fields.output, fields.error]) {
    if (typeof candidate === "string" && candidate) {
      parts.push(candidate);
    }
  }
  return parts.join("\n");
}
|
||||
|
||||
/**
 * Extracts the installable npm package name from a Node "Cannot find module"
 * (CommonJS) or "Cannot find package" (ESM) error message.
 *
 * @param text Combined stdout/stderr text of a tool run.
 * @returns The package root (`lodash` for `lodash/fp`, `@scope/pkg` for
 *   `@scope/pkg/sub`), or null when no such error is present or the missing
 *   specifier is a local file / builtin that installing a package cannot fix.
 */
function extractNodeMissingModule(text: string): string | null {
  const match = text.match(/Cannot find (?:module|package) ['"]([^'"\n]+)['"]/i);
  const specifier = match?.[1]?.trim();
  if (!specifier) {
    return null;
  }

  // Relative/absolute paths, Windows drive paths and `node:`/`file:` specifiers
  // point at missing local files or builtins, not at an npm package.
  if (/^(?:\.|\/|[a-zA-Z]:[\\/]|node:|file:)/.test(specifier)) {
    return null;
  }

  // Strip any subpath: passing `lodash/fp` to npm would be interpreted as a
  // GitHub `user/repo` shorthand rather than the `lodash` package.
  const segments = specifier.split("/");
  const packageName = specifier.startsWith("@")
    ? segments.slice(0, 2).join("/")
    : segments[0];
  return packageName ? packageName : null;
}
|
||||
|
||||
/**
 * Pulls the module name out of a Python `ModuleNotFoundError` message.
 *
 * @param text Combined stdout/stderr text of a tool run.
 * @returns The quoted module name with surrounding whitespace removed, or
 *   null when the error is absent or the quoted name is blank.
 */
function extractPythonMissingModule(text: string): string | null {
  const pattern = /ModuleNotFoundError:\s*No module named ['"]([^'"\n]+)['"]/i;
  const found = pattern.exec(text);
  if (found === null) {
    return null;
  }

  const name = (found[1] ?? "").trim();
  return name.length > 0 ? name : null;
}
|
||||
|
||||
/**
 * Detects a "command not found" failure and returns the missing command name.
 *
 * Recognized forms (each on its own line, LF or CRLF terminated):
 *  - bare or dash/sh style: `foo: not found`, `sh: 1: foo: not found`,
 *    `/bin/sh: 1: foo: not found`
 *  - bash style: `bash: foo: command not found`,
 *    `bash: line 1: foo: command not found`
 *  - Node spawn failures: `spawn /usr/bin/foo ENOENT` (basename is returned)
 *
 * @param text Combined stdout/stderr text of a tool run.
 * @returns The command name, or null when no such failure is present.
 */
function extractMissingCommand(text: string): string | null {
  // Optional shell prefix ("[/path/]sh:", "bash:", "-bash:") with an optional
  // "line N:" / "N:" position, then "<cmd>: [command ]not found" at end of line.
  const shellMatch = text.match(
    /(?:^|\n)(?:(?:[\w.\/-]*\/|-)?(?:ba|da|z|k)?sh:\s*(?:line\s+)?(?:\d+:\s*)?)?([a-zA-Z0-9._-]+):\s*(?:command\s+)?not found\r?(?:\n|$)/i
  );
  if (shellMatch?.[1]) {
    return shellMatch[1];
  }

  const spawnMatch = text.match(/spawn\s+([a-zA-Z0-9._/-]+)\s+ENOENT/i);
  if (spawnMatch?.[1]) {
    const command = spawnMatch[1].split("/").pop();
    // A path ending in "/" yields an empty basename; report that as "no command".
    return command || null;
  }

  return null;
}
|
||||
|
||||
/**
 * Produces a model-facing recovery hint for known, fixable execution failures.
 *
 * Only `code_execution` and `process` results are inspected. Checks run in a
 * fixed order and the first match wins: interactive npx prompt, deprecated
 * `playwright-cli` package, missing Playwright system dependencies, missing
 * Node module, missing Python module, missing `playwright-cli` binary, and
 * finally any other missing command (except `node` and `python3`).
 *
 * @param toolName Name of the tool that produced `output`.
 * @param output Raw tool output (string or record-like value).
 * @returns Newline-joined hint text, or null when nothing recoverable is recognized.
 */
function buildAutoRecoveryHint(toolName: string, output: unknown): string | null {
  const hintableTools = ["code_execution", "process"];
  if (!hintableTools.includes(toolName)) {
    return null;
  }

  const combined = getOutputTextForRecovery(output);
  if (combined.length === 0) {
    return null;
  }

  const compose = (...lines: string[]): string => lines.join("\n");

  const npxPromptPending =
    /Need to install the following packages/i.test(combined) &&
    /Ok to proceed\?/i.test(combined);
  if (npxPromptPending) {
    return compose(
      "Recoverable blocker detected: interactive npx prompt is waiting for confirmation.",
      "Next action: rerun with non-interactive form using `npx -y ...`, then continue polling/retrying in this turn.",
      "Do not stop on this blocker."
    );
  }

  const deprecatedPlaywrightPackage =
    /npm error could not determine executable to run/i.test(combined) &&
    /playwright-cli/i.test(combined);
  if (deprecatedPlaywrightPackage) {
    return compose(
      "Recoverable blocker detected: deprecated `playwright-cli` npm package does not expose an executable.",
      "Next action: run the command with `npx -y @playwright/cli ...` (or install `@playwright/cli` via install_packages and retry).",
      "Do not stop on this blocker."
    );
  }

  if (combined.indexOf("Host system is missing dependencies to run browsers") !== -1) {
    return compose(
      "Recoverable blocker detected: Playwright browser system dependencies are missing.",
      "Next action: run install_packages with kind=\"apt\" for the required libs (or run `npx playwright install-deps` in terminal runtime), then retry the same Playwright command in this turn.",
      "Do not stop and do not ask the user to run commands manually unless installation keeps failing after corrected retries."
    );
  }

  const nodeModule = extractNodeMissingModule(combined);
  if (nodeModule) {
    return compose(
      `Recoverable blocker detected: missing Node module "${nodeModule}".`,
      `Next action: call install_packages with kind="node" and packages=["${nodeModule}"], then retry the same command in this turn.`,
      "Do not stop after this error."
    );
  }

  const pythonModule = extractPythonMissingModule(combined);
  if (pythonModule) {
    return compose(
      `Recoverable blocker detected: missing Python module "${pythonModule}".`,
      `Next action: call install_packages with kind="python" and packages=["${pythonModule}"], then retry the same command in this turn.`,
      "Do not stop after this error."
    );
  }

  // Checked before the generic missing-command case so the Playwright-specific advice wins.
  if (/playwright-cli:\s*not found/i.test(combined)) {
    return compose(
      "Recoverable blocker detected: playwright-cli is not installed/in PATH.",
      "Next action: first try running the same command via `npx -y @playwright/cli ...`.",
      "If npx path is unavailable, call install_packages with kind=\"node\" and packages=[\"@playwright/cli\"], then retry in this turn.",
      "Do not end the turn on this error."
    );
  }

  const absentCommand = extractMissingCommand(combined);
  if (!absentCommand || absentCommand === "node" || absentCommand === "python3") {
    return null;
  }

  return compose(
    `Recoverable blocker detected: command "${absentCommand}" is missing.`,
    `Next action: install it via install_packages (kind depends on ecosystem, e.g. apt for system commands), then retry the original command in this turn.`,
    "Only report blocker after corrected install attempts fail."
  );
}
|
||||
|
||||
/**
 * Attaches an auto-recovery hint to a tool result.
 *
 * @param output Raw tool output.
 * @param hint Hint text, or null/empty for "nothing to add".
 * @returns `output` unchanged when there is no hint or the value is neither a
 *   string nor record-like. A string gets the hint block appended. A record is
 *   copied with the block appended to (or used as) its `output` text, plus
 *   `recoverable: true` and `recoveryHint`.
 */
function appendRecoveryHint(output: unknown, hint: string | null): unknown {
  if (!hint) {
    return output;
  }

  const suffix = "\n\n[Auto-recovery hint]\n" + hint;

  if (typeof output === "string") {
    return output + suffix;
  }

  const fields = asRecord(output);
  if (!fields) {
    return output;
  }

  const existing = typeof fields.output === "string" ? fields.output : "";
  // With no prior text the block becomes the whole output, minus its leading blank lines.
  const merged = existing ? existing + suffix : suffix.trim();

  return {
    ...fields,
    output: merged,
    recoverable: true,
    recoveryHint: hint,
  };
}
|
||||
|
||||
function extractDeterministicFailureSignature(output: unknown): string | null {
|
||||
const outputRecord = asRecord(output);
|
||||
if (outputRecord && outputRecord.success === false) {
|
||||
@@ -153,7 +294,7 @@ function normalizeNoProgressValue(value: unknown): unknown {
|
||||
}
|
||||
|
||||
function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
|
||||
const deterministicFailureByCall = new Map<string, string>();
|
||||
let lastDeterministicFailure: { callKey: string; signature: string } | null = null;
|
||||
const noProgressByCall = new Map<string, { hash: string; count: number }>();
|
||||
const wrappedTools: ToolSet = {};
|
||||
|
||||
@@ -185,25 +326,29 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
|
||||
);
|
||||
}
|
||||
|
||||
const previousFailure = deterministicFailureByCall.get(callKey);
|
||||
if (previousFailure) {
|
||||
if (lastDeterministicFailure?.callKey === callKey) {
|
||||
return (
|
||||
`[Loop guard] Blocked repeated tool call "${toolName}" with identical arguments.\n` +
|
||||
`Previous deterministic error: ${previousFailure}\n` +
|
||||
`Previous deterministic error: ${lastDeterministicFailure.signature}\n` +
|
||||
"Change arguments based on the tool error before retrying."
|
||||
);
|
||||
}
|
||||
|
||||
const output = await toolDef.execute(input as never, options as never);
|
||||
const failureSignature = extractDeterministicFailureSignature(output);
|
||||
const recoveryHint = buildAutoRecoveryHint(toolName, output);
|
||||
const outputWithHint = appendRecoveryHint(output, recoveryHint);
|
||||
const failureSignature = extractDeterministicFailureSignature(outputWithHint);
|
||||
if (failureSignature) {
|
||||
deterministicFailureByCall.set(callKey, failureSignature);
|
||||
lastDeterministicFailure = {
|
||||
callKey,
|
||||
signature: failureSignature,
|
||||
};
|
||||
} else {
|
||||
deterministicFailureByCall.delete(callKey);
|
||||
lastDeterministicFailure = null;
|
||||
}
|
||||
|
||||
if (isPollLikeCall(toolName, input)) {
|
||||
const outputHash = stableSerialize(normalizeNoProgressValue(output));
|
||||
const outputHash = stableSerialize(normalizeNoProgressValue(outputWithHint));
|
||||
const previous = noProgressByCall.get(callKey);
|
||||
if (previous && previous.hash === outputHash) {
|
||||
noProgressByCall.set(callKey, {
|
||||
@@ -220,7 +365,7 @@ function applyGlobalToolLoopGuard(tools: ToolSet): ToolSet {
|
||||
noProgressByCall.delete(callKey);
|
||||
}
|
||||
|
||||
return output;
|
||||
return outputWithHint;
|
||||
},
|
||||
} as typeof toolDef;
|
||||
}
|
||||
|
||||
@@ -51,6 +51,27 @@ interface TelegramRuntimeData {
|
||||
chatId: string | number;
|
||||
}
|
||||
|
||||
/**
 * Reads the current user message from the agent context.
 *
 * @returns The trimmed `context.data.currentUserMessage` when it is a string, otherwise "".
 */
function getCurrentUserMessageText(context: AgentContext): string {
  const value = context.data?.currentUserMessage;
  return typeof value === "string" ? value.trim() : "";
}

/**
 * Decides whether the current user message explicitly asks to stop a process.
 *
 * Matches English and Russian stop/cancel verbs as whole words. A negation
 * ("do not", "don't", "не") followed within 20 characters by a stop verb
 * vetoes the request.
 *
 * JavaScript's `\b` only treats ASCII `[A-Za-z0-9_]` as word characters, so it
 * can never delimit Cyrillic words. Word edges are therefore expressed with
 * Unicode-aware lookarounds and the `u` flag.
 *
 * @returns true only when a stop verb is present and not negated.
 */
function userExplicitlyRequestedProcessKill(context: AgentContext): boolean {
  const text = getCurrentUserMessageText(context);
  if (!text) return false;

  const killIntent =
    /(?<![\p{L}\p{N}_])(stop|terminate|kill|cancel|abort|end|прервать|прерви|остановить|останови|убить|убей|завершить|заверши|отменить|отмени)(?![\p{L}\p{N}_])/iu;
  // Russian verbs are matched by stem plus any inflected ending so that forms
  // such as "останавливай" / "отменяй" / "завершай" are covered.
  const negatedIntent =
    /(?<![\p{L}\p{N}_])(do not|don['’]?t|не)(?![\p{L}\p{N}_]).{0,20}(?<![\p{L}\p{N}_])(?:(?:stop|terminate|kill|cancel|abort|end)(?![\p{L}\p{N}_])|(?:прерв|прерыв|останов|останав|убива|убей|убить|заверш|отмен)\p{L}*)/iu;

  if (negatedIntent.test(text)) {
    return false;
  }

  return killIntent.test(text);
}
|
||||
|
||||
function getTelegramRuntimeData(context: AgentContext): TelegramRuntimeData | null {
|
||||
const raw = context.data?.telegram;
|
||||
if (!raw || typeof raw !== "object" || Array.isArray(raw)) return null;
|
||||
@@ -776,6 +797,13 @@ export function createAgentTools(
|
||||
if (!session_id?.trim()) {
|
||||
return { success: false, error: "session_id is required for kill." };
|
||||
}
|
||||
if (!userExplicitlyRequestedProcessKill(context)) {
|
||||
return {
|
||||
success: false,
|
||||
error:
|
||||
"Kill blocked by policy: only stop a background process when the user explicitly asks to stop/cancel it. Continue with poll/log or wait for completion.",
|
||||
};
|
||||
}
|
||||
return killManagedProcessSession(session_id);
|
||||
}
|
||||
if (action === "remove") {
|
||||
|
||||
@@ -26,6 +26,7 @@ You are a powerful AI agent with access to tools that allow you to interact with
|
||||
- Choose the appropriate runtime: `python` for data processing and scripting, `nodejs` for web/JS tasks, `terminal` for shell commands
|
||||
- Always handle errors and edge cases in your code
|
||||
- If Python fails with `ModuleNotFoundError`, install the missing dependency with `python3 -m pip install <package>` using `terminal`, then retry
|
||||
- If Node.js fails with `Cannot find module '<name>'`, install the missing package via `install_packages` (`kind=node`) or the project's package manager, then retry once
|
||||
- For OS-level packages on Debian/Ubuntu, use `apt-get`/`apt` and add `sudo` only when needed and available
|
||||
- For file operations, prefer dedicated file tools (`read_text_file`, `read_pdf_file`, `write_text_file`, `copy_file`) over code execution
|
||||
- Use `code_execution` for file operations only as a fallback when dedicated tools cannot complete the task
|
||||
@@ -54,6 +55,12 @@ You are a powerful AI agent with access to tools that allow you to interact with
|
||||
- If a task is too complex, delegate parts to subordinate agents
|
||||
- Always verify the final result before responding
|
||||
|
||||
### Blocker Recovery Protocol
|
||||
- Treat common environment/setup failures as recoverable blockers, not final outcomes
|
||||
- When a blocker is recoverable, do the fix immediately using tools (`install_packages`, `code_execution`) and retry in the same turn
|
||||
- Do not stop at the first recoverable error and do not ask the user to run install commands manually unless corrected retries already failed
|
||||
- Typical recoverable blockers: `Cannot find module ...`, `ModuleNotFoundError`, `...: not found`, Playwright missing browser dependencies
|
||||
|
||||
## Important Rules
|
||||
|
||||
1. **Always respond using the response tool** — this is how your answer gets to the user
|
||||
|
||||
@@ -17,8 +17,28 @@ Execute code in a specified runtime environment. The code runs on the user's mac
|
||||
5. **Use sessions wisely** — session 0 is the default; reuse the same session to keep terminal working-directory state between calls
|
||||
6. **Prefer dedicated file tools first** — use `read_text_file`, `read_pdf_file`, `write_text_file`, and `copy_file` for common file tasks; use `code_execution` only when those tools are insufficient
|
||||
7. **Auto-resolve missing Python deps** — if you see `ModuleNotFoundError`, run `python3 -m pip install <package>` in `terminal`, then rerun Python code
|
||||
8. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
|
||||
9. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
|
||||
8. **Auto-resolve missing Node deps** — if you see `Cannot find module '<name>'`, install it via `install_packages` (`kind=node`, package `<name>`) or package manager command, then rerun Node code once
|
||||
9. **Install system packages carefully** — for Debian/Ubuntu, use `apt-get`/`apt`; add `sudo` only when required and available
|
||||
10. **Use background mode for long jobs** — set `background=true` or `yield_ms` and then use the `process` tool to poll/log/kill
|
||||
11. **Auto-resolve Playwright Linux deps** — if stderr contains `Host system is missing dependencies to run browsers`, install required OS deps via `install_packages` (`kind=apt`) or `npx playwright install-deps`, then rerun the original Playwright command once
|
||||
12. **Use non-interactive npx** — in this environment prefer `npx -y <package> ...` to avoid hanging on `Ok to proceed?` prompts
|
||||
13. **Use the correct Playwright CLI package** — npm package `playwright-cli` is deprecated for execution; use `npx -y @playwright/cli ...` (or install `@playwright/cli`)
|
||||
14. **Do not swallow execution failures** — if you run commands from Node/Python wrappers, propagate failure with non-zero exit code (e.g., `process.exit(1)` in catch)
|
||||
|
||||
## Blocker Handling Policy
|
||||
|
||||
If execution fails with a recoverable blocker, you must continue autonomously in this turn:
|
||||
- Install/fix prerequisites using available tools.
|
||||
- Retry the original command after the fix.
|
||||
- Repeat for up to 2 corrected retries before reporting failure.
|
||||
|
||||
Do not stop after first failure for these classes:
|
||||
- Missing command (`...: not found`, `spawn ... ENOENT`)
|
||||
- Missing Node module (`Cannot find module ...`)
|
||||
- Missing Python module (`ModuleNotFoundError`)
|
||||
- Playwright browser dependency errors on Linux
|
||||
- Interactive package prompts (`Need to install the following packages` / `Ok to proceed?`)
|
||||
- `npm error could not determine executable to run` for `playwright-cli` (switch to `@playwright/cli`)
|
||||
|
||||
## Examples
|
||||
|
||||
|
||||
@@ -17,3 +17,4 @@ Manage background sessions created by `code_execution`.
|
||||
2. For `poll`/`log`/`kill`/`remove`, always pass `session_id`.
|
||||
3. If `poll` returns `retryInMs`, wait roughly that long before the next poll.
|
||||
4. If status is `completed`/`failed`/`killed`, stop polling and report outcome.
|
||||
5. Do not call `kill` unless the user explicitly asked to stop/cancel/terminate the running process.
|
||||
|
||||
Reference in New Issue
Block a user