fix: auto-repair gateway watch startup

This commit is contained in:
Peter Steinberger
2026-05-03 17:11:07 +01:00
parent 0636442cde
commit e7f1b10ff8
5 changed files with 120 additions and 2 deletions

View File

@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
- Memory/status: keep plain `openclaw memory status` and `openclaw memory status --json` on the cheap read-only path by reserving vector and embedding provider probes for `--deep` or `--index`. Fixes #76769. Thanks @daruire.
- Control UI/Sessions: avoid full `sessions.list` reloads for chat-turn `sessions.changed` payloads, so large session stores no longer add multi-second delays while chat responses are being delivered. (#76676) Thanks @VACInc.
- Gateway/watch: run `doctor --fix --non-interactive` once and retry when the dev Gateway child exits during startup, so stale local plugin install/config state does not leave the tmux watch session disappearing without a repair attempt.
- Doctor/Telegram: warn when selected Telegram quote replies can suppress `streaming.preview.toolProgress`, and document the `replyToMode` trade-off without changing runtime delivery. Fixes #73487. Thanks @GodsBoy.
- Channels/Discord: send a best-effort native typing cue immediately after an inbound DM is accepted, so slow pre-dispatch turns show Discord liveness before queueing, context assembly, model, or tool work starts. Fixes #76417. Thanks @mlopez14.
- Plugins/install: reject source-only TypeScript package installs and installed plugin packages that are missing compiled runtime output, so broken npm artifacts fail at install/discovery time instead of falling through jiti and surfacing later as unavailable providers. Fixes #76720.

View File

@@ -152,6 +152,10 @@ The tmux wrapper carries common non-secret runtime selectors such as
`OPENCLAW_GATEWAY_PORT`, and `OPENCLAW_SKIP_CHANNELS` into the pane. Put
provider credentials in your normal profile/config, or use raw foreground mode
for one-off ephemeral secrets.
If the watched Gateway exits during startup, the watcher runs
`openclaw doctor --fix --non-interactive` once and restarts the Gateway child.
Use `OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR=0` when you want the original startup
failure without the dev-only repair pass.
The managed tmux pane also defaults to colored Gateway logs for readability;
set `FORCE_COLOR=0` when starting `pnpm gateway:watch` to disable ANSI output.

View File

@@ -103,7 +103,10 @@ session and auto-attaches from interactive terminals. Non-interactive shells sta
detached and print `tmux attach -t openclaw-gateway-watch-main`; use
`OPENCLAW_GATEWAY_WATCH_ATTACH=0 pnpm gateway:watch` to keep an interactive run
detached, or `pnpm gateway:watch:raw` for foreground watch mode. The watcher
reloads on relevant source, config, and bundled-plugin metadata changes.
reloads on relevant source, config, and bundled-plugin metadata changes. If the
watched Gateway exits during startup, `gateway:watch` runs
`openclaw doctor --fix --non-interactive` once and retries; set
`OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR=0` to disable that dev-only repair pass.
`pnpm openclaw setup` is the one-time local config/workspace initialization step for a fresh checkout.
`pnpm gateway:watch` does not rebuild `dist/control-ui`, so rerun `pnpm ui:build` after `ui/` changes or use `pnpm ui:dev` while developing the Control UI.

View File

@@ -15,8 +15,10 @@ const WATCH_IGNORED_PATH_SEGMENTS = new Set([".git", "dist", "node_modules"]);
// NOTE(review): presumably the maximum wait and the poll interval (in ms) used while
// acquiring the watch lock — confirm at the use sites.
const WATCH_LOCK_WAIT_MS = 5_000;
const WATCH_LOCK_POLL_MS = 100;
// NOTE(review): looks like the relative directory that holds watch lock files — confirm at the use sites.
const WATCH_LOCK_DIR = path.join(".local", "watch-node");
// Lower-cased OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR values that switch the auto-doctor pass off.
const AUTO_DOCTOR_DISABLE_VALUES = new Set(["0", "false", "no", "off"]);
/**
 * Builds the argv for a watched child: the runner script followed by the caller's arguments.
 *
 * @param {Iterable<string>} args - Arguments forwarded to the runner script.
 * @returns {string[]} A new array starting with WATCH_NODE_RUNNER.
 */
const buildRunnerArgs = (args) => {
  const runnerArgs = [WATCH_NODE_RUNNER];
  runnerArgs.push(...args);
  return runnerArgs;
};
/**
 * Builds the argv for the one-shot repair child: the runner script followed by
 * `doctor --fix --non-interactive`.
 *
 * @returns {string[]} A fresh argv array on every call.
 */
const buildDoctorRunnerArgs = () => {
  const doctorArgs = ["doctor", "--fix", "--non-interactive"];
  return [WATCH_NODE_RUNNER, ...doctorArgs];
};
const normalizePath = (filePath) =>
String(filePath ?? "")
@@ -69,6 +71,15 @@ const shouldRestartAfterChildExit = (exitCode, exitSignal) =>
(typeof exitCode === "number" && WATCH_RESTARTABLE_CHILD_EXIT_CODES.has(exitCode)) ||
(typeof exitSignal === "string" && WATCH_RESTARTABLE_CHILD_SIGNALS.has(exitSignal));
/**
 * Tells whether the watched command is the Gateway, i.e. its first argument is exactly "gateway".
 *
 * @param {string[]} args - Arguments passed to the watched runner.
 * @returns {boolean} True only when the first argument is "gateway".
 */
const isGatewayWatchCommand = (args) => {
  const subcommand = args[0];
  return subcommand === "gateway";
};
/**
 * Decides whether an early child exit should trigger the one-shot doctor repair.
 *
 * The repair runs only when all of the following hold:
 *   - no repair has been attempted yet in this watch run,
 *   - the watched command is the Gateway,
 *   - OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR is not one of the disable values.
 *
 * The env value is compared case-insensitively and with surrounding whitespace
 * removed, so values such as "0 " (e.g. from `set VAR=0 && ...` on Windows) or
 * "off\n" still disable the repair instead of being silently ignored.
 *
 * @param {{ args: string[], env: Record<string, string | undefined> }} deps - Watch dependencies.
 * @param {boolean} autoDoctorAttempted - Whether the repair pass already ran.
 * @returns {boolean} True when the doctor repair should run now.
 */
const shouldRunAutoDoctor = (deps, autoDoctorAttempted) => {
  if (autoDoctorAttempted || !isGatewayWatchCommand(deps.args)) {
    return false;
  }
  const toggle = String(deps.env.OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR ?? "")
    .trim()
    .toLowerCase();
  return !AUTO_DOCTOR_DISABLE_VALUES.has(toggle);
};
const isProcessAlive = (pid, signalProcess) => {
if (!Number.isInteger(pid) || pid <= 0) {
return false;
@@ -288,6 +299,7 @@ export async function runWatchMain(params = {}) {
let restartRequested = false;
let watchProcess = null;
let lockHandle = null;
let autoDoctorAttempted = false;
let onSigInt;
let onSigTerm;
@@ -334,6 +346,44 @@ export async function runWatchMain(params = {}) {
startRunner();
return;
}
if (shouldRunAutoDoctor(deps, autoDoctorAttempted)) {
runAutoDoctorAndRestart();
return;
}
settle(exitSignal ? 1 : (exitCode ?? 1));
});
};
/**
 * Runs `doctor --fix --non-interactive` as a child process and, when it exits
 * cleanly, starts the watched runner again.
 *
 * Outcomes:
 *   - spawn error: logs the failure and settles the watch run with 1;
 *   - exit while shutting down: only clears the tracked child;
 *   - exit code 0 without a signal: logs and calls startRunner();
 *   - any other exit: logs and settles with the doctor's exit code, or with 1
 *     when it was killed by a signal or reported no code.
 */
const runAutoDoctorAndRestart = () => {
  // Flip the flag first so a later early exit cannot trigger a second repair.
  autoDoctorAttempted = true;
  logWatcher(
    "Gateway exited early; running `openclaw doctor --fix --non-interactive` once.",
    deps,
  );

  const doctorChild = deps.spawn(deps.process.execPath, buildDoctorRunnerArgs(), {
    cwd: deps.cwd,
    env: childEnv,
    stdio: "inherit",
  });
  // Track the doctor as the current child while it runs.
  watchProcess = doctorChild;

  doctorChild.on("error", (error) => {
    watchProcess = null;
    const reason = error?.message ?? "unknown error";
    logWatcher(`Failed to spawn doctor repair: ${reason}`, deps);
    settle(1);
  });

  doctorChild.on("exit", (exitCode, exitSignal) => {
    watchProcess = null;
    if (shuttingDown) {
      return;
    }

    const repaired = exitCode === 0 && !exitSignal;
    if (!repaired) {
      // A signal or a missing exit code is reported as a generic failure (1).
      const failureCode = exitSignal ? 1 : (exitCode ?? 1);
      logWatcher(
        `Doctor repair failed; gateway:watch exiting with code ${failureCode}.`,
        deps,
      );
      settle(failureCode);
      return;
    }

    logWatcher("Doctor repair completed; restarting gateway watch child.", deps);
    startRunner();
  });
};

View File

@@ -201,7 +201,7 @@ describe("watch-node script", () => {
const { child, spawn, watcher, createWatcher, fakeProcess } = createWatchHarness();
const runPromise = runWatch({
args: ["gateway", "--force", "--help"],
args: ["config", "validate"],
createWatcher,
lockDisabled: true,
process: fakeProcess,
@@ -217,6 +217,66 @@ describe("watch-node script", () => {
expect(fakeProcess.listenerCount("SIGTERM")).toBe(0);
});
// Flow under test: first gateway child fails -> doctor repair child runs -> gateway is
// spawned again -> SIGINT shuts the watch run down.
it("runs doctor once and restarts when gateway exits nonzero", async () => {
  const makeFakeChild = () => Object.assign(new EventEmitter(), { kill: vi.fn() });
  const flushImmediate = () => new Promise((resolve) => setImmediate(resolve));
  const nodeBinary = "/usr/local/bin/node";

  const failingGateway = makeFakeChild();
  const doctorChild = makeFakeChild();
  const restartedGateway = makeFakeChild();

  // Children are handed out in spawn order: gateway, doctor, gateway.
  const spawn = vi.fn();
  for (const fakeChild of [failingGateway, doctorChild, restartedGateway]) {
    spawn.mockReturnValueOnce(fakeChild);
  }

  const { watcher, fakeProcess, runPromise } = startWatchRun({ spawn });

  // A non-zero gateway exit should spawn the doctor repair as the second child.
  failingGateway.emit("exit", 1, null);
  await flushImmediate();
  expect(spawn).toHaveBeenCalledTimes(2);
  expect(spawn).toHaveBeenNthCalledWith(
    2,
    nodeBinary,
    ["scripts/run-node.mjs", "doctor", "--fix", "--non-interactive"],
    expect.objectContaining({ stdio: "inherit" }),
  );

  // A clean doctor exit should bring the gateway back as the third child.
  doctorChild.emit("exit", 0, null);
  await flushImmediate();
  expect(spawn).toHaveBeenCalledTimes(3);
  expect(spawn).toHaveBeenNthCalledWith(
    3,
    nodeBinary,
    ["scripts/run-node.mjs", "gateway", "--force"],
    expect.objectContaining({ stdio: "inherit" }),
  );

  // SIGINT terminates the restarted gateway and closes the watcher.
  fakeProcess.emit("SIGINT");
  const exitCode = await runPromise;
  expect(exitCode).toBe(130);
  expect(restartedGateway.kill).toHaveBeenCalledWith("SIGTERM");
  expect(watcher.close).toHaveBeenCalledTimes(1);
});
// With OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR=0 a failing gateway must surface its own exit
// code without a second (doctor) spawn.
it("does not run doctor after a gateway failure when auto doctor is disabled", async () => {
  const { child, spawn, watcher, createWatcher, fakeProcess } = createWatchHarness();
  const watchOptions = {
    args: ["gateway", "--force"],
    createWatcher,
    env: { OPENCLAW_GATEWAY_WATCH_AUTO_DOCTOR: "0" },
    lockDisabled: true,
    process: fakeProcess,
    spawn,
  };
  const pendingExit = runWatch(watchOptions);

  child.emit("exit", 1, null);

  expect(await pendingExit).toBe(1);
  // Only the original gateway child was ever spawned.
  expect(spawn).toHaveBeenCalledTimes(1);
  expect(watcher.close).toHaveBeenCalledTimes(1);
});
it("restarts when the runner exits with a SIGTERM-derived code unexpectedly", async () => {
const childA = Object.assign(new EventEmitter(), {
kill: vi.fn(),