mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-07 07:58:36 +00:00
fix(update): require applied gateway restarts
Require Control UI updates to observe a real gateway process replacement, surface skipped/error update outcomes, and verify the running gateway version after restart.

Adds update.status restart-sentinel plumbing, docs, generated protocol model updates, and changelog attribution.

Local verification:
- pnpm test src/gateway/server-methods/update.test.ts src/cli/gateway-cli/run-loop.test.ts src/infra/restart-sentinel.test.ts src/infra/process-respawn.test.ts src/infra/update-runner.test.ts ui/src/ui/app-gateway.node.test.ts ui/src/ui/controllers/config.test.ts
- git diff --check
- pnpm exec oxfmt --check --threads=1 CHANGELOG.md docs/gateway/protocol.md docs/gateway/configuration.md docs/web/control-ui.md
- pnpm docs:check-mdx
This commit is contained in:
@@ -87,6 +87,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/install: add a validated `--wrapper`/`OPENCLAW_WRAPPER` service install path that persists executable LaunchAgent/systemd wrappers across forced reinstalls, updates, and doctor repairs instead of falling back to raw node/bun `ProgramArguments`. Fixes #69400. (#72445) Thanks @willtmc.
|
||||
- Plugins: fail plugin registration when loader-owned acceptance gates reject missing hook names or memory-only capability registration from non-memory plugins, surfacing the issue through plugin status and doctor instead of silently dropping the registration. Fixes #72459. Thanks @1fanwang and @amknight.
|
||||
- macOS Gateway: write launchd services with a state-dir `WorkingDirectory`, use a durable state-dir temp path instead of freezing macOS session `TMPDIR`, create that temp directory before bootstrap, and label abort-shaped launchd exits as `SIGABRT/abort` in status output. Fixes #53679 and #70223; refs #71848. Thanks @dlturock, @stammi922, and @palladius.
|
||||
- Control UI/update: make `Update now` require a real gateway process replacement, report skipped/error update outcomes with stable reasons, and verify the running gateway version after restart so global installs cannot silently keep old code in memory. Fixes #62492; addresses #64892 and #63562. Thanks @IAMSamuelRodda.
|
||||
- Exec approvals: accept runtime-owned `source: "allow-always"` and `commandText` allowlist metadata in gateway and node approval-set payloads so Control UI round-trips no longer fail with `unexpected property 'source'`. Fixes #60000; carries forward #60064. Thanks @sd1471123, @sharkqwy, and @luoyanglang.
|
||||
- Exec/node: skip approval-plan preparation for full-trust `host=node` runs so interpreter and script commands no longer fail with `SYSTEM_RUN_DENIED: approval cannot safely bind` when effective policy is `security=full` and `ask=off`. Fixes #48457 and duplicate #69251. Thanks @ajtran303, @jaserNo1, @Blakeshannon, @lesliefag, and @AvIsBeastMC.
|
||||
- Exec/node: synthesize a local approval plan when a paired node advertises `system.run` without `system.run.prepare`, unblocking approval-required `host=node` exec on current macOS companion nodes while preserving remote prepare for node hosts that support it. Fixes #37591 and duplicate #66839; carries forward #69725. Thanks @soloclz.
|
||||
|
||||
@@ -4487,6 +4487,8 @@ public struct ChatEvent: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
public struct UpdateStatusParams: Codable, Sendable {}
|
||||
|
||||
public struct UpdateRunParams: Codable, Sendable {
|
||||
public let sessionkey: String?
|
||||
public let deliverycontext: [String: AnyCodable]?
|
||||
|
||||
@@ -4487,6 +4487,8 @@ public struct ChatEvent: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
public struct UpdateStatusParams: Codable, Sendable {}
|
||||
|
||||
public struct UpdateRunParams: Codable, Sendable {
|
||||
public let sessionkey: String?
|
||||
public let deliverycontext: [String: AnyCodable]?
|
||||
|
||||
@@ -579,6 +579,7 @@ For tooling that writes config over the gateway API, prefer this flow:
|
||||
deletes, arrays replace)
|
||||
- `config.apply` only when you intend to replace the entire config
|
||||
- `update.run` for explicit self-update plus restart
|
||||
- `update.status` to inspect the latest update restart sentinel and verify the running version after a restart
|
||||
|
||||
Agents should treat `config.schema.lookup` as the first stop for exact
|
||||
field-level docs and constraints. Use [Configuration reference](/gateway/configuration-reference)
|
||||
@@ -589,6 +590,8 @@ subsystem references.
|
||||
Control-plane writes (`config.apply`, `config.patch`, `update.run`) are
|
||||
rate-limited to 3 requests per 60 seconds per `deviceId+clientIp`. Restart
|
||||
requests coalesce and then enforce a 30-second cooldown between restart cycles.
|
||||
`update.status` is read-only but admin-scoped because the restart sentinel can
|
||||
include update step summaries and command output tails.
|
||||
</Note>
|
||||
|
||||
Example partial patch:
|
||||
|
||||
@@ -330,6 +330,7 @@ enumeration of `src/gateway/server-methods/*.ts`.
|
||||
- `config.schema` returns the live config schema payload used by Control UI and CLI tooling: schema, `uiHints`, version, and generation metadata, including plugin + channel schema metadata when the runtime can load it. The schema includes field `title` / `description` metadata derived from the same labels and help text used by the UI, including nested object, wildcard, array-item, and `anyOf` / `oneOf` / `allOf` composition branches when matching field documentation exists.
|
||||
- `config.schema.lookup` returns a path-scoped lookup payload for one config path: normalized path, a shallow schema node, matched hint + `hintPath`, and immediate child summaries for UI/CLI drill-down. Lookup schema nodes keep the user-facing docs and common validation fields (`title`, `description`, `type`, `enum`, `const`, `format`, `pattern`, numeric/string/array/object bounds, and flags like `additionalProperties`, `deprecated`, `readOnly`, `writeOnly`). Child summaries expose `key`, normalized `path`, `type`, `required`, `hasChildren`, plus the matched `hint` / `hintPath`.
|
||||
- `update.run` runs the gateway update flow and schedules a restart only when the update itself succeeded.
|
||||
- `update.status` returns the latest cached update restart sentinel, including the post-restart running version when available.
|
||||
- `wizard.start`, `wizard.next`, `wizard.status`, and `wizard.cancel` expose the onboarding wizard over WS RPC.
|
||||
</Accordion>
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ The Control UI can localize itself on first load based on your browser locale. T
|
||||
<Accordion title="Debug, logs, update">
|
||||
- Debug: status/health/models snapshots + event log + manual RPC calls (`status`, `health`, `models.list`).
|
||||
- Logs: live tail of gateway file logs with filter/export (`logs.tail`).
|
||||
- Update: run a package/git update + restart (`update.run`) with a restart report.
|
||||
- Update: run a package/git update + restart (`update.run`) with a restart report, then poll `update.status` after reconnect to verify the running gateway version.
|
||||
</Accordion>
|
||||
<Accordion title="Cron jobs panel notes">
|
||||
- For isolated jobs, delivery defaults to announce summary. You can switch to none if you want internal-only runs.
|
||||
|
||||
@@ -9,6 +9,7 @@ const consumeGatewaySigusr1RestartAuthorization = vi.fn(() => true);
|
||||
const consumeGatewayRestartIntentSync = vi.fn(() => false);
|
||||
const isGatewaySigusr1RestartExternallyAllowed = vi.fn(() => false);
|
||||
const markGatewaySigusr1RestartHandled = vi.fn();
|
||||
const peekGatewaySigusr1RestartReason = vi.fn<() => string | undefined>(() => undefined);
|
||||
const scheduleGatewaySigusr1Restart = vi.fn((_opts?: { delayMs?: number; reason?: string }) => ({
|
||||
ok: true,
|
||||
pid: process.pid,
|
||||
@@ -30,6 +31,17 @@ const waitForBundledRuntimeDepsInstallIdle = vi.fn(async (_timeoutMs?: number) =
|
||||
const restartGatewayProcessWithFreshPid = vi.fn<
|
||||
() => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string }
|
||||
>(() => ({ mode: "disabled" }));
|
||||
const respawnGatewayProcessForUpdate = vi.fn<
|
||||
() => {
|
||||
mode: "spawned" | "supervised" | "disabled" | "failed";
|
||||
pid?: number;
|
||||
detail?: string;
|
||||
child?: { kill: () => void };
|
||||
}
|
||||
>(() => ({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" }));
|
||||
const markUpdateRestartSentinelFailure = vi.fn<(reason: string) => Promise<null>>(
|
||||
async (_reason: string) => null,
|
||||
);
|
||||
const abortEmbeddedPiRun = vi.fn(
|
||||
(_sessionId?: string, _opts?: { mode?: "all" | "compacting" }) => false,
|
||||
);
|
||||
@@ -58,14 +70,20 @@ vi.mock("../../infra/restart.js", () => ({
|
||||
consumeGatewayRestartIntentSync: () => consumeGatewayRestartIntentSync(),
|
||||
isGatewaySigusr1RestartExternallyAllowed: () => isGatewaySigusr1RestartExternallyAllowed(),
|
||||
markGatewaySigusr1RestartHandled: () => markGatewaySigusr1RestartHandled(),
|
||||
peekGatewaySigusr1RestartReason: () => peekGatewaySigusr1RestartReason(),
|
||||
scheduleGatewaySigusr1Restart: (opts?: { delayMs?: number; reason?: string }) =>
|
||||
scheduleGatewaySigusr1Restart(opts),
|
||||
}));
|
||||
|
||||
vi.mock("../../infra/process-respawn.js", () => ({
|
||||
respawnGatewayProcessForUpdate: () => respawnGatewayProcessForUpdate(),
|
||||
restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(),
|
||||
}));
|
||||
|
||||
vi.mock("../../infra/restart-sentinel.js", () => ({
|
||||
markUpdateRestartSentinelFailure: (reason: string) => markUpdateRestartSentinelFailure(reason),
|
||||
}));
|
||||
|
||||
vi.mock("../../process/command-queue.js", () => ({
|
||||
getActiveTaskCount: () => getActiveTaskCount(),
|
||||
markGatewayDraining: () => markGatewayDraining(),
|
||||
@@ -195,6 +213,8 @@ async function runLoopWithStart(params: {
|
||||
start: ReturnType<typeof vi.fn>;
|
||||
runtime: LoopRuntime;
|
||||
lockPort?: number;
|
||||
healthHost?: string;
|
||||
waitForHealthyChild?: (port: number, pid?: number, host?: string) => Promise<boolean>;
|
||||
}) {
|
||||
vi.resetModules();
|
||||
const { runGatewayLoop } = await import("./run-loop.js");
|
||||
@@ -202,6 +222,8 @@ async function runLoopWithStart(params: {
|
||||
start: params.start as unknown as Parameters<typeof runGatewayLoop>[0]["start"],
|
||||
runtime: params.runtime,
|
||||
lockPort: params.lockPort,
|
||||
healthHost: params.healthHost,
|
||||
waitForHealthyChild: params.waitForHealthyChild,
|
||||
});
|
||||
return { loopPromise };
|
||||
}
|
||||
@@ -292,6 +314,12 @@ describe("runGatewayLoop", () => {
|
||||
},
|
||||
},
|
||||
});
|
||||
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);
|
||||
respawnGatewayProcessForUpdate.mockReturnValue({
|
||||
mode: "disabled",
|
||||
detail: "OPENCLAW_NO_RESPAWN",
|
||||
});
|
||||
markUpdateRestartSentinelFailure.mockClear();
|
||||
|
||||
await withIsolatedSignals(async ({ captureSignal }) => {
|
||||
getActiveTaskCount.mockReturnValueOnce(2).mockReturnValueOnce(0);
|
||||
@@ -453,6 +481,7 @@ describe("runGatewayLoop", () => {
|
||||
|
||||
it("releases the lock before exiting on spawned restart", async () => {
|
||||
vi.clearAllMocks();
|
||||
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);
|
||||
|
||||
await withIsolatedSignals(async ({ captureSignal }) => {
|
||||
const lockRelease = vi.fn(async () => {});
|
||||
@@ -484,6 +513,7 @@ describe("runGatewayLoop", () => {
|
||||
|
||||
it("waits briefly before exiting on launchd supervised restart", async () => {
|
||||
vi.clearAllMocks();
|
||||
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);
|
||||
try {
|
||||
setPlatform("darwin");
|
||||
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
|
||||
@@ -511,6 +541,7 @@ describe("runGatewayLoop", () => {
|
||||
|
||||
it("forwards lockPort to initial and restart lock acquisitions", async () => {
|
||||
vi.clearAllMocks();
|
||||
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);
|
||||
|
||||
await withIsolatedSignals(async ({ captureSignal }) => {
|
||||
const closeFirst = vi.fn(async () => {});
|
||||
@@ -549,6 +580,7 @@ describe("runGatewayLoop", () => {
|
||||
|
||||
it("exits when lock reacquire fails during in-process restart fallback", async () => {
|
||||
vi.clearAllMocks();
|
||||
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);
|
||||
|
||||
await withIsolatedSignals(async ({ captureSignal }) => {
|
||||
const lockRelease = vi.fn(async () => {});
|
||||
@@ -574,6 +606,103 @@ describe("runGatewayLoop", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("hard-respawns update restarts and exits only after the replacement becomes healthy", async () => {
  // Arrange: the pending SIGUSR1 restart is tagged as an update, and the
  // respawn helper reports a freshly spawned replacement process.
  const gatewayPort = 18789;
  const replacementPid = 7777;
  vi.clearAllMocks();
  peekGatewaySigusr1RestartReason.mockReturnValue("update.run");
  respawnGatewayProcessForUpdate.mockReturnValueOnce({
    mode: "spawned",
    pid: replacementPid,
    child: { kill: vi.fn() },
  });

  await withIsolatedSignals(async ({ captureSignal }) => {
    // The injected health probe reports the replacement as healthy right away.
    const waitForHealthyChild = vi.fn(async () => true);
    const closeServer = vi.fn(async () => {});
    const { start, started } = createSignaledStart(closeServer);
    const { runtime, exited } = createRuntimeWithExitSignal();
    await runLoopWithStart({ start, runtime, lockPort: gatewayPort, waitForHealthyChild });
    await waitForStart(started);
    const triggerRestart = captureSignal("SIGUSR1");

    // Act: deliver the restart signal.
    triggerRestart();

    // Assert: the parent exits cleanly, probed the default loopback host,
    // never restarted in-process, and never flagged the sentinel as failed.
    await expect(exited).resolves.toBe(0);
    expect(waitForHealthyChild).toHaveBeenCalledWith(gatewayPort, replacementPid, "127.0.0.1");
    expect(respawnGatewayProcessForUpdate).toHaveBeenCalledTimes(1);
    expect(start).toHaveBeenCalledTimes(1);
    expect(markUpdateRestartSentinelFailure).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
it("probes the configured gateway host for update respawn health", async () => {
  // Arrange: an update-tagged restart whose respawn yields a spawned child.
  const gatewayPort = 18789;
  const replacementPid = 7778;
  const configuredHost = "10.0.0.25";
  vi.clearAllMocks();
  peekGatewaySigusr1RestartReason.mockReturnValue("update.run");
  respawnGatewayProcessForUpdate.mockReturnValueOnce({
    mode: "spawned",
    pid: replacementPid,
    child: { kill: vi.fn() },
  });

  await withIsolatedSignals(async ({ captureSignal }) => {
    const waitForHealthyChild = vi.fn(async () => true);
    const closeServer = vi.fn(async () => {});
    const { start, started } = createSignaledStart(closeServer);
    const { runtime, exited } = createRuntimeWithExitSignal();
    const loopParams = {
      start,
      runtime,
      lockPort: gatewayPort,
      healthHost: configuredHost,
      waitForHealthyChild,
    };
    await runLoopWithStart(loopParams);
    await waitForStart(started);
    const triggerRestart = captureSignal("SIGUSR1");

    // Act: deliver the restart signal.
    triggerRestart();

    // Assert: the explicit healthHost is forwarded to the health probe
    // instead of the loopback default.
    await expect(exited).resolves.toBe(0);
    expect(waitForHealthyChild).toHaveBeenCalledWith(gatewayPort, replacementPid, configuredHost);
  });
});
|
||||
|
||||
it("marks update respawn failures and falls back to in-process restart", async () => {
  // Arrange: an update-tagged restart whose spawned child never turns healthy.
  const gatewayPort = 18789;
  const replacementPid = 8888;
  vi.clearAllMocks();
  peekGatewaySigusr1RestartReason.mockReturnValue("update.run");
  const kill = vi.fn();
  respawnGatewayProcessForUpdate.mockReturnValueOnce({
    mode: "spawned",
    pid: replacementPid,
    child: { kill },
  });

  await withIsolatedSignals(async ({ captureSignal }) => {
    // Yields one macrotask turn so the run loop can make progress.
    const flushImmediate = () => new Promise<void>((resolve) => setImmediate(resolve));

    const waitForHealthyChild = vi.fn(async () => false);
    const closeFirst = vi.fn(async () => {});
    const closeSecond = vi.fn(async () => {});
    const { runtime, exited } = createRuntimeWithExitSignal();
    // First start() serves the initial boot, second one the in-process fallback.
    const start = vi.fn();
    start.mockResolvedValueOnce({ close: closeFirst });
    start.mockResolvedValueOnce({ close: closeSecond });

    await runLoopWithStart({ start, runtime, lockPort: gatewayPort, waitForHealthyChild });
    await flushImmediate();
    const triggerRestart = captureSignal("SIGUSR1");
    const triggerStop = captureSignal("SIGTERM");

    // Act: request the restart and let the loop process it.
    triggerRestart();
    await flushImmediate();

    // Assert: the unhealthy child is killed, the sentinel is flagged, and the
    // gateway is started a second time inside the same process.
    expect(waitForHealthyChild).toHaveBeenCalledWith(gatewayPort, replacementPid, "127.0.0.1");
    expect(kill).toHaveBeenCalledTimes(1);
    expect(markUpdateRestartSentinelFailure).toHaveBeenCalledWith("restart-unhealthy");
    expect(start).toHaveBeenCalledTimes(2);

    // Stop the loop so the test can finish.
    triggerStop();
    await expect(exited).resolves.toBe(0);
  });
});
|
||||
});
|
||||
|
||||
describe("gateway discover routing helpers", () => {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import net from "node:net";
|
||||
import {
|
||||
abortEmbeddedPiRun,
|
||||
getActiveEmbeddedRunCount,
|
||||
@@ -7,12 +8,17 @@ import { loadConfig } from "../../config/config.js";
|
||||
import type { startGatewayServer } from "../../gateway/server.js";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import { acquireGatewayLock } from "../../infra/gateway-lock.js";
|
||||
import { restartGatewayProcessWithFreshPid } from "../../infra/process-respawn.js";
|
||||
import {
|
||||
respawnGatewayProcessForUpdate,
|
||||
restartGatewayProcessWithFreshPid,
|
||||
} from "../../infra/process-respawn.js";
|
||||
import { markUpdateRestartSentinelFailure } from "../../infra/restart-sentinel.js";
|
||||
import {
|
||||
consumeGatewaySigusr1RestartAuthorization,
|
||||
consumeGatewayRestartIntentSync,
|
||||
isGatewaySigusr1RestartExternallyAllowed,
|
||||
markGatewaySigusr1RestartHandled,
|
||||
peekGatewaySigusr1RestartReason,
|
||||
scheduleGatewaySigusr1Restart,
|
||||
} from "../../infra/restart.js";
|
||||
import { detectRespawnSupervisor } from "../../infra/supervisor-markers.js";
|
||||
@@ -35,22 +41,67 @@ const gatewayLog = createSubsystemLogger("gateway");
|
||||
const LAUNCHD_SUPERVISED_RESTART_EXIT_DELAY_MS = 1500;
|
||||
const DEFAULT_RESTART_DRAIN_TIMEOUT_MS = 300_000;
|
||||
const RESTART_DRAIN_STILL_PENDING_WARN_MS = 30_000;
|
||||
const UPDATE_RESPAWN_HEALTH_TIMEOUT_MS = 10_000;
|
||||
const UPDATE_RESPAWN_HEALTH_POLL_MS = 200;
|
||||
|
||||
type GatewayRunSignalAction = "stop" | "restart";
|
||||
type RestartDrainTimeoutMs = number | undefined;
|
||||
|
||||
/**
 * Performs one TCP connection attempt against `host:port`.
 *
 * Resolves `true` as soon as the socket connects, and `false` when the socket
 * errors or the attempt lasts longer than `UPDATE_RESPAWN_HEALTH_POLL_MS`.
 * The promise never rejects; the probe socket is always destroyed before it
 * settles.
 */
async function waitForGatewayPortReady(host: string, port: number): Promise<boolean> {
  return await new Promise<boolean>((resolve) => {
    const probe = net.createConnection({ host, port });
    let done = false;

    // Settles at most once: whichever of connect / error / timeout fires first wins.
    function settle(reachable: boolean): void {
      if (!done) {
        done = true;
        clearTimeout(attemptTimer);
        probe.removeAllListeners();
        probe.destroy();
        resolve(reachable);
      }
    }

    const attemptTimer = setTimeout(() => settle(false), UPDATE_RESPAWN_HEALTH_POLL_MS);
    probe.once("connect", () => settle(true));
    probe.once("error", () => settle(false));
  });
}
|
||||
|
||||
/**
 * Polls until a gateway accepts TCP connections on `host:port` or the timeout
 * elapses.
 *
 * @param port - TCP port the replacement gateway is expected to listen on.
 * @param _pid - PID of the spawned child. Unused here: health is judged by TCP
 *   reachability only. The parameter is kept so this function matches the
 *   `(port, pid, host)` shape of the injectable `waitForHealthyChild` hook.
 * @param host - Host to probe. Wildcard bind addresses (`0.0.0.0`, `::`) are
 *   probed through the matching loopback address, because a wildcard is a
 *   listen address and not a portable connect target (Windows rejects it with
 *   WSAEADDRNOTAVAIL), while a wildcard-bound server is reachable via loopback.
 * @param timeoutMs - Overall polling budget in milliseconds.
 * @returns `true` once a probe connects, `false` if the deadline passes first.
 */
async function waitForHealthyGatewayChild(
  port: number,
  _pid?: number,
  host = "127.0.0.1",
  timeoutMs = UPDATE_RESPAWN_HEALTH_TIMEOUT_MS,
): Promise<boolean> {
  // Translate "listen on every interface" into an address that can be dialed.
  let probeHost = host;
  if (host === "0.0.0.0") {
    probeHost = "127.0.0.1";
  } else if (host === "::" || host === "[::]") {
    probeHost = "::1";
  }

  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (await waitForGatewayPortReady(probeHost, port)) {
      return true;
    }
    // Pause between attempts so a fast connection refusal does not busy-loop.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, UPDATE_RESPAWN_HEALTH_POLL_MS);
    });
  }
  return false;
}
|
||||
|
||||
export async function runGatewayLoop(params: {
|
||||
start: (params?: {
|
||||
startupStartedAt?: number;
|
||||
}) => Promise<Awaited<ReturnType<typeof startGatewayServer>>>;
|
||||
runtime: RuntimeEnv;
|
||||
lockPort?: number;
|
||||
healthHost?: string;
|
||||
waitForHealthyChild?: (port: number, pid?: number, host?: string) => Promise<boolean>;
|
||||
}) {
|
||||
let startupStartedAt = Date.now();
|
||||
let lock = await acquireGatewayLock({ port: params.lockPort });
|
||||
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
|
||||
let shuttingDown = false;
|
||||
let restartResolver: (() => void) | null = null;
|
||||
const waitForHealthyChild = params.waitForHealthyChild ?? waitForHealthyGatewayChild;
|
||||
|
||||
const cleanupSignals = () => {
|
||||
process.removeListener("SIGTERM", onSigterm);
|
||||
@@ -86,8 +137,73 @@ export async function runGatewayLoop(params: {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
const handleRestartAfterServerClose = async () => {
|
||||
const handleRestartAfterServerClose = async (restartReason?: string) => {
|
||||
const hadLock = await releaseLockIfHeld();
|
||||
const isUpdateRestart = restartReason === "update.run";
|
||||
|
||||
if (isUpdateRestart) {
|
||||
const respawn = respawnGatewayProcessForUpdate();
|
||||
if (respawn.mode === "spawned") {
|
||||
const port = params.lockPort;
|
||||
const healthy =
|
||||
typeof port === "number"
|
||||
? await waitForHealthyChild(port, respawn.pid, params.healthHost ?? "127.0.0.1")
|
||||
: false;
|
||||
if (healthy) {
|
||||
gatewayLog.info(
|
||||
`restart mode: update process respawn (spawned pid ${respawn.pid ?? "unknown"})`,
|
||||
);
|
||||
exitProcess(0);
|
||||
return;
|
||||
}
|
||||
gatewayLog.warn(
|
||||
`update respawn child did not become healthy (${respawn.pid ?? "unknown"}); falling back to in-process restart`,
|
||||
);
|
||||
try {
|
||||
respawn.child?.kill();
|
||||
} catch {
|
||||
// Best-effort; parent fallback keeps the gateway reachable for recovery.
|
||||
}
|
||||
await markUpdateRestartSentinelFailure("restart-unhealthy").catch((err) => {
|
||||
gatewayLog.warn(`failed to mark update restart sentinel unhealthy: ${String(err)}`);
|
||||
});
|
||||
if (hadLock && !(await reacquireLockForInProcessRestart())) {
|
||||
return;
|
||||
}
|
||||
shuttingDown = false;
|
||||
restartResolver?.();
|
||||
return;
|
||||
}
|
||||
if (respawn.mode === "supervised") {
|
||||
gatewayLog.info("restart mode: update process respawn (supervisor restart)");
|
||||
if (detectRespawnSupervisor(process.env, process.platform) === "launchd") {
|
||||
await new Promise((resolve) => {
|
||||
setTimeout(resolve, LAUNCHD_SUPERVISED_RESTART_EXIT_DELAY_MS);
|
||||
});
|
||||
}
|
||||
exitProcess(0);
|
||||
return;
|
||||
}
|
||||
if (respawn.mode === "failed") {
|
||||
gatewayLog.warn(
|
||||
`update respawn failed (${respawn.detail ?? "unknown error"}); falling back to in-process restart`,
|
||||
);
|
||||
await markUpdateRestartSentinelFailure("restart-unhealthy").catch((err) => {
|
||||
gatewayLog.warn(`failed to mark update restart sentinel unhealthy: ${String(err)}`);
|
||||
});
|
||||
} else {
|
||||
gatewayLog.info(
|
||||
`restart mode: in-process restart (${respawn.detail ?? "OPENCLAW_NO_RESPAWN"})`,
|
||||
);
|
||||
}
|
||||
if (hadLock && !(await reacquireLockForInProcessRestart())) {
|
||||
return;
|
||||
}
|
||||
shuttingDown = false;
|
||||
restartResolver?.();
|
||||
return;
|
||||
}
|
||||
|
||||
// Release the lock BEFORE spawning so the child can acquire it immediately.
|
||||
const respawn = restartGatewayProcessWithFreshPid();
|
||||
if (respawn.mode === "spawned" || respawn.mode === "supervised") {
|
||||
@@ -143,7 +259,7 @@ export async function runGatewayLoop(params: {
|
||||
}
|
||||
};
|
||||
|
||||
const request = (action: GatewayRunSignalAction, signal: string) => {
|
||||
const request = (action: GatewayRunSignalAction, signal: string, restartReason?: string) => {
|
||||
if (shuttingDown) {
|
||||
gatewayLog.info(`received ${signal} during shutdown; ignoring`);
|
||||
return;
|
||||
@@ -257,7 +373,7 @@ export async function runGatewayLoop(params: {
|
||||
clearForceExitTimer();
|
||||
server = null;
|
||||
if (isRestart) {
|
||||
await handleRestartAfterServerClose();
|
||||
await handleRestartAfterServerClose(restartReason);
|
||||
} else {
|
||||
await handleStopAfterServerClose();
|
||||
}
|
||||
@@ -292,8 +408,9 @@ export async function runGatewayLoop(params: {
|
||||
scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "SIGUSR1" });
|
||||
return;
|
||||
}
|
||||
const restartReason = peekGatewaySigusr1RestartReason();
|
||||
markGatewaySigusr1RestartHandled();
|
||||
request("restart", "SIGUSR1");
|
||||
request("restart", "SIGUSR1", restartReason);
|
||||
};
|
||||
|
||||
process.on("SIGTERM", onSigterm);
|
||||
|
||||
@@ -24,7 +24,11 @@ import {
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { hasConfiguredSecretInput } from "../../config/types.secrets.js";
|
||||
import { resolveGatewayAuth } from "../../gateway/auth.js";
|
||||
import { defaultGatewayBindMode, isContainerEnvironment } from "../../gateway/net.js";
|
||||
import {
|
||||
defaultGatewayBindMode,
|
||||
isContainerEnvironment,
|
||||
resolveGatewayBindHost,
|
||||
} from "../../gateway/net.js";
|
||||
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
|
||||
import { setGatewayWsLogStyle } from "../../gateway/ws-logging.js";
|
||||
import { setVerbose } from "../../globals.js";
|
||||
@@ -680,6 +684,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) {
|
||||
await runGatewayLoop({
|
||||
runtime: defaultRuntime,
|
||||
lockPort: port,
|
||||
healthHost: await resolveGatewayBindHost(bind, cfg.gateway?.customBindHost),
|
||||
start: async ({ startupStartedAt } = {}) =>
|
||||
await startGatewayServer(port, {
|
||||
bind,
|
||||
|
||||
@@ -1078,6 +1078,30 @@ describe("update-cli", () => {
|
||||
).toContain("Low disk space near");
|
||||
});
|
||||
|
||||
it("refuses package updates from inside the gateway service process", async () => {
  mockPackageInstallStatus(createCaseDir("openclaw-update"));

  // Environment markers that identify the current process as the gateway service.
  const gatewayServiceEnv = {
    OPENCLAW_SERVICE_MARKER: "openclaw",
    OPENCLAW_SERVICE_KIND: "gateway",
  };
  await withEnvAsync(gatewayServiceEnv, async () => {
    await updateCommand({ yes: true });
  });

  // The command must report the refusal and exit non-zero...
  const refusalMessage = expect.stringContaining(
    "Package updates cannot run from inside the gateway service process.",
  );
  expect(defaultRuntime.error).toHaveBeenCalledWith(refusalMessage);
  expect(defaultRuntime.exit).toHaveBeenCalledWith(1);

  // ...without starting the update runner or the global npm install.
  const npmGlobalInstallArgv = [
    "npm",
    "i",
    "-g",
    "openclaw@latest",
    "--no-fund",
    "--no-audit",
    "--loglevel=error",
  ];
  expect(runGatewayUpdate).not.toHaveBeenCalled();
  expect(runCommandWithTimeout).not.toHaveBeenCalledWith(npmGlobalInstallArgv, expect.any(Object));
});
|
||||
|
||||
it("blocks package updates when the target requires a newer Node runtime", async () => {
|
||||
mockPackageInstallStatus(createCaseDir("openclaw-update"));
|
||||
vi.mocked(fetchNpmPackageTargetStatus).mockResolvedValue({
|
||||
|
||||
@@ -15,6 +15,7 @@ import {
|
||||
} from "../../config/config.js";
|
||||
import { formatConfigIssueLines } from "../../config/issue-format.js";
|
||||
import { asResolvedSourceConfig, asRuntimeConfig } from "../../config/materialize.js";
|
||||
import { GATEWAY_SERVICE_KIND, GATEWAY_SERVICE_MARKER } from "../../daemon/constants.js";
|
||||
import { resolveGatewayInstallEntrypoint } from "../../daemon/gateway-entrypoint.js";
|
||||
import { resolveGatewayRestartLogPath } from "../../daemon/restart-logs.js";
|
||||
import { readGatewayServiceState, resolveGatewayService } from "../../daemon/service.js";
|
||||
@@ -151,6 +152,16 @@ export function shouldUseLegacyProcessRestartAfterUpdate(params: {
|
||||
return !isPackageManagerUpdateMode(params.updateMode);
|
||||
}
|
||||
|
||||
/**
 * Reports whether the given environment identifies the current process as the
 * gateway service.
 *
 * True when the trimmed `OPENCLAW_SERVICE_MARKER` equals
 * `GATEWAY_SERVICE_MARKER` and the trimmed `OPENCLAW_SERVICE_KIND` is either
 * unset/blank or equal to `GATEWAY_SERVICE_KIND`.
 *
 * @param env - Environment variables to inspect; defaults to `process.env`.
 */
function isRunningInsideGatewayService(
  env: Record<string, string | undefined> = process.env,
): boolean {
  const marker = env.OPENCLAW_SERVICE_MARKER?.trim();
  const kind = env.OPENCLAW_SERVICE_KIND?.trim();
  const markerMatches = marker === GATEWAY_SERVICE_MARKER;
  // A missing or blank kind counts as the gateway service.
  const kindIsGatewayOrUnset = !kind || kind === GATEWAY_SERVICE_KIND;
  return markerMatches && kindIsGatewayOrUnset;
}
|
||||
|
||||
function formatCommandFailure(stdout: string, stderr: string): string {
|
||||
const detail = (stderr || stdout).trim();
|
||||
if (!detail) {
|
||||
@@ -1309,6 +1320,18 @@ export async function updateCommand(opts: UpdateCommandOptions): Promise<void> {
|
||||
return;
|
||||
}
|
||||
|
||||
if (updateInstallKind === "package" && isRunningInsideGatewayService()) {
|
||||
defaultRuntime.error(
|
||||
[
|
||||
"Package updates cannot run from inside the gateway service process.",
|
||||
"That path replaces the active OpenClaw dist tree while the live gateway may still lazy-load old chunks.",
|
||||
`Run \`${replaceCliName(formatCliCommand("openclaw update"), CLI_NAME)}\` from a shell outside the gateway service, or stop the gateway service first and then update.`,
|
||||
].join("\n"),
|
||||
);
|
||||
defaultRuntime.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (downgradeRisk && !opts.yes) {
|
||||
if (!process.stdin.isTTY || opts.json) {
|
||||
defaultRuntime.error(
|
||||
|
||||
@@ -38,6 +38,7 @@ describe("method scope resolution", () => {
|
||||
["diagnostics.stability", ["operator.read"]],
|
||||
["node.pair.approve", ["operator.pairing"]],
|
||||
["poll", ["operator.write"]],
|
||||
["update.status", ["operator.admin"]],
|
||||
["config.patch", ["operator.admin"]],
|
||||
["nativeHook.invoke", ["operator.admin"]],
|
||||
["wizard.start", ["operator.admin"]],
|
||||
|
||||
@@ -183,6 +183,7 @@ const METHOD_SCOPE_GROUPS: Record<OperatorScope, readonly string[]> = {
|
||||
"set-heartbeats",
|
||||
"system-event",
|
||||
"agents.files.set",
|
||||
"update.status",
|
||||
],
|
||||
[TALK_SECRETS_SCOPE]: [],
|
||||
};
|
||||
|
||||
@@ -93,6 +93,8 @@ import {
|
||||
ConfigSchemaResponseSchema,
|
||||
type ConfigSetParams,
|
||||
ConfigSetParamsSchema,
|
||||
type UpdateStatusParams,
|
||||
UpdateStatusParamsSchema,
|
||||
type ConnectParams,
|
||||
ConnectParamsSchema,
|
||||
type CronAddParams,
|
||||
@@ -536,6 +538,8 @@ export const validateChatSendParams = ajv.compile(ChatSendParamsSchema);
|
||||
export const validateChatAbortParams = ajv.compile<ChatAbortParams>(ChatAbortParamsSchema);
|
||||
export const validateChatInjectParams = ajv.compile<ChatInjectParams>(ChatInjectParamsSchema);
|
||||
export const validateChatEvent = ajv.compile(ChatEventSchema);
|
||||
export const validateUpdateStatusParams =
|
||||
ajv.compile<UpdateStatusParams>(UpdateStatusParamsSchema);
|
||||
export const validateUpdateRunParams = ajv.compile<UpdateRunParams>(UpdateRunParamsSchema);
|
||||
export const validateWebLoginStartParams =
|
||||
ajv.compile<WebLoginStartParams>(WebLoginStartParamsSchema);
|
||||
@@ -638,6 +642,7 @@ export {
|
||||
ConfigSchemaLookupParamsSchema,
|
||||
ConfigSchemaResponseSchema,
|
||||
ConfigSchemaLookupResultSchema,
|
||||
UpdateStatusParamsSchema,
|
||||
WizardStartParamsSchema,
|
||||
WizardNextParamsSchema,
|
||||
WizardCancelParamsSchema,
|
||||
@@ -838,6 +843,7 @@ export type {
|
||||
WebPushSubscribeParams,
|
||||
WebPushUnsubscribeParams,
|
||||
WebPushTestParams,
|
||||
UpdateStatusParams,
|
||||
UpdateRunParams,
|
||||
ChatInjectParams,
|
||||
};
|
||||
|
||||
@@ -51,6 +51,8 @@ export const ConfigSchemaLookupParamsSchema = Type.Object(
|
||||
{ additionalProperties: false },
|
||||
);
|
||||
|
||||
export const UpdateStatusParamsSchema = Type.Object({}, { additionalProperties: false });
|
||||
|
||||
export const UpdateRunParamsSchema = Type.Object(
|
||||
{
|
||||
sessionKey: Type.Optional(Type.String()),
|
||||
|
||||
@@ -78,6 +78,7 @@ import {
|
||||
ConfigSchemaParamsSchema,
|
||||
ConfigSchemaResponseSchema,
|
||||
ConfigSetParamsSchema,
|
||||
UpdateStatusParamsSchema,
|
||||
UpdateRunParamsSchema,
|
||||
} from "./config.js";
|
||||
import {
|
||||
@@ -365,6 +366,7 @@ export const ProtocolSchemas = {
|
||||
ChatAbortParams: ChatAbortParamsSchema,
|
||||
ChatInjectParams: ChatInjectParamsSchema,
|
||||
ChatEvent: ChatEventSchema,
|
||||
UpdateStatusParams: UpdateStatusParamsSchema,
|
||||
UpdateRunParams: UpdateRunParamsSchema,
|
||||
TickEvent: TickEventSchema,
|
||||
ShutdownEvent: ShutdownEventSchema,
|
||||
|
||||
@@ -69,6 +69,7 @@ export type ConfigSchemaParams = SchemaType<"ConfigSchemaParams">;
|
||||
export type ConfigSchemaLookupParams = SchemaType<"ConfigSchemaLookupParams">;
|
||||
export type ConfigSchemaResponse = SchemaType<"ConfigSchemaResponse">;
|
||||
export type ConfigSchemaLookupResult = SchemaType<"ConfigSchemaLookupResult">;
|
||||
/** Request params type derived from the `UpdateStatusParams` protocol schema entry. */
export type UpdateStatusParams = SchemaType<"UpdateStatusParams">;
|
||||
export type WizardStartParams = SchemaType<"WizardStartParams">;
|
||||
export type WizardNextParams = SchemaType<"WizardNextParams">;
|
||||
export type WizardCancelParams = SchemaType<"WizardCancelParams">;
|
||||
|
||||
@@ -71,6 +71,7 @@ const BASE_METHODS = [
|
||||
"skills.bins",
|
||||
"skills.install",
|
||||
"skills.update",
|
||||
"update.status",
|
||||
"update.run",
|
||||
"voicewake.get",
|
||||
"voicewake.set",
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js";
|
||||
import type { UpdateRunResult } from "../../infra/update-runner.js";
|
||||
import type { UpdateInstallSurface, UpdateRunResult } from "../../infra/update-runner.js";
|
||||
|
||||
// Capture the sentinel payload written during update.run
let capturedPayload: RestartSentinelPayload | undefined;

// Shared mock functions; the vi.mock factories below hand these to the module under test.
const runGatewayUpdateMock = vi.fn<() => Promise<UpdateRunResult>>();
// Defaults to a git checkout install surface rooted at /tmp/openclaw.
const resolveUpdateInstallSurfaceMock = vi.fn<() => Promise<UpdateInstallSurface>>(async () => ({
  kind: "git",
  mode: "git",
  root: "/tmp/openclaw",
  packageRoot: "/tmp/openclaw",
}));
const getLatestUpdateRestartSentinelMock = vi.fn<() => RestartSentinelPayload | null>(() => null);
const isRestartEnabledMock = vi.fn(() => true);
const readPackageVersionMock = vi.fn(async () => "1.0.0");
const detectRespawnSupervisorMock = vi.fn(() => null);

const scheduleGatewaySigusr1RestartMock = vi.fn(() => ({ scheduled: true }));
|
||||
|
||||
@@ -13,6 +23,10 @@ vi.mock("../../config/config.js", () => ({
|
||||
loadConfig: () => ({ update: {} }),
|
||||
}));
|
||||
|
||||
// Route isRestartEnabled through a mock so tests can toggle restart availability.
vi.mock("../../config/commands.flags.js", () => ({
  isRestartEnabled: isRestartEnabledMock,
}));
|
||||
|
||||
vi.mock("../../config/sessions.js", () => ({
|
||||
extractDeliveryInfo: (sessionKey: string | undefined) => {
|
||||
if (!sessionKey) {
|
||||
@@ -57,18 +71,33 @@ vi.mock("../../infra/restart.js", () => ({
|
||||
scheduleGatewaySigusr1Restart: scheduleGatewaySigusr1RestartMock,
|
||||
}));
|
||||
|
||||
// Module mocks for the collaborators update.ts imports; each factory exposes
// the shared mock functions declared at the top of this test file.
vi.mock("../../infra/package-json.js", () => ({
  readPackageVersion: readPackageVersionMock,
}));

vi.mock("../../infra/supervisor-markers.js", () => ({
  detectRespawnSupervisor: detectRespawnSupervisorMock,
}));

vi.mock("../../infra/update-channels.js", () => ({
  normalizeUpdateChannel: () => undefined,
}));

vi.mock("../../infra/update-runner.js", () => ({
  resolveUpdateInstallSurface: resolveUpdateInstallSurfaceMock,
  runGatewayUpdate: runGatewayUpdateMock,
}));

// Param validation always passes here; these tests exercise handler behavior only.
vi.mock("../protocol/index.js", () => ({
  validateUpdateStatusParams: () => true,
  validateUpdateRunParams: () => true,
}));

vi.mock("../server-restart-sentinel.js", () => ({
  getLatestUpdateRestartSentinel: getLatestUpdateRestartSentinelMock,
  recordLatestUpdateRestartSentinel: vi.fn(),
}));
|
||||
|
||||
vi.mock("./restart-request.js", () => ({
|
||||
parseRestartRequestParams: (params: Record<string, unknown>) => ({
|
||||
sessionKey: params.sessionKey,
|
||||
@@ -83,13 +112,28 @@ vi.mock("./validation.js", () => ({
|
||||
|
||||
// Restore every shared mock to its default before each test: restart enabled,
// no supervisor detected, a successful npm update to 2.0.0, and a git install surface.
beforeEach(() => {
  capturedPayload = undefined;
  isRestartEnabledMock.mockReset();
  isRestartEnabledMock.mockReturnValue(true);
  readPackageVersionMock.mockClear();
  readPackageVersionMock.mockResolvedValue("1.0.0");
  detectRespawnSupervisorMock.mockReset();
  detectRespawnSupervisorMock.mockReturnValue(null);
  runGatewayUpdateMock.mockClear();
  runGatewayUpdateMock.mockResolvedValue({
    status: "ok",
    mode: "npm",
    after: { version: "2.0.0" },
    steps: [],
    durationMs: 100,
  });
  resolveUpdateInstallSurfaceMock.mockClear();
  resolveUpdateInstallSurfaceMock.mockResolvedValue({
    kind: "git",
    mode: "git",
    root: "/tmp/openclaw",
    packageRoot: "/tmp/openclaw",
  });
  getLatestUpdateRestartSentinelMock.mockClear();
  scheduleGatewaySigusr1RestartMock.mockClear();
  scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true });
});
|
||||
@@ -199,4 +243,94 @@ describe("update.run restart scheduling", () => {
|
||||
expect(payload?.restart).toBeNull();
|
||||
expect(capturedPayload?.continuation).toBeUndefined();
|
||||
});
|
||||
|
||||
// Any non-"ok" runner outcome (skipped or error) must surface as ok=false while
// still passing the runner's status and reason through in `result`.
it.each([
  { status: "skipped" as const, reason: "dirty" },
  { status: "skipped" as const, reason: "not-git-install" },
  { status: "skipped" as const, reason: "restart-disabled" },
  { status: "error" as const, reason: "deps-install-failed" },
  { status: "error" as const, reason: "build-failed" },
  { status: "error" as const, reason: "global-install-failed" },
])("returns ok=false for $status:$reason", async ({ status, reason }) => {
  runGatewayUpdateMock.mockResolvedValueOnce({
    status,
    mode: "git",
    reason,
    steps: [],
    durationMs: 100,
  });

  let payload: { ok: boolean; result?: { status?: string; reason?: string } } | undefined;

  await invokeUpdateRun({}, (_ok: boolean, response: unknown) => {
    payload = response as typeof payload;
  });

  expect(payload?.ok).toBe(false);
  expect(payload?.result).toEqual(
    expect.objectContaining({
      status,
      reason,
    }),
  );
});
|
||||
|
||||
// With restarts disabled and no supervisor, a global install must be refused
// before the update runner or a restart is ever invoked.
it("blocks unmanaged global installs before package mutation when restart is unavailable", async () => {
  isRestartEnabledMock.mockReturnValue(false);
  detectRespawnSupervisorMock.mockReturnValue(null);
  resolveUpdateInstallSurfaceMock.mockResolvedValueOnce({
    kind: "global",
    mode: "npm",
    root: "/tmp/openclaw-global",
    packageRoot: "/tmp/openclaw-global",
  });

  let payload:
    | { ok: boolean; result?: { status?: string; reason?: string; mode?: string } }
    | undefined;

  await invokeUpdateRun({}, (_ok: boolean, response: unknown) => {
    payload = response as typeof payload;
  });

  expect(runGatewayUpdateMock).not.toHaveBeenCalled();
  expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled();
  expect(payload).toEqual(
    expect.objectContaining({
      ok: false,
      result: expect.objectContaining({
        status: "skipped",
        reason: "restart-unavailable",
        mode: "npm",
      }),
    }),
  );
});
|
||||
});
|
||||
|
||||
// update.status should respond with whatever sentinel the cache getter returns.
describe("update.status", () => {
  it("returns the latest cached update sentinel", async () => {
    getLatestUpdateRestartSentinelMock.mockReturnValueOnce({
      kind: "update",
      status: "ok",
      ts: 1,
      stats: {
        after: { version: "2.0.0" },
      },
    });
    const { updateHandlers } = await import("./update.js");
    const respond = vi.fn();

    await updateHandlers["update.status"]({
      params: {},
      respond,
    } as never);

    expect(respond).toHaveBeenCalledWith(true, {
      sentinel: expect.objectContaining({
        kind: "update",
        status: "ok",
      }),
    });
  });
});
|
||||
|
||||
@@ -1,21 +1,36 @@
|
||||
import { isRestartEnabled } from "../../config/commands.flags.js";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import { extractDeliveryInfo } from "../../config/sessions.js";
|
||||
import { resolveOpenClawPackageRoot } from "../../infra/openclaw-root.js";
|
||||
import { readPackageVersion } from "../../infra/package-json.js";
|
||||
import {
|
||||
formatDoctorNonInteractiveHint,
|
||||
type RestartSentinelPayload,
|
||||
writeRestartSentinel,
|
||||
} from "../../infra/restart-sentinel.js";
|
||||
import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js";
|
||||
import { detectRespawnSupervisor } from "../../infra/supervisor-markers.js";
|
||||
import { normalizeUpdateChannel } from "../../infra/update-channels.js";
|
||||
import { runGatewayUpdate } from "../../infra/update-runner.js";
|
||||
import { resolveUpdateInstallSurface, runGatewayUpdate } from "../../infra/update-runner.js";
|
||||
import { formatControlPlaneActor, resolveControlPlaneActor } from "../control-plane-audit.js";
|
||||
import { validateUpdateRunParams } from "../protocol/index.js";
|
||||
import { validateUpdateRunParams, validateUpdateStatusParams } from "../protocol/index.js";
|
||||
import {
|
||||
getLatestUpdateRestartSentinel,
|
||||
recordLatestUpdateRestartSentinel,
|
||||
} from "../server-restart-sentinel.js";
|
||||
import { parseRestartRequestParams } from "./restart-request.js";
|
||||
import type { GatewayRequestHandlers } from "./types.js";
|
||||
import { assertValidParams } from "./validation.js";
|
||||
|
||||
export const updateHandlers: GatewayRequestHandlers = {
|
||||
// Reports the most recently cached update restart sentinel (null when none is cached).
// Invalid params are answered by assertValidParams, so the handler just returns.
"update.status": async ({ params, respond }) => {
  if (!assertValidParams(params, validateUpdateStatusParams, "update.status", respond)) {
    return;
  }
  respond(true, {
    sentinel: getLatestUpdateRestartSentinel(),
  });
},
|
||||
"update.run": async ({ params, respond, client, context }) => {
|
||||
if (!assertValidParams(params, validateUpdateRunParams, "update.run", respond)) {
|
||||
return;
|
||||
@@ -48,17 +63,38 @@ export const updateHandlers: GatewayRequestHandlers = {
|
||||
argv1: process.argv[1],
|
||||
cwd: process.cwd(),
|
||||
})) ?? process.cwd();
|
||||
result = await runGatewayUpdate({
|
||||
const installSurface = await resolveUpdateInstallSurface({
|
||||
timeoutMs,
|
||||
cwd: root,
|
||||
argv1: process.argv[1],
|
||||
channel: configChannel ?? undefined,
|
||||
});
|
||||
} catch (err) {
|
||||
const supervisor = detectRespawnSupervisor(process.env, process.platform);
|
||||
if (!isRestartEnabled(config) && !supervisor) {
|
||||
const beforeVersion = installSurface.root
|
||||
? await readPackageVersion(installSurface.root)
|
||||
: null;
|
||||
result = {
|
||||
status: "skipped",
|
||||
mode: installSurface.mode,
|
||||
...(installSurface.root ? { root: installSurface.root } : {}),
|
||||
reason: installSurface.kind === "global" ? "restart-unavailable" : "restart-disabled",
|
||||
...(beforeVersion ? { before: { version: beforeVersion } } : {}),
|
||||
steps: [],
|
||||
durationMs: 0,
|
||||
};
|
||||
} else {
|
||||
result = await runGatewayUpdate({
|
||||
timeoutMs,
|
||||
cwd: root,
|
||||
argv1: process.argv[1],
|
||||
channel: configChannel ?? undefined,
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
result = {
|
||||
status: "error",
|
||||
mode: "unknown",
|
||||
reason: String(err),
|
||||
reason: "unexpected-error",
|
||||
steps: [],
|
||||
durationMs: 0,
|
||||
};
|
||||
@@ -97,6 +133,7 @@ export const updateHandlers: GatewayRequestHandlers = {
|
||||
let sentinelPath: string | null = null;
|
||||
try {
|
||||
sentinelPath = await writeRestartSentinel(payload);
|
||||
recordLatestUpdateRestartSentinel(payload);
|
||||
} catch {
|
||||
sentinelPath = null;
|
||||
}
|
||||
@@ -129,7 +166,7 @@ export const updateHandlers: GatewayRequestHandlers = {
|
||||
respond(
|
||||
true,
|
||||
{
|
||||
ok: result.status !== "error",
|
||||
ok: result.status === "ok",
|
||||
result,
|
||||
restart,
|
||||
sentinel: {
|
||||
|
||||
@@ -14,10 +14,12 @@ import { ackDelivery, enqueueDelivery, failDelivery } from "../infra/outbound/de
|
||||
import { buildOutboundSessionContext } from "../infra/outbound/session-context.js";
|
||||
import { resolveOutboundTarget } from "../infra/outbound/targets.js";
|
||||
import {
|
||||
finalizeUpdateRestartSentinelRunningVersion,
|
||||
formatRestartSentinelMessage,
|
||||
readRestartSentinel,
|
||||
removeRestartSentinelFile,
|
||||
type RestartSentinelContinuation,
|
||||
type RestartSentinelPayload,
|
||||
resolveRestartSentinelPath,
|
||||
summarizeRestartSentinel,
|
||||
} from "../infra/restart-sentinel.js";
|
||||
@@ -45,6 +47,16 @@ import { runStartupTasks, type StartupTask } from "./startup-tasks.js";
|
||||
const log = createSubsystemLogger("gateway/restart-sentinel");
// NOTE(review): presumably the delay and attempt cap for outbound delivery retries — confirm at the call sites.
const OUTBOUND_RETRY_DELAY_MS = 1_000;
const OUTBOUND_MAX_ATTEMPTS = 45;
// In-memory copy of the most recent update-kind sentinel payload; null until one is recorded.
let latestUpdateRestartSentinel: RestartSentinelPayload | null = null;
|
||||
|
||||
/**
 * Returns a deep copy of a sentinel payload via a JSON round-trip, so callers
 * cannot mutate the cached value. Null passes through unchanged.
 */
function cloneRestartSentinelPayload(
  payload: RestartSentinelPayload | null,
): RestartSentinelPayload | null {
  if (!payload) {
    return null;
  }
  return JSON.parse(JSON.stringify(payload)) as RestartSentinelPayload;
}
|
||||
|
||||
function hasRoutableDeliveryContext(context?: {
|
||||
channel?: string;
|
||||
@@ -562,3 +574,20 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
|
||||
/** Returns false under test (VITEST set or NODE_ENV === "test"), true otherwise. */
export function shouldWakeFromRestartSentinel() {
  return !process.env.VITEST && process.env.NODE_ENV !== "test";
}
|
||||
|
||||
/**
 * Refreshes the in-memory copy of the latest update restart sentinel.
 *
 * Uses the result of `finalizeUpdateRestartSentinelRunningVersion()` when it is
 * non-null, otherwise falls back to `readRestartSentinel()`. The cache is
 * replaced only when the payload kind is "update"; any other outcome leaves the
 * previously cached value in place.
 *
 * @returns A deep copy of the cached payload, or null when nothing is cached.
 */
export async function refreshLatestUpdateRestartSentinel(): Promise<RestartSentinelPayload | null> {
  const finalized = await finalizeUpdateRestartSentinelRunningVersion();
  const sentinel = finalized ?? (await readRestartSentinel());
  if (sentinel?.payload.kind === "update") {
    latestUpdateRestartSentinel = cloneRestartSentinelPayload(sentinel.payload);
  }
  return cloneRestartSentinelPayload(latestUpdateRestartSentinel);
}
|
||||
|
||||
/** Returns a deep copy of the cached update sentinel payload, or null when none is cached. */
export function getLatestUpdateRestartSentinel(): RestartSentinelPayload | null {
  return cloneRestartSentinelPayload(latestUpdateRestartSentinel);
}
|
||||
|
||||
/** Stores a deep copy of `payload` as the cached update sentinel, replacing any previous one. */
export function recordLatestUpdateRestartSentinel(payload: RestartSentinelPayload): void {
  latestUpdateRestartSentinel = cloneRestartSentinelPayload(payload);
}
|
||||
|
||||
@@ -20,6 +20,7 @@ const hoisted = vi.hoisted(() => {
|
||||
const scheduleSubagentOrphanRecovery = vi.fn();
|
||||
const shouldWakeFromRestartSentinel = vi.fn(() => false);
|
||||
const scheduleRestartSentinelWake = vi.fn();
|
||||
const refreshLatestUpdateRestartSentinel = vi.fn(async () => null);
|
||||
const getAcpRuntimeBackend = vi.fn<(id?: string) => unknown>(() => null);
|
||||
const reconcilePendingSessionIdentities = vi.fn(async () => ({
|
||||
checked: 0,
|
||||
@@ -42,6 +43,7 @@ const hoisted = vi.hoisted(() => {
|
||||
scheduleSubagentOrphanRecovery,
|
||||
shouldWakeFromRestartSentinel,
|
||||
scheduleRestartSentinelWake,
|
||||
refreshLatestUpdateRestartSentinel,
|
||||
getAcpRuntimeBackend,
|
||||
reconcilePendingSessionIdentities,
|
||||
};
|
||||
@@ -104,6 +106,7 @@ vi.mock("../acp/runtime/registry.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("./server-restart-sentinel.js", () => ({
|
||||
refreshLatestUpdateRestartSentinel: hoisted.refreshLatestUpdateRestartSentinel,
|
||||
scheduleRestartSentinelWake: hoisted.scheduleRestartSentinelWake,
|
||||
shouldWakeFromRestartSentinel: hoisted.shouldWakeFromRestartSentinel,
|
||||
}));
|
||||
@@ -403,6 +406,7 @@ function createPostAttachRuntimeDeps(
|
||||
return {
|
||||
getGlobalHookRunner: vi.fn(() => null),
|
||||
logGatewayStartup: hoisted.logGatewayStartup,
|
||||
refreshLatestUpdateRestartSentinel: hoisted.refreshLatestUpdateRestartSentinel,
|
||||
scheduleGatewayUpdateCheck: hoisted.scheduleGatewayUpdateCheck,
|
||||
startGatewaySidecars: vi.fn(async () => ({ pluginServices: null })),
|
||||
startGatewayTailscaleExposure: hoisted.startGatewayTailscaleExposure,
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
GATEWAY_EVENT_UPDATE_AVAILABLE,
|
||||
type GatewayUpdateAvailableEventPayload,
|
||||
} from "./events.js";
|
||||
import type { refreshLatestUpdateRestartSentinel } from "./server-restart-sentinel.js";
|
||||
import type { logGatewayStartup } from "./server-startup-log.js";
|
||||
import { STARTUP_UNAVAILABLE_GATEWAY_METHODS } from "./server-startup-unavailable-methods.js";
|
||||
import type { startGatewayTailscaleExposure } from "./server-tailscale.js";
|
||||
@@ -407,6 +408,9 @@ export async function startGatewaySidecars(params: {
|
||||
type GatewayPostAttachRuntimeDeps = {
|
||||
getGlobalHookRunner: () => Awaitable<ReturnType<typeof getGlobalHookRunner>>;
|
||||
logGatewayStartup: (params: Parameters<typeof logGatewayStartup>[0]) => Awaitable<void>;
|
||||
refreshLatestUpdateRestartSentinel: () => Awaitable<
|
||||
ReturnType<typeof refreshLatestUpdateRestartSentinel>
|
||||
>;
|
||||
scheduleGatewayUpdateCheck: (
|
||||
...args: Parameters<typeof scheduleGatewayUpdateCheck>
|
||||
) => Awaitable<ReturnType<typeof scheduleGatewayUpdateCheck>>;
|
||||
@@ -421,6 +425,8 @@ const defaultGatewayPostAttachRuntimeDeps: GatewayPostAttachRuntimeDeps = {
|
||||
(await import("../plugins/hook-runner-global.js")).getGlobalHookRunner(),
|
||||
logGatewayStartup: async (params) =>
|
||||
(await import("./server-startup-log.js")).logGatewayStartup(params),
|
||||
refreshLatestUpdateRestartSentinel: async () =>
|
||||
(await import("./server-restart-sentinel.js")).refreshLatestUpdateRestartSentinel(),
|
||||
scheduleGatewayUpdateCheck: async (...args) =>
|
||||
(await import("../infra/update-startup.js")).scheduleGatewayUpdateCheck(...args),
|
||||
startGatewaySidecars,
|
||||
@@ -471,6 +477,14 @@ export async function startGatewayPostAttachRuntime(
|
||||
},
|
||||
runtimeDeps: GatewayPostAttachRuntimeDeps = defaultGatewayPostAttachRuntimeDeps,
|
||||
) {
|
||||
await measureStartup(params.startupTrace, "post-attach.update-sentinel", async () => {
|
||||
try {
|
||||
await runtimeDeps.refreshLatestUpdateRestartSentinel();
|
||||
} catch (err) {
|
||||
params.log.warn(`restart sentinel refresh failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
|
||||
await measureStartup(params.startupTrace, "post-attach.log", () =>
|
||||
runtimeDeps.logGatewayStartup({
|
||||
cfg: params.cfgAtStart,
|
||||
|
||||
@@ -12,6 +12,12 @@ import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-cha
|
||||
import type { GatewayClient } from "./client.js";
|
||||
|
||||
vi.mock("../infra/update-runner.js", () => ({
|
||||
resolveUpdateInstallSurface: vi.fn(async () => ({
|
||||
kind: "git",
|
||||
mode: "git",
|
||||
root: "/repo",
|
||||
packageRoot: "/repo",
|
||||
})),
|
||||
runGatewayUpdate: vi.fn(async () => ({
|
||||
status: "ok",
|
||||
mode: "git",
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
emitGatewayRestart,
|
||||
isGatewaySigusr1RestartExternallyAllowed,
|
||||
markGatewaySigusr1RestartHandled,
|
||||
peekGatewaySigusr1RestartReason,
|
||||
scheduleGatewaySigusr1Restart,
|
||||
setGatewaySigusr1RestartPolicy,
|
||||
setPreRestartDeferralCheck,
|
||||
@@ -101,6 +102,24 @@ describe("infra runtime", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// When an update.run request coalesces into an already scheduled restart, the
// emitted restart must still report "update.run" as its reason.
it("preserves update restart reason when a scheduled restart coalesces", async () => {
  // A no-op listener keeps SIGUSR1 handled for the duration of the test.
  const handler = () => {};
  process.on("SIGUSR1", handler);
  try {
    const first = scheduleGatewaySigusr1Restart({ delayMs: 1_000, reason: "config.patch" });
    const second = scheduleGatewaySigusr1Restart({ delayMs: 1_000, reason: "update.run" });

    expect(first.coalesced).toBe(false);
    expect(second.coalesced).toBe(true);

    await vi.advanceTimersByTimeAsync(1_000);

    expect(peekGatewaySigusr1RestartReason()).toBe("update.run");
  } finally {
    process.removeListener("SIGUSR1", handler);
  }
});
|
||||
|
||||
it("runs restart preparation only when the scheduled restart emits", async () => {
|
||||
const beforeEmit = vi.fn(async () => {});
|
||||
const emitSpy = vi.spyOn(process, "emit");
|
||||
|
||||
@@ -18,7 +18,10 @@ vi.mock("./restart.js", () => ({
|
||||
triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args),
|
||||
}));
|
||||
|
||||
import { restartGatewayProcessWithFreshPid } from "./process-respawn.js";
|
||||
import {
|
||||
respawnGatewayProcessForUpdate,
|
||||
restartGatewayProcessWithFreshPid,
|
||||
} from "./process-respawn.js";
|
||||
|
||||
const originalArgv = [...process.argv];
|
||||
const originalExecArgv = [...process.execArgv];
|
||||
@@ -231,3 +234,42 @@ describe("restartGatewayProcessWithFreshPid", () => {
|
||||
expect(result.detail).toContain("spawn failed");
|
||||
});
|
||||
});
|
||||
|
||||
describe("respawnGatewayProcessForUpdate", () => {
  // The opt-out env var must win even on the update path: no spawn is attempted.
  it("keeps OPENCLAW_NO_RESPAWN semantics for update restarts", () => {
    clearSupervisorHints();
    process.env.OPENCLAW_NO_RESPAWN = "1";

    const result = respawnGatewayProcessForUpdate();

    expect(result).toEqual({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" });
    expect(spawnMock).not.toHaveBeenCalled();
  });

  // Without a supervisor on win32 the update path still spawns a detached child
  // using the current argv (minus the node binary).
  it("allows detached respawn on unmanaged Windows during updates", () => {
    clearSupervisorHints();
    setPlatform("win32");
    process.execArgv = [];
    process.argv = [
      "C:\\Program Files\\node.exe",
      "C:\\openclaw\\dist\\index.js",
      "gateway",
      "run",
    ];
    spawnMock.mockReturnValue({ pid: 5151, unref: vi.fn(), kill: vi.fn() });

    const result = respawnGatewayProcessForUpdate();

    expect(result.mode).toBe("spawned");
    expect(result.pid).toBe(5151);
    expect(spawnMock).toHaveBeenCalledWith(
      process.execPath,
      ["C:\\openclaw\\dist\\index.js", "gateway", "run"],
      expect.objectContaining({
        detached: true,
        env: process.env,
        stdio: "inherit",
      }),
    );
  });
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
import { formatErrorMessage } from "./errors.js";
|
||||
import { triggerOpenClawRestart } from "./restart.js";
|
||||
@@ -12,11 +12,26 @@ export type GatewayRespawnResult = {
|
||||
detail?: string;
|
||||
};
|
||||
|
||||
/**
 * Respawn result for update restarts. Extends the generic result with the
 * spawned child handle, which is set only when a detached child was spawned.
 */
export type GatewayUpdateRespawnResult = GatewayRespawnResult & {
  child?: ChildProcess;
};
|
||||
|
||||
/** True when the normalized value is one of "1", "true", "yes" or "on". */
function isTruthy(value: string | undefined): boolean {
  const normalized = normalizeOptionalLowercaseString(value);
  return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on";
}
|
||||
|
||||
/**
 * Spawns a detached copy of the current process: same executable, the current
 * execArgv followed by argv without the node binary, same environment, and
 * inherited stdio. The child is unref'd so it does not keep this process alive.
 *
 * @returns The child handle and its pid (undefined when no pid was assigned).
 */
function spawnDetachedGatewayProcess(): { child: ChildProcess; pid?: number } {
  const args = [...process.execArgv, ...process.argv.slice(1)];
  const child = spawn(process.execPath, args, {
    env: process.env,
    detached: true,
    stdio: "inherit",
  });
  child.unref();
  return { child, pid: child.pid ?? undefined };
}
|
||||
|
||||
/**
|
||||
* Attempt to restart this process with a fresh PID.
|
||||
* - supervised environments (launchd/systemd/schtasks): caller should exit and let supervisor restart
|
||||
@@ -53,16 +68,46 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
|
||||
}
|
||||
|
||||
try {
|
||||
const args = [...process.execArgv, ...process.argv.slice(1)];
|
||||
const child = spawn(process.execPath, args, {
|
||||
env: process.env,
|
||||
detached: true,
|
||||
stdio: "inherit",
|
||||
});
|
||||
child.unref();
|
||||
return { mode: "spawned", pid: child.pid ?? undefined };
|
||||
const { pid } = spawnDetachedGatewayProcess();
|
||||
return { mode: "spawned", pid };
|
||||
} catch (err) {
|
||||
const detail = formatErrorMessage(err);
|
||||
return { mode: "failed", detail };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Update restarts must replace the OS process so the new code runs from a
 * fresh module graph after package files have changed on disk.
 *
 * Unlike the generic restart path, update mode allows detached respawn on
 * unmanaged Windows installs because there is no safe in-process fallback once
 * the installed package contents have been replaced.
 *
 * @returns
 * - `disabled` when `OPENCLAW_NO_RESPAWN` is truthy;
 * - `supervised` when a supervisor is detected (for "schtasks" a restart is
 *   triggered first, and `failed` is returned if that trigger reports failure);
 * - `spawned` with the child handle and pid when a detached child was started;
 * - `failed` with the formatted error when spawning throws.
 */
export function respawnGatewayProcessForUpdate(): GatewayUpdateRespawnResult {
  if (isTruthy(process.env.OPENCLAW_NO_RESPAWN)) {
    return { mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" };
  }
  const supervisor = detectRespawnSupervisor(process.env);
  if (supervisor) {
    if (supervisor === "schtasks") {
      const restart = triggerOpenClawRestart();
      if (!restart.ok) {
        return {
          mode: "failed",
          detail: restart.detail ?? `${restart.method} restart failed`,
        };
      }
    }
    return { mode: "supervised" };
  }
  try {
    const { child, pid } = spawnDetachedGatewayProcess();
    return { mode: "spawned", pid, child };
  } catch (err) {
    return {
      mode: "failed",
      detail: formatErrorMessage(err),
    };
  }
}
|
||||
|
||||
@@ -7,8 +7,10 @@ import {
|
||||
DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE,
|
||||
buildRestartSuccessContinuation,
|
||||
consumeRestartSentinel,
|
||||
finalizeUpdateRestartSentinelRunningVersion,
|
||||
formatDoctorNonInteractiveHint,
|
||||
formatRestartSentinelMessage,
|
||||
markUpdateRestartSentinelFailure,
|
||||
readRestartSentinel,
|
||||
resolveRestartSentinelPath,
|
||||
summarizeRestartSentinel,
|
||||
@@ -184,6 +186,55 @@ describe("restart sentinel", () => {
|
||||
expect(trimLogTail("hello\n")).toBe("hello");
|
||||
expect(trimLogTail(undefined)).toBeNull();
|
||||
});
|
||||
|
||||
// Finalizing must overwrite stats.after.version with the version actually running.
it("writes the running version back to update sentinels on startup", async () => {
  await withRestartSentinelStateDir(async () => {
    await writeRestartSentinel({
      kind: "update",
      status: "ok",
      ts: Date.now(),
      stats: {
        after: { version: "expected-version" },
      },
    });

    await finalizeUpdateRestartSentinelRunningVersion("actual-version");

    await expect(readRestartSentinel()).resolves.toMatchObject({
      payload: {
        kind: "update",
        stats: {
          after: {
            version: "actual-version",
          },
        },
      },
    });
  });
});
|
||||
|
||||
// Marking a failure flips status to "error" and records the reason under stats.
it("marks update restart failures with a stable reason", async () => {
  await withRestartSentinelStateDir(async () => {
    await writeRestartSentinel({
      kind: "update",
      status: "ok",
      ts: Date.now(),
      stats: {},
    });

    await markUpdateRestartSentinelFailure("restart-unhealthy");

    await expect(readRestartSentinel()).resolves.toMatchObject({
      payload: {
        kind: "update",
        status: "error",
        stats: {
          reason: "restart-unhealthy",
        },
      },
    });
  });
});
|
||||
});
|
||||
|
||||
describe("restart success continuation", () => {
|
||||
|
||||
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { formatCliCommand } from "../cli/command-format.js";
|
||||
import { resolveStateDir } from "../config/paths.js";
|
||||
import { resolveRuntimeServiceVersion } from "../version.js";
|
||||
import { writeJsonAtomic } from "./json-files.js";
|
||||
|
||||
export type RestartSentinelLog = {
|
||||
@@ -87,6 +88,70 @@ export async function writeRestartSentinel(
|
||||
return filePath;
|
||||
}
|
||||
|
||||
/** Type guard: true for any non-null, non-array object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
||||
|
||||
/** Deep-copies a sentinel payload via a JSON round-trip. */
function cloneRestartSentinelPayload(payload: RestartSentinelPayload): RestartSentinelPayload {
  return JSON.parse(JSON.stringify(payload)) as RestartSentinelPayload;
}
|
||||
|
||||
/**
 * Read-modify-write helper for the restart sentinel.
 *
 * @param rewrite Receives a deep copy of the current payload and returns the
 *   replacement payload, or null to leave the sentinel untouched.
 * @param env Environment passed through to the sentinel read and write.
 * @returns The rewritten sentinel, or null when there is no sentinel to read
 *   or `rewrite` declined to produce a payload (nothing is written then).
 */
export async function rewriteRestartSentinel(
  rewrite: (payload: RestartSentinelPayload) => RestartSentinelPayload | null,
  env: NodeJS.ProcessEnv = process.env,
): Promise<RestartSentinel | null> {
  const current = await readRestartSentinel(env);
  if (!current) {
    return null;
  }
  // Hand the callback a copy so it can mutate freely without touching `current`.
  const nextPayload = rewrite(cloneRestartSentinelPayload(current.payload));
  if (!nextPayload) {
    return null;
  }
  await writeRestartSentinel(nextPayload, env);
  return {
    version: 1,
    payload: nextPayload,
  };
}
|
||||
|
||||
/**
 * Stamps the running version into `stats.after.version` of an update sentinel.
 *
 * @param version Version to record. The default is resolved from `process.env`,
 *   not from the `env` argument.
 * @param env Environment passed through to the sentinel read and write.
 * @returns The rewritten sentinel, or null when no sentinel exists or its kind
 *   is not "update".
 */
export async function finalizeUpdateRestartSentinelRunningVersion(
  version = resolveRuntimeServiceVersion(process.env),
  env: NodeJS.ProcessEnv = process.env,
): Promise<RestartSentinel | null> {
  return await rewriteRestartSentinel((payload) => {
    if (payload.kind !== "update") {
      return null;
    }
    const stats = payload.stats ? { ...payload.stats } : {};
    // Keep any other fields already under `after`; start fresh if it is not an object.
    const after = isPlainRecord(stats.after) ? { ...stats.after } : {};
    after.version = version;
    stats.after = after;
    return {
      ...payload,
      stats,
    };
  }, env);
}
|
||||
|
||||
/**
 * Marks an update sentinel as failed: sets `status` to "error" and stores
 * `reason` under `stats.reason`, keeping the other stats fields.
 *
 * @param reason Failure reason to record.
 * @param env Environment passed through to the sentinel read and write.
 * @returns The rewritten sentinel, or null when no sentinel exists or its kind
 *   is not "update".
 */
export async function markUpdateRestartSentinelFailure(
  reason: string,
  env: NodeJS.ProcessEnv = process.env,
): Promise<RestartSentinel | null> {
  return await rewriteRestartSentinel((payload) => {
    if (payload.kind !== "update") {
      return null;
    }
    const stats = payload.stats ? { ...payload.stats } : {};
    stats.reason = reason;
    return {
      ...payload,
      status: "error",
      stats,
    };
  }, env);
}
|
||||
|
||||
export async function removeRestartSentinelFile(filePath: string | null | undefined) {
|
||||
if (!filePath) {
|
||||
return;
|
||||
|
||||
@@ -37,6 +37,7 @@ let preRestartCheck: (() => number) | null = null;
|
||||
// Restart cycle bookkeeping: a signal counts as unconsumed while
// emittedRestartToken is greater than consumedRestartToken.
let restartCycleToken = 0;
let emittedRestartToken = 0;
let consumedRestartToken = 0;
// Reason recorded for the currently emitted restart; cleared once it is handled or rolled back.
let emittedRestartReason: string | undefined;
let lastRestartEmittedAt = 0;
let pendingRestartTimer: ReturnType<typeof setTimeout> | null = null;
let pendingRestartDueAt = 0;
|
||||
@@ -45,6 +46,10 @@ let pendingRestartEmitHooks: RestartEmitHooks | undefined;
|
||||
let pendingRestartPreparing = false;
|
||||
const activeDeferralPolls = new Set<ReturnType<typeof setInterval>>();
|
||||
|
||||
/**
 * Decides whether an incoming restart reason should replace the current one.
 * Only "update.run" is preferred, and only when the current reason is not
 * already "update.run".
 */
function shouldPreferRestartReason(next?: string, current?: string): boolean {
  return next === "update.run" && current !== "update.run";
}
|
||||
|
||||
/** True while a restart has been emitted that has not yet been marked as consumed. */
function hasUnconsumedRestartSignal(): boolean {
  return emittedRestartToken > consumedRestartToken;
}
|
||||
@@ -241,7 +246,7 @@ export function setPreRestartDeferralCheck(fn: () => number): void {
|
||||
* Both scheduleGatewaySigusr1Restart and the config watcher should use this
|
||||
* to ensure only one restart fires.
|
||||
*/
|
||||
export function emitGatewayRestart(): boolean {
|
||||
export function emitGatewayRestart(reasonOverride?: string): boolean {
|
||||
if (hasUnconsumedRestartSignal()) {
|
||||
clearActiveDeferralPolls();
|
||||
clearPendingScheduledRestart();
|
||||
@@ -251,6 +256,7 @@ export function emitGatewayRestart(): boolean {
|
||||
clearPendingScheduledRestart();
|
||||
const cycleToken = ++restartCycleToken;
|
||||
emittedRestartToken = cycleToken;
|
||||
emittedRestartReason = reasonOverride ?? pendingRestartReason;
|
||||
authorizeGatewaySigusr1Restart();
|
||||
try {
|
||||
if (process.listenerCount("SIGUSR1") > 0) {
|
||||
@@ -261,6 +267,7 @@ export function emitGatewayRestart(): boolean {
|
||||
} catch {
|
||||
// Roll back the cycle marker so future restart requests can still proceed.
|
||||
emittedRestartToken = consumedRestartToken;
|
||||
emittedRestartReason = undefined;
|
||||
return false;
|
||||
}
|
||||
lastRestartEmittedAt = Date.now();
|
||||
@@ -307,6 +314,10 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Returns the reason recorded for the currently emitted restart without
 * changing any restart-cycle state.
 *
 * @returns The recorded reason while an emitted restart is still unconsumed;
 *   `undefined` when no restart is outstanding or no reason was recorded.
 */
export function peekGatewaySigusr1RestartReason(): string | undefined {
  return hasUnconsumedRestartSignal() ? emittedRestartReason : undefined;
}
|
||||
|
||||
/**
|
||||
* Mark the currently emitted SIGUSR1 restart cycle as consumed by the run loop.
|
||||
* This explicitly advances the cycle state instead of resetting emit guards inside
|
||||
@@ -315,6 +326,7 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean {
|
||||
export function markGatewaySigusr1RestartHandled(): void {
  // Nothing outstanding: leave the cycle markers and recorded reason alone.
  if (!hasUnconsumedRestartSignal()) {
    return;
  }
  // Catch the consumed marker up with the emitted one and drop the reason so
  // it cannot be observed again for a later, unrelated restart.
  consumedRestartToken = emittedRestartToken;
  emittedRestartReason = undefined;
}
|
||||
|
||||
@@ -337,7 +349,10 @@ function updatePendingRestartEmitHooks(hooks?: RestartEmitHooks): void {
|
||||
}
|
||||
}
|
||||
|
||||
async function emitPreparedGatewayRestart(hooks?: RestartEmitHooks): Promise<void> {
|
||||
async function emitPreparedGatewayRestart(
|
||||
hooks?: RestartEmitHooks,
|
||||
reasonOverride?: string,
|
||||
): Promise<void> {
|
||||
let nextHooks = hooks ?? pendingRestartEmitHooks;
|
||||
if (!hooks) {
|
||||
pendingRestartEmitHooks = undefined;
|
||||
@@ -363,7 +378,7 @@ async function emitPreparedGatewayRestart(hooks?: RestartEmitHooks): Promise<voi
|
||||
pendingRestartEmitHooks = undefined;
|
||||
}
|
||||
|
||||
const emitted = emitGatewayRestart();
|
||||
const emitted = emitGatewayRestart(reasonOverride);
|
||||
if (!emitted) {
|
||||
await preparedHooks?.afterEmitRejected?.().catch(() => undefined);
|
||||
}
|
||||
@@ -380,6 +395,7 @@ export function deferGatewayRestartUntilIdle(opts: {
|
||||
emitHooks?: RestartEmitHooks;
|
||||
pollMs?: number;
|
||||
maxWaitMs?: number;
|
||||
reason?: string;
|
||||
}): void {
|
||||
const pollMsRaw = opts.pollMs ?? DEFAULT_DEFERRAL_POLL_MS;
|
||||
const pollMs = Math.max(10, Math.floor(pollMsRaw));
|
||||
@@ -393,12 +409,12 @@ export function deferGatewayRestartUntilIdle(opts: {
|
||||
pending = opts.getPendingCount();
|
||||
} catch (err) {
|
||||
opts.hooks?.onCheckError?.(err);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks, opts.reason);
|
||||
return;
|
||||
}
|
||||
if (pending <= 0) {
|
||||
opts.hooks?.onReady?.();
|
||||
void emitPreparedGatewayRestart(opts.emitHooks);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks, opts.reason);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -413,14 +429,14 @@ export function deferGatewayRestartUntilIdle(opts: {
|
||||
clearInterval(poll);
|
||||
activeDeferralPolls.delete(poll);
|
||||
opts.hooks?.onCheckError?.(err);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks, opts.reason);
|
||||
return;
|
||||
}
|
||||
if (current <= 0) {
|
||||
clearInterval(poll);
|
||||
activeDeferralPolls.delete(poll);
|
||||
opts.hooks?.onReady?.();
|
||||
void emitPreparedGatewayRestart(opts.emitHooks);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks, opts.reason);
|
||||
return;
|
||||
}
|
||||
const elapsedMs = Date.now() - startedAt;
|
||||
@@ -432,7 +448,7 @@ export function deferGatewayRestartUntilIdle(opts: {
|
||||
clearInterval(poll);
|
||||
activeDeferralPolls.delete(poll);
|
||||
opts.hooks?.onTimeout?.(current, elapsedMs);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks);
|
||||
void emitPreparedGatewayRestart(opts.emitHooks, opts.reason);
|
||||
}
|
||||
}, pollMs);
|
||||
activeDeferralPolls.add(poll);
|
||||
@@ -622,6 +638,9 @@ export function scheduleGatewaySigusr1Restart(opts?: {
|
||||
const requestedDueAt = nowMs + delayMs + cooldownMsApplied;
|
||||
|
||||
if (hasUnconsumedRestartSignal()) {
|
||||
if (shouldPreferRestartReason(reason, emittedRestartReason)) {
|
||||
emittedRestartReason = reason;
|
||||
}
|
||||
restartLog.warn(
|
||||
`restart request coalesced (already in-flight) reason=${reason ?? "unspecified"} ${formatRestartAudit(opts?.audit)}`,
|
||||
);
|
||||
@@ -646,6 +665,9 @@ export function scheduleGatewaySigusr1Restart(opts?: {
|
||||
);
|
||||
clearPendingScheduledRestart();
|
||||
} else {
|
||||
if (shouldPreferRestartReason(reason, pendingRestartReason)) {
|
||||
pendingRestartReason = reason;
|
||||
}
|
||||
restartLog.warn(
|
||||
`restart request coalesced (already scheduled) reason=${reason ?? "unspecified"} pendingReason=${pendingRestartReason ?? "unspecified"} delayMs=${remainingMs} ${formatRestartAudit(opts?.audit)}`,
|
||||
);
|
||||
@@ -668,19 +690,21 @@ export function scheduleGatewaySigusr1Restart(opts?: {
|
||||
pendingRestartEmitHooks = opts?.emitHooks;
|
||||
pendingRestartTimer = setTimeout(
|
||||
() => {
|
||||
const scheduledReason = pendingRestartReason;
|
||||
pendingRestartTimer = null;
|
||||
pendingRestartDueAt = 0;
|
||||
pendingRestartReason = undefined;
|
||||
pendingRestartPreparing = true;
|
||||
const pendingCheck = preRestartCheck;
|
||||
if (!pendingCheck) {
|
||||
void emitPreparedGatewayRestart();
|
||||
void emitPreparedGatewayRestart(undefined, scheduledReason);
|
||||
return;
|
||||
}
|
||||
const cfg = getRuntimeConfig();
|
||||
deferGatewayRestartUntilIdle({
|
||||
getPendingCount: pendingCheck,
|
||||
maxWaitMs: cfg.gateway?.reload?.deferralTimeoutMs,
|
||||
reason: scheduledReason,
|
||||
});
|
||||
},
|
||||
Math.max(0, requestedDueAt - nowMs),
|
||||
@@ -706,6 +730,7 @@ export const __testing = {
|
||||
restartCycleToken = 0;
|
||||
emittedRestartToken = 0;
|
||||
consumedRestartToken = 0;
|
||||
emittedRestartReason = undefined;
|
||||
lastRestartEmittedAt = 0;
|
||||
clearActiveDeferralPolls();
|
||||
clearPendingScheduledRestart();
|
||||
|
||||
@@ -1509,7 +1509,7 @@ describe("runGatewayUpdate", () => {
|
||||
});
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.reason).toBe("global install verify");
|
||||
expect(result.reason).toBe("global-install-failed");
|
||||
expect(result.after?.version).toBe("2.0.0");
|
||||
expect(result.steps.at(-1)?.stderrTail).toContain(
|
||||
"expected installed version 2026.3.23-2, found 2.0.0",
|
||||
@@ -1539,7 +1539,7 @@ describe("runGatewayUpdate", () => {
|
||||
const result = await runWithCommand(runCommand, { cwd: pkgRoot });
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.reason).toBe("global install verify");
|
||||
expect(result.reason).toBe("global-install-failed");
|
||||
expect(result.steps.at(-1)?.stderrTail).toContain(
|
||||
`missing packaged dist file ${WHATSAPP_LIGHT_RUNTIME_API}`,
|
||||
);
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
detectGlobalInstallManagerForRoot,
|
||||
resolveGlobalInstallTarget,
|
||||
resolveGlobalInstallSpec,
|
||||
type GlobalInstallManager,
|
||||
} from "./update-global.js";
|
||||
import {
|
||||
managerInstallIgnoreScriptsArgs,
|
||||
@@ -123,6 +124,32 @@ type UpdateRunnerOptions = {
|
||||
progress?: UpdateStepProgress;
|
||||
};
|
||||
|
||||
/**
 * Describes the install location an update would operate on, discriminated by
 * `kind`:
 *
 * - `"git"`: the package root is itself a git checkout root.
 * - `"global"`: the package root belongs to a detected global install manager,
 *   which is reported in `mode`.
 * - `"package-root"`: a package root was found, but it is neither a git
 *   checkout root nor a recognised global install.
 * - `"missing"`: no package root was found; `root` is only set when a git root
 *   was still discovered.
 */
export type UpdateInstallSurface =
  | {
      kind: "git";
      mode: "git";
      root: string;
      packageRoot: string;
    }
  | {
      kind: "global";
      mode: GlobalInstallManager;
      root: string;
      packageRoot: string;
    }
  | {
      kind: "package-root";
      mode: "unknown";
      root: string;
      packageRoot: string;
    }
  | {
      kind: "missing";
      mode: "unknown";
      root?: string;
      packageRoot?: undefined;
    };
|
||||
|
||||
function mapManagerResolutionFailure(
|
||||
reason: UpdatePackageManagerFailureReason,
|
||||
): UpdateRunResult["reason"] {
|
||||
@@ -132,7 +159,6 @@ function mapManagerResolutionFailure(
|
||||
const DEFAULT_TIMEOUT_MS = 20 * 60_000;
|
||||
const MAX_LOG_CHARS = 8000;
|
||||
const PREFLIGHT_MAX_COMMITS = 10;
|
||||
const START_DIRS = ["cwd", "argv1", "process"];
|
||||
const DEFAULT_PACKAGE_NAME = "openclaw";
|
||||
const CORE_PACKAGE_NAMES = new Set([DEFAULT_PACKAGE_NAME]);
|
||||
const PREFLIGHT_TEMP_PREFIX =
|
||||
@@ -535,18 +561,98 @@ function shouldRunDevPreflightLint(): boolean {
|
||||
return process.platform !== "win32";
|
||||
}
|
||||
|
||||
export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<UpdateRunResult> {
|
||||
const startedAt = Date.now();
|
||||
/**
 * Maps the display name of a failed update step to a stable reason code, so
 * callers receive a fixed identifier instead of a free-form step label.
 *
 * @param stepName - The `name` of the step that failed.
 * @returns The reason code for that step; `"unexpected-error"` for any step
 *   name that has no dedicated code.
 */
function normalizeFallbackFailureReason(stepName: string): NonNullable<UpdateRunResult["reason"]> {
  switch (stepName) {
    // Every global install / verification step collapses to one reason.
    case "global update":
    case "global update (omit optional)":
    case "global install verify":
      return "global-install-failed";
    case "openclaw doctor":
      return "doctor-failed";
    case "ui:build (post-doctor repair)":
      return "ui-build-failed";
    default:
      return "unexpected-error";
  }
}
|
||||
|
||||
async function buildUpdateCommandRunner(
|
||||
runCommand?: CommandRunner,
|
||||
): Promise<{ defaultCommandEnv: NodeJS.ProcessEnv | undefined; runCommand: CommandRunner }> {
|
||||
const defaultCommandEnv = await createGlobalInstallEnv();
|
||||
const runCommand =
|
||||
opts.runCommand ??
|
||||
(async (argv, options) => {
|
||||
if (runCommand) {
|
||||
return {
|
||||
defaultCommandEnv,
|
||||
runCommand,
|
||||
};
|
||||
}
|
||||
return {
|
||||
defaultCommandEnv,
|
||||
runCommand: async (argv, options) => {
|
||||
const res = await runCommandWithTimeout(argv, {
|
||||
...options,
|
||||
env: mergeCommandEnvironments(defaultCommandEnv, options.env),
|
||||
});
|
||||
return { stdout: res.stdout, stderr: res.stderr, code: res.code };
|
||||
});
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Works out which kind of install an update would act on, without running the
 * update itself.
 *
 * Resolution order:
 * 1. No package root found: `"missing"` (carrying the git root if one exists).
 * 2. Git root and package root resolve to the same path: `"git"`.
 * 3. A global install manager owns the package root: `"global"`.
 * 4. Otherwise: `"package-root"` with an unknown mode.
 *
 * A git root that differs from the package root is ignored, so a package that
 * merely lives inside some other repository is not treated as a git install.
 *
 * @param opts - Start-directory hints (`cwd`, `argv1`), the per-command
 *   timeout (defaults to `DEFAULT_TIMEOUT_MS`), and an optional command runner.
 * @returns The resolved install surface.
 */
export async function resolveUpdateInstallSurface(
  opts: Pick<UpdateRunnerOptions, "cwd" | "argv1" | "timeoutMs" | "runCommand"> = {},
): Promise<UpdateInstallSurface> {
  const { runCommand } = await buildUpdateCommandRunner(opts.runCommand);
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const candidates = buildStartDirs(opts);
  const pkgRoot = await findPackageRoot(candidates);
  const gitRoot = await resolveGitRoot(runCommand, candidates, timeoutMs);

  if (!pkgRoot) {
    // Without a package root there is nothing to update; still report the git
    // root when one was found so callers can say where they looked.
    return gitRoot
      ? { kind: "missing", mode: "unknown", root: gitRoot }
      : { kind: "missing", mode: "unknown" };
  }

  // Only a checkout whose root *is* the package root counts as a git install.
  if (gitRoot && path.resolve(gitRoot) === path.resolve(pkgRoot)) {
    return {
      kind: "git",
      mode: "git",
      root: gitRoot,
      packageRoot: pkgRoot,
    };
  }

  const globalManager = await detectGlobalInstallManagerForRoot(runCommand, pkgRoot, timeoutMs);
  if (globalManager) {
    return {
      kind: "global",
      mode: globalManager,
      root: pkgRoot,
      packageRoot: pkgRoot,
    };
  }

  return {
    kind: "package-root",
    mode: "unknown",
    root: pkgRoot,
    packageRoot: pkgRoot,
  };
}
|
||||
|
||||
export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<UpdateRunResult> {
|
||||
const startedAt = Date.now();
|
||||
const { defaultCommandEnv, runCommand } = await buildUpdateCommandRunner(opts.runCommand);
|
||||
const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||
const progress = opts.progress;
|
||||
const steps: UpdateStepResult[] = [];
|
||||
@@ -1187,6 +1293,17 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
step("openclaw doctor", doctorArgv, gitRoot, { OPENCLAW_UPDATE_IN_PROGRESS: "1" }),
|
||||
);
|
||||
steps.push(doctorStep);
|
||||
if (doctorStep.exitCode !== 0) {
|
||||
return {
|
||||
status: "error",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: "doctor-failed",
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
steps,
|
||||
durationMs: Date.now() - startedAt,
|
||||
};
|
||||
}
|
||||
|
||||
const uiIndexHealth = await resolveControlUiDistIndexHealth({ root: gitRoot });
|
||||
if (!uiIndexHealth.exists) {
|
||||
@@ -1213,7 +1330,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
status: "error",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: repairStep.name,
|
||||
reason: "ui-build-failed",
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
steps,
|
||||
durationMs: Date.now() - startedAt,
|
||||
@@ -1255,7 +1372,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
status: failedStep ? "error" : "ok",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: failedStep ? failedStep.name : undefined,
|
||||
reason: failedStep ? normalizeFallbackFailureReason(failedStep.name) : undefined,
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
after: {
|
||||
sha: afterShaStep.stdoutTail?.trim() ?? null,
|
||||
@@ -1273,7 +1390,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
return {
|
||||
status: "error",
|
||||
mode: "unknown",
|
||||
reason: `no root (${START_DIRS.join(",")})`,
|
||||
reason: "not-openclaw-root",
|
||||
steps: [],
|
||||
durationMs: Date.now() - startedAt,
|
||||
};
|
||||
@@ -1324,7 +1441,9 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
status: packageUpdate.failedStep ? "error" : "ok",
|
||||
mode: globalManager,
|
||||
root: packageUpdate.verifiedPackageRoot ?? pkgRoot,
|
||||
reason: packageUpdate.failedStep ? packageUpdate.failedStep.name : undefined,
|
||||
reason: packageUpdate.failedStep
|
||||
? normalizeFallbackFailureReason(packageUpdate.failedStep.name)
|
||||
: undefined,
|
||||
before: { version: beforeVersion },
|
||||
after: { version: packageUpdate.afterVersion },
|
||||
steps: packageUpdate.steps,
|
||||
|
||||
@@ -43,6 +43,9 @@ vi.mock("./gateway.ts", async (importOriginal) => {
|
||||
readonly start = vi.fn();
|
||||
readonly stop = vi.fn();
|
||||
readonly request = vi.fn(async (method: string) => {
|
||||
if (method === "update.status") {
|
||||
return { sentinel: null };
|
||||
}
|
||||
if (method === "models.authStatus") {
|
||||
return { ts: 0, providers: [] };
|
||||
}
|
||||
@@ -154,6 +157,8 @@ function createHost(): TestGatewayHost {
|
||||
assistantAgentId: null,
|
||||
localMediaPreviewRoots: [],
|
||||
serverVersion: null,
|
||||
pendingUpdateExpectedVersion: null,
|
||||
updateStatusBanner: null,
|
||||
sessionKey: "main",
|
||||
chatMessages: [],
|
||||
chatQueue: [],
|
||||
@@ -283,6 +288,117 @@ describe("connectGateway", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("clears pending update verification when the restarted version matches", async () => {
|
||||
const host = createHost();
|
||||
host.pendingUpdateExpectedVersion = "2.0.0";
|
||||
|
||||
connectGateway(host);
|
||||
const client = gatewayClientInstances[0];
|
||||
expect(client).toBeDefined();
|
||||
client.request.mockImplementation(async (method: string) => {
|
||||
if (method === "update.status") {
|
||||
return {
|
||||
sentinel: {
|
||||
kind: "update",
|
||||
status: "ok",
|
||||
stats: {
|
||||
after: { version: "2.0.0" },
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
});
|
||||
|
||||
client.emitHello({
|
||||
type: "hello-ok",
|
||||
protocol: 3,
|
||||
server: { version: "2.0.0" },
|
||||
snapshot: {},
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(host.pendingUpdateExpectedVersion).toBeNull();
|
||||
});
|
||||
expect(host.updateStatusBanner).toBeNull();
|
||||
});
|
||||
|
||||
it("shows a hard error when the restarted version does not match the expected update", async () => {
|
||||
const host = createHost();
|
||||
host.pendingUpdateExpectedVersion = "2.0.0";
|
||||
|
||||
connectGateway(host);
|
||||
const client = gatewayClientInstances[0];
|
||||
expect(client).toBeDefined();
|
||||
client.request.mockImplementation(async (method: string) => {
|
||||
if (method === "update.status") {
|
||||
return {
|
||||
sentinel: {
|
||||
kind: "update",
|
||||
status: "ok",
|
||||
stats: {
|
||||
after: { version: "1.0.0" },
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
});
|
||||
|
||||
client.emitHello({
|
||||
type: "hello-ok",
|
||||
protocol: 3,
|
||||
server: { version: "1.0.0" },
|
||||
snapshot: {},
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(host.pendingUpdateExpectedVersion).toBeNull();
|
||||
expect(host.updateStatusBanner?.text).toContain(
|
||||
"Update installed but running version did not change",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces post-restart sentinel failures after reconnect", async () => {
|
||||
const host = createHost();
|
||||
host.pendingUpdateExpectedVersion = "2.0.0";
|
||||
|
||||
connectGateway(host);
|
||||
const client = gatewayClientInstances[0];
|
||||
expect(client).toBeDefined();
|
||||
client.request.mockImplementation(async (method: string) => {
|
||||
if (method === "update.status") {
|
||||
return {
|
||||
sentinel: {
|
||||
kind: "update",
|
||||
status: "error",
|
||||
stats: {
|
||||
reason: "restart-unhealthy",
|
||||
after: { version: "1.0.0" },
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
});
|
||||
|
||||
client.emitHello({
|
||||
type: "hello-ok",
|
||||
protocol: 3,
|
||||
server: { version: "1.0.0" },
|
||||
snapshot: {},
|
||||
});
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(host.pendingUpdateExpectedVersion).toBeNull();
|
||||
expect(host.updateStatusBanner).toEqual({
|
||||
tone: "danger",
|
||||
text: "Update error: restart-unhealthy. The replacement process never became healthy and the previous process stayed up.",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores stale client onClose callbacks after reconnect", () => {
|
||||
const host = createHost();
|
||||
|
||||
|
||||
@@ -97,6 +97,8 @@ type GatewayHost = {
|
||||
assistantAvatar: string | null;
|
||||
assistantAgentId: string | null;
|
||||
serverVersion: string | null;
|
||||
pendingUpdateExpectedVersion: string | null;
|
||||
updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null;
|
||||
sessionKey: string;
|
||||
chatRunId: string | null;
|
||||
pendingAbort?: { runId: string; sessionKey: string } | null;
|
||||
@@ -157,6 +159,94 @@ type ConnectGatewayOptions = {
|
||||
reason?: "initial" | "seq-gap";
|
||||
};
|
||||
|
||||
/**
 * Subset of the `update.status` response that the post-restart verification
 * reads. Every field is optional or nullable, so consumers must null-check
 * each level before use.
 */
type UpdateRestartStatusResponse = {
  sentinel?: {
    /** Sentinel category; verification only acts on `"update"`. */
    kind?: string;
    /** Outcome recorded in the sentinel; any value other than `"ok"` is treated as a failure. */
    status?: string;
    stats?: {
      /** Reason code recorded alongside a non-ok status. */
      reason?: string | null;
      /** Version information recorded in the sentinel for the post-update state. */
      after?: { version?: string | null } | null;
    } | null;
  } | null;
};
|
||||
|
||||
/**
 * Builds the danger banner shown when the gateway reconnects after an update
 * but is not running the expected version.
 *
 * @param params.expectedVersion - Version the update reported installing.
 * @param params.actualVersion - Version observed after the restart, or `null`
 *   when unknown; the "Expected …, running …" suffix is omitted when falsy.
 * @returns A banner with `tone: "danger"`.
 */
function resolveUpdateVerificationBanner(params: {
  expectedVersion: string;
  actualVersion: string | null;
}): { tone: "danger"; text: string } {
  const actualSuffix = params.actualVersion
    ? ` Expected v${params.expectedVersion}, running v${params.actualVersion}.`
    : "";
  return {
    tone: "danger",
    text: `Update installed but running version did not change — restart may have been blocked.${actualSuffix}`,
  };
}
|
||||
|
||||
/**
 * Builds the danger banner for an update whose restart sentinel reports a
 * non-ok status after the gateway reconnects.
 *
 * @param reason - Reason code from the sentinel. Blank, `null` or `undefined`
 *   falls back to `"restart-unhealthy"`; surrounding whitespace is trimmed.
 * @returns A banner with `tone: "danger"` whose text names the reason and adds
 *   reason-specific guidance.
 */
function resolvePostRestartUpdateBanner(reason: string | null | undefined): {
  tone: "danger";
  text: string;
} {
  const normalizedReason = reason?.trim() || "restart-unhealthy";
  const guidance =
    normalizedReason === "restart-unhealthy"
      ? "The replacement process never became healthy and the previous process stayed up."
      : "Check the gateway logs for the replacement failure.";
  return {
    tone: "danger",
    text: `Update error: ${normalizedReason}. ${guidance}`,
  };
}
|
||||
|
||||
/**
 * After a reconnect, checks that a previously requested update actually took
 * effect and records the outcome on the host.
 *
 * Does nothing unless `host.pendingUpdateExpectedVersion` is set. Otherwise it
 * polls `update.status` until an `"update"` sentinel carrying a version is
 * seen, the connection is replaced or dropped, or the deadline passes:
 *
 * - Sentinel found with a non-ok status: shows the post-restart error banner.
 * - Sentinel found with a version different from the expected one: shows the
 *   version-mismatch banner.
 * - Sentinel found with a matching version: only clears the pending marker.
 * - Deadline reached while still connected: falls back to comparing against
 *   the version from the hello payload.
 *
 * If the client was replaced or disconnected, the pending marker is left in
 * place and no banner is set.
 *
 * @param host - Gateway host whose pending-update state and banner are updated.
 * @param client - The client this verification run is bound to.
 */
async function verifyPendingUpdateVersion(
  host: GatewayHost,
  client: GatewayBrowserClient,
): Promise<void> {
  const maxWaitMs = 10_000;
  const pollIntervalMs = 250;

  const expectedVersion = host.pendingUpdateExpectedVersion?.trim();
  if (!expectedVersion) {
    return;
  }

  const deadline = Date.now() + maxWaitMs;
  while (host.client === client && host.connected && Date.now() < deadline) {
    let response: UpdateRestartStatusResponse | null = null;
    try {
      response = await client.request<UpdateRestartStatusResponse>("update.status", {});
    } catch {
      // Best effort: a failed status request is treated like "no sentinel yet"
      // and retried on the next poll.
      response = null;
    }

    const sentinel = response?.sentinel;
    const actualVersion = sentinel?.stats?.after?.version?.trim() || null;
    if (sentinel?.kind === "update" && actualVersion) {
      host.pendingUpdateExpectedVersion = null;
      // A recorded failure takes precedence over any version comparison.
      if (sentinel.status && sentinel.status !== "ok") {
        host.updateStatusBanner = resolvePostRestartUpdateBanner(sentinel.stats?.reason ?? null);
        return;
      }
      if (actualVersion !== expectedVersion) {
        host.updateStatusBanner = resolveUpdateVerificationBanner({
          expectedVersion,
          actualVersion,
        });
      }
      return;
    }

    await new Promise<void>((resolve) => {
      setTimeout(resolve, pollIntervalMs);
    });
  }

  // The loop may have ended because this client is stale or disconnected.
  if (host.client !== client || !host.connected) {
    return;
  }

  // No usable sentinel before the deadline: compare against the hello version.
  const currentVersion = host.hello?.server?.version?.trim() || null;
  host.pendingUpdateExpectedVersion = null;
  if (currentVersion !== expectedVersion) {
    host.updateStatusBanner = resolveUpdateVerificationBanner({
      expectedVersion,
      actualVersion: currentVersion,
    });
  }
}
|
||||
|
||||
export function resolveControlUiClientVersion(params: {
|
||||
gatewayUrl: string;
|
||||
serverVersion: string | null;
|
||||
@@ -344,6 +434,7 @@ export function connectGateway(host: GatewayHost, options?: ConnectGatewayOption
|
||||
void refreshActiveTab(host as unknown as Parameters<typeof refreshActiveTab>[0]);
|
||||
// Re-run push reconciliation now that the gateway client is available.
|
||||
void host.reconcileWebPushState?.();
|
||||
void verifyPendingUpdateVersion(host, client);
|
||||
},
|
||||
onClose: ({ code, reason, error }) => {
|
||||
if (host.client !== client) {
|
||||
|
||||
@@ -1482,6 +1482,11 @@ export function renderApp(state: AppViewState) {
|
||||
</aside>
|
||||
</div>
|
||||
<main class="content ${isChat ? "content--chat" : ""}">
|
||||
${state.updateStatusBanner
|
||||
? html`<div class="callout ${state.updateStatusBanner.tone}" role="alert">
|
||||
${state.updateStatusBanner.text}
|
||||
</div>`
|
||||
: nothing}
|
||||
${state.updateAvailable &&
|
||||
state.updateAvailable.latestVersion !== state.updateAvailable.currentVersion &&
|
||||
!isUpdateBannerDismissed(state.updateAvailable)
|
||||
|
||||
@@ -744,7 +744,7 @@ function buildAttentionItems(host: SettingsAppHost) {
|
||||
// Use the same predicate as the Overview card so the two stay in sync.
|
||||
// Without this, a `missing` provider shows up on the card but never
|
||||
// produces the re-auth attention callout.
|
||||
const monitored = modelAuth.providers.filter(isMonitoredAuthProvider);
|
||||
const monitored = (modelAuth.providers ?? []).filter(isMonitoredAuthProvider);
|
||||
const expiredProviders = monitored.filter(
|
||||
(p) => p.status === "expired" || p.status === "missing",
|
||||
);
|
||||
|
||||
@@ -176,6 +176,8 @@ export type AppViewState = {
|
||||
configSearchQuery: string;
|
||||
configActiveSection: string | null;
|
||||
configActiveSubsection: string | null;
|
||||
pendingUpdateExpectedVersion: string | null;
|
||||
updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null;
|
||||
communicationsFormMode: "form" | "raw";
|
||||
communicationsSearchQuery: string;
|
||||
communicationsActiveSection: string | null;
|
||||
|
||||
@@ -284,6 +284,8 @@ export class OpenClawApp extends LitElement {
|
||||
@state() configSearchQuery = "";
|
||||
@state() configActiveSection: string | null = null;
|
||||
@state() configActiveSubsection: string | null = null;
|
||||
@state() pendingUpdateExpectedVersion: string | null = null;
|
||||
@state() updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null = null;
|
||||
@state() communicationsFormMode: "form" | "raw" = "form";
|
||||
@state() communicationsSearchQuery = "";
|
||||
@state() communicationsActiveSection: string | null = null;
|
||||
|
||||
@@ -73,6 +73,8 @@ function createSaveState(): {
|
||||
configSearchQuery: "",
|
||||
configActiveSection: null,
|
||||
configActiveSubsection: null,
|
||||
pendingUpdateExpectedVersion: null,
|
||||
updateStatusBanner: null,
|
||||
lastError: null,
|
||||
},
|
||||
request,
|
||||
|
||||
@@ -37,6 +37,8 @@ function createState(): ConfigState {
|
||||
configValid: null,
|
||||
connected: false,
|
||||
lastError: null,
|
||||
pendingUpdateExpectedVersion: null,
|
||||
updateStatusBanner: null,
|
||||
updateRunning: false,
|
||||
};
|
||||
}
|
||||
@@ -554,6 +556,44 @@ describe("runUpdate", () => {
|
||||
|
||||
await runUpdate(state);
|
||||
|
||||
expect(state.lastError).toBe("Update error: network unavailable");
|
||||
expect(state.updateStatusBanner).toEqual({
|
||||
tone: "danger",
|
||||
text: "Update error: network unavailable. See the gateway logs for the exact failure and retry once the cause is fixed.",
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces skipped updates with actionable guidance", async () => {
|
||||
const request = vi.fn().mockResolvedValue({
|
||||
ok: false,
|
||||
result: { status: "skipped", reason: "dirty" },
|
||||
});
|
||||
const state = createState();
|
||||
state.connected = true;
|
||||
state.client = { request } as unknown as ConfigState["client"];
|
||||
|
||||
await runUpdate(state);
|
||||
|
||||
expect(state.updateStatusBanner).toEqual({
|
||||
tone: "warn",
|
||||
text: "Update skipped: dirty. Commit or stash changes, then retry.",
|
||||
});
|
||||
});
|
||||
|
||||
it("stores the expected post-update version when update.run succeeds", async () => {
|
||||
const request = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
result: {
|
||||
status: "ok",
|
||||
after: { version: "2.0.0" },
|
||||
},
|
||||
});
|
||||
const state = createState();
|
||||
state.connected = true;
|
||||
state.client = { request } as unknown as ConfigState["client"];
|
||||
|
||||
await runUpdate(state);
|
||||
|
||||
expect(state.pendingUpdateExpectedVersion).toBe("2.0.0");
|
||||
expect(state.updateStatusBanner).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -34,6 +34,8 @@ export type ConfigState = {
|
||||
configSearchQuery: string;
|
||||
configActiveSection: string | null;
|
||||
configActiveSubsection: string | null;
|
||||
pendingUpdateExpectedVersion: string | null;
|
||||
updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null;
|
||||
lastError: string | null;
|
||||
};
|
||||
|
||||
@@ -138,6 +140,39 @@ function serializeFormForSubmit(state: ConfigState): string {
|
||||
type ConfigSubmitMethod = "config.set" | "config.apply";
|
||||
type ConfigSubmitBusyKey = "configSaving" | "configApplying";
|
||||
|
||||
/**
 * Actionable guidance per update reason code. Kept in a `Map` so a reason that
 * happens to match an inherited object member (`constructor`, `toString`, …)
 * misses the lookup instead of resolving to that member.
 */
const UPDATE_REASON_GUIDANCE: ReadonlyMap<string, string> = new Map([
  ["dirty", "Commit or stash changes, then retry."],
  ["no-upstream", "Set an upstream branch, then retry."],
  [
    "not-git-install",
    "Not a git checkout. Run `openclaw update` from the CLI for a global reinstall.",
  ],
  [
    "not-openclaw-root",
    "Run the update from an OpenClaw checkout or use the CLI global reinstall path.",
  ],
  ["deps-install-failed", "Dependency install failed. Fix the install error and retry."],
  ["build-failed", "Build failed. Fix the build error and retry."],
  ["ui-build-failed", "The control UI rebuild failed. Fix the UI build error and retry."],
  [
    "global-install-failed",
    "The global package install did not verify on disk. Retry or reinstall from the CLI.",
  ],
  [
    "restart-disabled",
    "The update was not applied because gateway restarts are disabled. Enable restarts in config, then retry — or run `openclaw update` from the CLI.",
  ],
  [
    "restart-unavailable",
    "This global install cannot be safely replaced while restarts are disabled and no supervisor is present.",
  ],
  [
    "restart-unhealthy",
    "The replacement process never became healthy. The previous process stayed up so you can recover.",
  ],
  ["doctor-failed", "Doctor repair failed. Run `openclaw doctor --non-interactive` and retry."],
]);

/** Guidance used for any reason without a dedicated entry. */
const UPDATE_REASON_FALLBACK_GUIDANCE =
  "See the gateway logs for the exact failure and retry once the cause is fixed.";

/**
 * Builds the banner shown when `update.run` finishes without success.
 *
 * @param params.status - Update outcome; missing or blank is treated as `"error"`.
 * @param params.reason - Reason code or message; missing or blank is treated
 *   as `"unexpected-error"`. Surrounding whitespace is trimmed.
 * @returns A banner whose tone is `"warn"` for `"skipped"` and `"danger"`
 *   otherwise, with text `Update <status>: <reason>. <guidance>`.
 */
function resolveUpdateStatusBanner(params: { status?: string; reason?: string }): {
  tone: "danger" | "warn" | "info";
  text: string;
} {
  const status = (params.status ?? "error").trim() || "error";
  const reason = (params.reason ?? "unexpected-error").trim() || "unexpected-error";
  const tone = status === "skipped" ? "warn" : "danger";
  const guidance = UPDATE_REASON_GUIDANCE.get(reason) ?? UPDATE_REASON_FALLBACK_GUIDANCE;
  return {
    tone,
    text: `Update ${status}: ${reason}. ${guidance}`,
  };
}
|
||||
|
||||
async function submitConfigChange(
|
||||
state: ConfigState,
|
||||
method: ConfigSubmitMethod,
|
||||
@@ -193,20 +228,27 @@ export async function runUpdate(state: ConfigState) {
|
||||
}
|
||||
state.updateRunning = true;
|
||||
state.lastError = null;
|
||||
state.updateStatusBanner = null;
|
||||
try {
|
||||
const res = await state.client.request<{
|
||||
ok?: boolean;
|
||||
result?: { status?: string; reason?: string };
|
||||
result?: { status?: string; reason?: string; after?: { version?: string | null } };
|
||||
}>("update.run", {
|
||||
sessionKey: state.applySessionKey,
|
||||
});
|
||||
if (res && res.ok === false) {
|
||||
const status = res.result?.status ?? "error";
|
||||
const reason = res.result?.reason ?? "Update failed.";
|
||||
state.lastError = `Update ${status}: ${reason}`;
|
||||
const status = res.result?.status ?? (res.ok === true ? "ok" : "error");
|
||||
if (status === "ok" && res.ok === true) {
|
||||
state.pendingUpdateExpectedVersion = res.result?.after?.version ?? null;
|
||||
return;
|
||||
}
|
||||
state.pendingUpdateExpectedVersion = null;
|
||||
state.updateStatusBanner = resolveUpdateStatusBanner({
|
||||
status,
|
||||
reason: res.result?.reason,
|
||||
});
|
||||
} catch (err) {
|
||||
state.lastError = String(err);
|
||||
state.pendingUpdateExpectedVersion = null;
|
||||
} finally {
|
||||
state.updateRunning = false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user