fix(infra): land #29078 from @cathrynlavery with restart fallback

Co-authored-by: Cathryn Lavery <cathryn@littlemight.com>
This commit is contained in:
Peter Steinberger
2026-02-27 22:04:46 +00:00
parent db67492a00
commit 4aa2dc6857
3 changed files with 31 additions and 52 deletions

View File

@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
- Security/Feishu webhook ingress: bound unauthenticated webhook rate-limit state with stale-window pruning and a hard key cap to prevent unbounded pre-auth memory growth from rotating source keys. (#26050) Thanks @bmendonca3.
- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
- Gateway/WS: close repeated post-handshake `unauthorized role:*` request floods per connection and sample duplicate rejection logs, preventing a single misbehaving client from degrading gateway responsiveness. (#20168) Thanks @acy103, @vibecodooor, and @vincentkoc.
- Gateway/macOS supervised restart: actively run `launchctl kickstart -k` during intentional supervised restarts to bypass LaunchAgent `ThrottleInterval` delays, and fall back to in-process restart when kickstart fails. Landed from contributor PR #29078 by @cathrynlavery. Thanks @cathrynlavery.
- Gateway/Auth: improve device-auth v2 migration diagnostics so operators get clearer guidance when legacy clients connect. (#28305) Thanks @vincentkoc.
- CLI/Install: add an npm-link fallback to fix CLI startup `Permission denied` failures (`exit 127`) on affected installs. (#17151) Thanks @sskyu and @vincentkoc.
- Onboarding/Custom providers: improve verification reliability for slower local endpoints (for example Ollama) during setup. (#27380) Thanks @Sid-Qin.

View File

@@ -3,10 +3,14 @@ import { captureFullEnv } from "../test-utils/env.js";
import { SUPERVISOR_HINT_ENV_VARS } from "./supervisor-markers.js";
const spawnMock = vi.hoisted(() => vi.fn());
const triggerOpenClawRestartMock = vi.hoisted(() => vi.fn());
vi.mock("node:child_process", () => ({
spawn: (...args: unknown[]) => spawnMock(...args),
}));
vi.mock("./restart.js", () => ({
triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args),
}));
import { restartGatewayProcessWithFreshPid } from "./process-respawn.js";
@@ -30,6 +34,7 @@ afterEach(() => {
process.argv = [...originalArgv];
process.execArgv = [...originalExecArgv];
spawnMock.mockClear();
triggerOpenClawRestartMock.mockClear();
if (originalPlatformDescriptor) {
Object.defineProperty(process, "platform", originalPlatformDescriptor);
}
@@ -56,40 +61,33 @@ describe("restartGatewayProcessWithFreshPid", () => {
expect(spawnMock).not.toHaveBeenCalled();
});
it("schedules detached launchctl kickstart on macOS when launchd label is set", () => {
it("runs launchd kickstart helper on macOS when launchd label is set", () => {
setPlatform("darwin");
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
const unrefMock = vi.fn();
spawnMock.mockReturnValue({ unref: unrefMock, on: vi.fn() });
triggerOpenClawRestartMock.mockReturnValue({ ok: true, method: "launchctl" });
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(spawnMock).toHaveBeenCalledWith(
"launchctl",
["kickstart", "-k", expect.stringContaining("ai.openclaw.gateway")],
expect.objectContaining({ detached: true, stdio: "ignore" }),
);
expect(unrefMock).toHaveBeenCalledOnce();
expect(triggerOpenClawRestartMock).toHaveBeenCalledOnce();
expect(spawnMock).not.toHaveBeenCalled();
});
it("still returns supervised even if kickstart spawn throws", () => {
it("returns failed when launchd kickstart helper fails", () => {
setPlatform("darwin");
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
spawnMock.mockImplementation((...args: unknown[]) => {
const [cmd] = args as [string];
if (cmd === "launchctl") {
throw new Error("spawn failed");
}
return { unref: vi.fn(), on: vi.fn() };
triggerOpenClawRestartMock.mockReturnValue({
ok: false,
method: "launchctl",
detail: "spawn failed",
});
const result = restartGatewayProcessWithFreshPid();
// Kickstart is best-effort; failure should not block supervised exit
expect(result.mode).toBe("supervised");
expect(result.mode).toBe("failed");
expect(result.detail).toContain("spawn failed");
});
it("does not schedule kickstart on non-darwin platforms", () => {
@@ -100,6 +98,7 @@ describe("restartGatewayProcessWithFreshPid", () => {
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
});
@@ -127,16 +126,11 @@ describe("restartGatewayProcessWithFreshPid", () => {
clearSupervisorHints();
setPlatform("darwin");
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
const unrefMock = vi.fn();
spawnMock.mockReturnValue({ unref: unrefMock, on: vi.fn() });
triggerOpenClawRestartMock.mockReturnValue({ ok: true, method: "launchctl" });
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(spawnMock).toHaveBeenCalledWith(
"launchctl",
expect.arrayContaining(["kickstart", "-k"]),
expect.objectContaining({ detached: true }),
);
expect(unrefMock).toHaveBeenCalledOnce();
expect(triggerOpenClawRestartMock).toHaveBeenCalledOnce();
expect(spawnMock).not.toHaveBeenCalled();
});
it("returns supervised when OPENCLAW_SYSTEMD_UNIT is set", () => {

View File

@@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import { triggerOpenClawRestart } from "./restart.js";
import { hasSupervisorHint } from "./supervisor-markers.js";
type RespawnMode = "spawned" | "supervised" | "disabled" | "failed";
@@ -21,29 +22,6 @@ function isLikelySupervisedProcess(env: NodeJS.ProcessEnv = process.env): boolea
return hasSupervisorHint(env);
}
/**
 * Fire-and-forget `launchctl kickstart -k <target>` so launchd restarts the
 * job immediately rather than waiting out ThrottleInterval.
 *
 * The helper process is started with `spawn` (never `spawnSync`): `-k` makes
 * launchd send SIGTERM to the current process, and a blocking call would
 * deadlock because the gateway must stay free to handle that signal and exit
 * before launchd can start the replacement.
 *
 * @param label launchd job label; addressed as `gui/<uid>/<label>` when
 *   `process.getuid` is available, otherwise passed through unchanged.
 * @returns `true` once the helper has been spawned and detached, `false` if
 *   `spawn` threw synchronously. Errors reported later through the child's
 *   "error" event are intentionally ignored (best-effort).
 */
function schedulelaunchdKickstart(label: string): boolean {
  // Default to the bare label; qualify it with the per-user GUI domain when
  // the runtime exposes a uid.
  let serviceTarget = label;
  if (typeof process.getuid === "function") {
    serviceTarget = `gui/${process.getuid()}/${label}`;
  }

  try {
    const kickstart = spawn("launchctl", ["kickstart", "-k", serviceTarget], {
      detached: true,
      stdio: "ignore",
    });
    // Without a listener an asynchronous spawn failure would surface as an
    // uncaught "error" event; swallow it because this is best-effort.
    kickstart.on("error", () => {});
    // Do not let the helper keep this process's event loop alive.
    kickstart.unref();
  } catch {
    return false;
  }
  return true;
}
/**
* Attempt to restart this process with a fresh PID.
* - supervised environments (launchd/systemd): caller should exit and let supervisor restart
@@ -55,10 +33,16 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
return { mode: "disabled" };
}
if (isLikelySupervisedProcess(process.env)) {
// On macOS under launchd, fire a detached kickstart so launchd restarts
// us immediately instead of waiting for ThrottleInterval (up to 60s).
// On macOS under launchd, actively kickstart the supervised service to
// bypass ThrottleInterval delays for intentional restarts.
if (process.platform === "darwin" && process.env.OPENCLAW_LAUNCHD_LABEL?.trim()) {
schedulelaunchdKickstart(process.env.OPENCLAW_LAUNCHD_LABEL.trim());
const restart = triggerOpenClawRestart();
if (!restart.ok) {
return {
mode: "failed",
detail: restart.detail ?? "launchctl kickstart failed",
};
}
}
return { mode: "supervised" };
}