fix(gateway): hot-reload channelHealthCheckMinutes without full restart

The health monitor was created once at startup and never touched by
applyHotReload(), so changing channelHealthCheckMinutes only took
effect after a full gateway restart.

Wire up a "restart-health-monitor" reload action so hot-reload can
stop the old monitor and (re)create one with the updated interval —
or disable it entirely when set to 0.

Closes #32105

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
scoootscooob
2026-03-02 12:47:55 -08:00
committed by Peter Steinberger
parent b782ecb7eb
commit 0f1388fa15
4 changed files with 33 additions and 1 deletion

View File

@@ -159,6 +159,13 @@ describe("buildGatewayReloadPlan", () => {
);
});
// A change to gateway.channelHealthCheckMinutes must be handled as a hot
// reload: the plan restarts the health monitor, does not request a full
// gateway restart, and records the changed path among its hot reasons.
it("hot-reloads health monitor when channelHealthCheckMinutes changes", () => {
const plan = buildGatewayReloadPlan(["gateway.channelHealthCheckMinutes"]);
expect(plan.restartGateway).toBe(false);
expect(plan.restartHealthMonitor).toBe(true);
expect(plan.hotReasons).toContain("gateway.channelHealthCheckMinutes");
});
it("treats gateway.remote as no-op", () => {
const plan = buildGatewayReloadPlan(["gateway.remote.url"]);
expect(plan.restartGateway).toBe(false);

View File

@@ -22,6 +22,7 @@ export type GatewayReloadPlan = {
restartBrowserControl: boolean;
restartCron: boolean;
restartHeartbeat: boolean;
restartHealthMonitor: boolean;
restartChannels: Set<ChannelKind>;
noopPaths: string[];
};
@@ -38,6 +39,7 @@ type ReloadAction =
| "restart-browser-control"
| "restart-cron"
| "restart-heartbeat"
| "restart-health-monitor"
| `restart-channel:${ChannelId}`;
const DEFAULT_RELOAD_SETTINGS: GatewayReloadSettings = {
@@ -50,6 +52,11 @@ const MISSING_CONFIG_MAX_RETRIES = 2;
const BASE_RELOAD_RULES: ReloadRule[] = [
{ prefix: "gateway.remote", kind: "none" },
{ prefix: "gateway.reload", kind: "none" },
{
prefix: "gateway.channelHealthCheckMinutes",
kind: "hot",
actions: ["restart-health-monitor"],
},
// Stuck-session warning threshold is read by the diagnostics heartbeat loop.
{ prefix: "diagnostics.stuckSessionWarnMs", kind: "none" },
{ prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] },
@@ -199,6 +206,7 @@ export function buildGatewayReloadPlan(changedPaths: string[]): GatewayReloadPla
restartBrowserControl: false,
restartCron: false,
restartHeartbeat: false,
restartHealthMonitor: false,
restartChannels: new Set(),
noopPaths: [],
};
@@ -225,6 +233,9 @@ export function buildGatewayReloadPlan(changedPaths: string[]): GatewayReloadPla
case "restart-heartbeat":
plan.restartHeartbeat = true;
break;
case "restart-health-monitor":
plan.restartHealthMonitor = true;
break;
default:
break;
}

View File

@@ -16,6 +16,7 @@ import {
} from "../infra/restart.js";
import { setCommandLaneConcurrency, getTotalQueueSize } from "../process/command-queue.js";
import { CommandLane } from "../process/lanes.js";
import type { ChannelHealthMonitor } from "./channel-health-monitor.js";
import type { ChannelKind, GatewayReloadPlan } from "./config-reload.js";
import { resolveHooksConfig } from "./hooks.js";
import { startBrowserControlServerIfEnabled } from "./server-browser.js";
@@ -26,6 +27,7 @@ type GatewayHotReloadState = {
heartbeatRunner: HeartbeatRunner;
cronState: GatewayCronState;
browserControl: Awaited<ReturnType<typeof startBrowserControlServerIfEnabled>> | null;
channelHealthMonitor: ChannelHealthMonitor | null;
};
export function createGatewayReloadHandlers(params: {
@@ -44,6 +46,7 @@ export function createGatewayReloadHandlers(params: {
logChannels: { info: (msg: string) => void; error: (msg: string) => void };
logCron: { error: (msg: string) => void };
logReload: { info: (msg: string) => void; warn: (msg: string) => void };
createHealthMonitor: (checkIntervalMs: number) => ChannelHealthMonitor;
}) {
const applyHotReload = async (
plan: GatewayReloadPlan,
@@ -90,6 +93,13 @@ export function createGatewayReloadHandlers(params: {
}
}
// Replace the channel health monitor when the reload plan asks for it.
if (plan.restartHealthMonitor) {
// Stop the current monitor (if there is one) before building its replacement.
state.channelHealthMonitor?.stop();
const minutes = nextConfig.gateway?.channelHealthCheckMinutes;
// A value of 0 leaves the monitor disabled (null). An unset value falls back
// to 5 minutes. The interval is handed to createHealthMonitor in milliseconds.
nextState.channelHealthMonitor =
minutes === 0 ? null : params.createHealthMonitor((minutes ?? 5) * 60_000);
}
if (plan.restartGmailWatcher) {
await stopGmailWatcher().catch(() => {});
await startGmailWatcherWithLogs({

View File

@@ -656,7 +656,7 @@ export async function startGatewayServer(
const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes;
const healthCheckDisabled = healthCheckMinutes === 0;
const channelHealthMonitor = healthCheckDisabled
let channelHealthMonitor = healthCheckDisabled
? null
: startChannelHealthMonitor({
channelManager,
@@ -841,6 +841,7 @@ export async function startGatewayServer(
heartbeatRunner,
cronState,
browserControl,
channelHealthMonitor,
}),
setState: (nextState) => {
hooksConfig = nextState.hooksConfig;
@@ -849,6 +850,7 @@ export async function startGatewayServer(
cron = cronState.cron;
cronStorePath = cronState.storePath;
browserControl = nextState.browserControl;
channelHealthMonitor = nextState.channelHealthMonitor;
},
startChannel,
stopChannel,
@@ -857,6 +859,8 @@ export async function startGatewayServer(
logChannels,
logCron,
logReload,
createHealthMonitor: (checkIntervalMs: number) =>
startChannelHealthMonitor({ channelManager, checkIntervalMs }),
});
return startGatewayConfigReloader({