From 3fc19ed7d747846ae81541b8ccdc9d8d087c8642 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 2 Mar 2026 03:58:34 +0000 Subject: [PATCH] fix: harden feishu startup probe sequencing (#29941) (thanks @bmendonca3) --- CHANGELOG.md | 2 +- extensions/feishu/src/monitor.ts | 82 +++++++++++++++++-- .../src/monitor.webhook-security.test.ts | 75 ++++++++++++++++- extensions/feishu/src/probe.test.ts | 19 ++++- extensions/feishu/src/probe.ts | 2 + 5 files changed, 170 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 308fd7e5b9c..f1d374e964f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -267,7 +267,7 @@ Docs: https://docs.openclaw.ai - Agents/Ollama discovery: skip Ollama discovery when explicit models are configured. (#28827) Thanks @Kansodata and @vincentkoc. - Issues/triage labeling: consolidate bug intake to a single bug issue form with required bug-type classification (regression/crash/behavior), auto-apply matching subtype labels from issue form content, and retire the separate regression template to reduce misfiled issue types and improve queue filtering. Thanks @vincentkoc. - Android/Onboarding + voice reliability: request per-toggle onboarding permissions, update pairing guidance to `openclaw devices list/approve`, restore assistant speech playback in mic capture flow, cancel superseded in-flight speech (mute + per-reply token rotation), and keep `talk.config` loads retryable after transient failures. (#29796) Thanks @obviyus. -- Feishu/Startup probes: serialize multi-account bot-info probes during monitor startup so large Feishu account sets do not burst `/open-apis/bot/v3/info` and trigger avoidable rate limits. (#26685) +- Feishu/Startup probes: serialize multi-account bot-info probes during monitor startup so large Feishu account sets do not burst `/open-apis/bot/v3/info`, bound startup probe latency/abort handling to avoid head-of-line stalls, and avoid triggering rate limits. (#26685, #29941) Thanks @bmendonca3. - FS/Sandbox workspace boundaries: add a dedicated `outside-workspace` safe-open error code for root-escape checks, and propagate specific outside-workspace messages across edit/browser/media consumers instead of generic not-found/invalid-path fallbacks. (#29715) Thanks @YuzuruS. - Config/Doctor group allowlist diagnostics: align `groupPolicy: "allowlist"` warnings with per-channel runtime semantics by excluding Google Chat sender-list checks and by warning when no-fallback channels (for example iMessage) omit `groupAllowFrom`, with regression coverage. (#28477) Thanks @tonydehnke. - Slack/Disabled channel startup: skip Slack monitor socket startup entirely when `channels.slack.enabled=false` (including configs that still contain valid tokens), preventing disabled accounts from opening websocket connections. (#30586) Thanks @liuxiaopai-ai. diff --git a/extensions/feishu/src/monitor.ts b/extensions/feishu/src/monitor.ts index e71a4316b4c..7e9a9d90c1c 100644 --- a/extensions/feishu/src/monitor.ts +++ b/extensions/feishu/src/monitor.ts @@ -34,6 +34,9 @@ const botOpenIds = new Map(); const FEISHU_WEBHOOK_MAX_BODY_BYTES = 1024 * 1024; const FEISHU_WEBHOOK_BODY_TIMEOUT_MS = 30_000; const FEISHU_REACTION_VERIFY_TIMEOUT_MS = 1_500; +const FEISHU_STARTUP_BOT_INFO_TIMEOUT_MS = 10_000; +const FEISHU_BOT_INFO_FETCH_ABORTED = Symbol("feishu-bot-info-fetch-aborted"); +const FEISHU_BOT_INFO_FETCH_TIMED_OUT = Symbol("feishu-bot-info-fetch-timed-out"); export type FeishuReactionCreatedEvent = { message_id: string; @@ -188,12 +191,68 @@ export async function resolveReactionSyntheticEvent( }; } -async function fetchBotOpenId(account: ResolvedFeishuAccount): Promise { +type FetchBotOpenIdOptions = { + runtime?: RuntimeEnv; + abortSignal?: AbortSignal; + timeoutMs?: number; +}; + +async function fetchBotOpenId( + account: ResolvedFeishuAccount, + options: FetchBotOpenIdOptions = {}, +): Promise { + if (options.abortSignal?.aborted) { + return undefined; + } + + const timeoutMs = options.timeoutMs ?? FEISHU_STARTUP_BOT_INFO_TIMEOUT_MS; + let timeoutHandle: ReturnType | undefined; + let abortHandler: (() => void) | undefined; try { - const result = await probeFeishu(account); - return result.ok ? result.botOpenId : undefined; + const contenders: Array< + Promise< + | string + | undefined + | typeof FEISHU_BOT_INFO_FETCH_ABORTED + | typeof FEISHU_BOT_INFO_FETCH_TIMED_OUT + > + > = [ + probeFeishu(account) + .then((result) => (result.ok ? result.botOpenId : undefined)) + .catch(() => undefined), + new Promise((resolve) => { + timeoutHandle = setTimeout(() => resolve(FEISHU_BOT_INFO_FETCH_TIMED_OUT), timeoutMs); + }), + ]; + if (options.abortSignal) { + contenders.push( + new Promise((resolve) => { + abortHandler = () => resolve(FEISHU_BOT_INFO_FETCH_ABORTED); + options.abortSignal?.addEventListener("abort", abortHandler, { once: true }); + }), + ); + } + const outcome = await Promise.race(contenders); + if (outcome === FEISHU_BOT_INFO_FETCH_ABORTED) { + return undefined; + } + if (outcome === FEISHU_BOT_INFO_FETCH_TIMED_OUT) { + const error = options.runtime?.error ?? console.error; + error( + `feishu[${account.accountId}]: bot info probe timed out after ${timeoutMs}ms; continuing startup`, + ); + return undefined; + } + return outcome; } catch { return undefined; + } finally { + if (timeoutHandle) { + clearTimeout(timeoutHandle); + } + if (abortHandler) { + options.abortSignal?.removeEventListener("abort", abortHandler); + } } } @@ -347,7 +406,9 @@ async function monitorSingleAccount(params: MonitorAccountParams): Promise const log = runtime?.log ?? console.log; // Fetch bot open_id - const botOpenId = params.botOpenIdPrefetched ? params.botOpenId : await fetchBotOpenId(account); + const botOpenId = params.botOpenIdPrefetched + ? params.botOpenId + : await fetchBotOpenId(account, { runtime, abortSignal }); botOpenIds.set(accountId, botOpenId ?? ""); log(`feishu[${accountId}]: bot open_id resolved: ${botOpenId ?? "unknown"}`); @@ -550,8 +611,19 @@ export async function monitorFeishuProvider(opts: MonitorFeishuOpts = {}): Promi const monitorPromises: Promise[] = []; for (const account of accounts) { + if (opts.abortSignal?.aborted) { + log("feishu: abort signal received during startup preflight; stopping startup"); + break; + } // Probe sequentially so large multi-account startups do not burst Feishu's bot-info endpoint. - const botOpenId = await fetchBotOpenId(account); + const botOpenId = await fetchBotOpenId(account, { + runtime: opts.runtime, + abortSignal: opts.abortSignal, + }); + if (opts.abortSignal?.aborted) { + log("feishu: abort signal received during startup preflight; stopping startup"); + break; + } monitorPromises.push( monitorSingleAccount({ cfg, diff --git a/extensions/feishu/src/monitor.webhook-security.test.ts b/extensions/feishu/src/monitor.webhook-security.test.ts index 586cc465e93..fe27ef755e6 100644 --- a/extensions/feishu/src/monitor.webhook-security.test.ts +++ b/extensions/feishu/src/monitor.webhook-security.test.ts @@ -287,9 +287,9 @@ describe("Feishu webhook security hardening", () => { }); try { - await Promise.resolve(); - await Promise.resolve(); - await Promise.resolve(); + for (let i = 0; i < 10 && !started.includes("beta"); i += 1) { + await Promise.resolve(); + } expect(started).toEqual(["alpha", "beta"]); expect(started.filter((accountId) => accountId === "alpha")).toHaveLength(1); @@ -299,4 +299,73 @@ describe("Feishu webhook security hardening", () => { await monitorPromise; } }); + + it("continues startup when a sequential preflight probe times out", async () => { + vi.useFakeTimers(); + const started: string[] = []; + let releaseBetaProbe!: () => void; + const betaProbeReleased = new Promise((resolve) => { + releaseBetaProbe = () => resolve(); + }); + + probeFeishuMock.mockImplementation((account: { accountId: string }) => { + started.push(account.accountId); + if (account.accountId === "alpha") { + return new Promise(() => {}); + } + return betaProbeReleased.then(() => ({ ok: true, botOpenId: `bot_${account.accountId}` })); + }); + + const abortController = new AbortController(); + const runtime = { log: vi.fn(), error: vi.fn(), exit: vi.fn() }; + const monitorPromise = monitorFeishuProvider({ + config: buildMultiAccountWebsocketConfig(["alpha", "beta"]), + runtime, + abortSignal: abortController.signal, + }); + + try { + await Promise.resolve(); + expect(started).toEqual(["alpha"]); + + await vi.advanceTimersByTimeAsync(10_000); + await Promise.resolve(); + + expect(started).toEqual(["alpha", "beta"]); + expect(runtime.error).toHaveBeenCalledWith( + expect.stringContaining("bot info probe timed out"), + ); + } finally { + releaseBetaProbe(); + abortController.abort(); + await monitorPromise; + vi.useRealTimers(); + } + }); + + it("stops sequential preflight when aborted during a stuck probe", async () => { + const started: string[] = []; + probeFeishuMock.mockImplementation((account: { accountId: string }) => { + started.push(account.accountId); + return new Promise(() => {}); + }); + + const abortController = new AbortController(); + const monitorPromise = monitorFeishuProvider({ + config: buildMultiAccountWebsocketConfig(["alpha", "beta"]), + abortSignal: abortController.signal, + }); + + try { + await Promise.resolve(); + expect(started).toEqual(["alpha"]); + + abortController.abort(); + await monitorPromise; + + expect(started).toEqual(["alpha"]); + } finally { + abortController.abort(); + } + }); }); diff --git a/extensions/feishu/src/probe.test.ts b/extensions/feishu/src/probe.test.ts index b869393601b..23b45c21165 100644 --- a/extensions/feishu/src/probe.test.ts +++ b/extensions/feishu/src/probe.test.ts @@ -6,7 +6,7 @@ vi.mock("./client.js", () => ({ createFeishuClient: createFeishuClientMock, })); -import { probeFeishu, clearProbeCache } from "./probe.js"; +import { FEISHU_PROBE_REQUEST_TIMEOUT_MS, probeFeishu, clearProbeCache } from "./probe.js"; function makeRequestFn(response: Record) { return vi.fn().mockResolvedValue(response); @@ -59,6 +59,23 @@ describe("probeFeishu", () => { expect(requestFn).toHaveBeenCalledTimes(1); }); + it("uses explicit timeout for bot info request", async () => { + const requestFn = setupClient({ + code: 0, + bot: { bot_name: "TestBot", open_id: "ou_abc123" }, + }); + + await probeFeishu({ appId: "cli_123", appSecret: "secret" }); + + expect(requestFn).toHaveBeenCalledWith( + expect.objectContaining({ + method: "GET", + url: "/open-apis/bot/v3/info", + timeout: FEISHU_PROBE_REQUEST_TIMEOUT_MS, + }), + ); + }); + it("returns cached result on subsequent calls within TTL", async () => { const requestFn = setupClient({ code: 0, diff --git a/extensions/feishu/src/probe.ts b/extensions/feishu/src/probe.ts index fff93dc8921..a5efacd6a74 100644 --- a/extensions/feishu/src/probe.ts +++ b/extensions/feishu/src/probe.ts @@ -8,6 +8,7 @@ import type { FeishuProbeResult } from "./types.js"; const probeCache = new Map(); const PROBE_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes const MAX_PROBE_CACHE_SIZE = 64; +export const FEISHU_PROBE_REQUEST_TIMEOUT_MS = 10_000; export async function probeFeishu(creds?: FeishuClientCredentials): Promise { if (!creds?.appId || !creds?.appSecret) { @@ -35,6 +36,7 @@ export async function probeFeishu(creds?: FeishuClientCredentials): Promise