From 7c59b78aeedd1dd2b608bf7df9493e8339e02f03 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 25 Feb 2026 02:48:25 +0000 Subject: [PATCH] test: cap docker live model sweeps and harden timeouts --- scripts/test-live-gateway-models-docker.sh | 3 +- scripts/test-live-models-docker.sh | 3 +- src/agents/models.profiles.live.test.ts | 99 +++++++++++++++++-- .../gateway-models.profiles.live.test.ts | 69 ++++++++++++- 4 files changed, 159 insertions(+), 15 deletions(-) diff --git a/scripts/test-live-gateway-models-docker.sh b/scripts/test-live-gateway-models-docker.sh index bb0641df16b..3cc5ed2bf0b 100755 --- a/scripts/test-live-gateway-models-docker.sh +++ b/scripts/test-live-gateway-models-docker.sh @@ -22,8 +22,9 @@ docker run --rm -t \ -e HOME=/home/node \ -e NODE_OPTIONS=--disable-warning=ExperimentalWarning \ -e OPENCLAW_LIVE_TEST=1 \ - -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}}" \ + -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-modern}}" \ -e OPENCLAW_LIVE_GATEWAY_PROVIDERS="${OPENCLAW_LIVE_GATEWAY_PROVIDERS:-${CLAWDBOT_LIVE_GATEWAY_PROVIDERS:-}}" \ + -e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MAX_MODELS:-24}}" \ -e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-}}" \ -v "$CONFIG_DIR":/home/node/.openclaw \ -v "$WORKSPACE_DIR":/home/node/.openclaw/workspace \ diff --git a/scripts/test-live-models-docker.sh b/scripts/test-live-models-docker.sh index 1a7df857c7a..f3aecc0049a 100755 --- a/scripts/test-live-models-docker.sh +++ b/scripts/test-live-models-docker.sh @@ -22,8 +22,9 @@ docker run --rm -t \ -e HOME=/home/node \ -e NODE_OPTIONS=--disable-warning=ExperimentalWarning \ -e OPENCLAW_LIVE_TEST=1 \ - -e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-all}}" \ + -e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-modern}}" \ -e OPENCLAW_LIVE_PROVIDERS="${OPENCLAW_LIVE_PROVIDERS:-${CLAWDBOT_LIVE_PROVIDERS:-}}" \ + -e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-${CLAWDBOT_LIVE_MAX_MODELS:-48}}" \ -e OPENCLAW_LIVE_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_MODEL_TIMEOUT_MS:-}}" \ -e OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS="${OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS:-${CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS:-}}" \ -v "$CONFIG_DIR":/home/node/.openclaw \ diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index d56986b8038..2db27d07671 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -91,6 +91,10 @@ function isInstructionsRequiredError(raw: string): boolean { return /instructions are required/i.test(raw); } +function isModelTimeoutError(raw: string): boolean { + return /model call timed out after \d+ms/i.test(raw); +} + function toInt(value: string | undefined, fallback: number): number { const trimmed = value?.trim(); if (!trimmed) { @@ -100,6 +104,49 @@ function toInt(value: string | undefined, fallback: number): number { return Number.isFinite(parsed) ? parsed : fallback; } +function capByProviderSpread( + items: T[], + maxItems: number, + providerOf: (item: T) => string, +): T[] { + if (maxItems <= 0 || items.length <= maxItems) { + return items; + } + const providerOrder: string[] = []; + const grouped = new Map(); + for (const item of items) { + const provider = providerOf(item); + const bucket = grouped.get(provider); + if (bucket) { + bucket.push(item); + continue; + } + providerOrder.push(provider); + grouped.set(provider, [item]); + } + + const selected: T[] = []; + while (selected.length < maxItems && grouped.size > 0) { + for (const provider of providerOrder) { + const bucket = grouped.get(provider); + if (!bucket || bucket.length === 0) { + continue; + } + const item = bucket.shift(); + if (item) { + selected.push(item); + } + if (bucket.length === 0) { + grouped.delete(provider); + } + if (selected.length >= maxItems) { + break; + } + } + } + return selected; +} + function resolveTestReasoning( model: Model, ): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined { @@ -122,16 +169,32 @@ async function completeSimpleWithTimeout( options: Parameters>[2], timeoutMs: number, ) { + const maxTimeoutMs = Math.max(1, timeoutMs); const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), Math.max(1, timeoutMs)); - timer.unref?.(); + const abortTimer = setTimeout(() => { + controller.abort(); + }, maxTimeoutMs); + abortTimer.unref?.(); + let hardTimer: ReturnType | undefined; + const timeout = new Promise((_, reject) => { + hardTimer = setTimeout(() => { + reject(new Error(`model call timed out after ${maxTimeoutMs}ms`)); + }, maxTimeoutMs); + hardTimer.unref?.(); + }); try { - return await completeSimple(model, context, { - ...options, - signal: controller.signal, - }); + return await Promise.race([ + completeSimple(model, context, { + ...options, + signal: controller.signal, + }), + timeout, + ]); } finally { - clearTimeout(timer); + clearTimeout(abortTimer); + if (hardTimer) { + clearTimeout(hardTimer); + } } } @@ -205,6 +268,7 @@ describeLive("live models (profile keys)", () => { const allowNotFoundSkip = useModern; const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS); const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000); + const maxModels = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0); const failures: Array<{ model: string; error: string }> = []; const skipped: Array<{ model: string; reason: string }> = []; @@ -246,11 +310,21 @@ describeLive("live models (profile keys)", () => { return; } + const selectedCandidates = capByProviderSpread( + candidates, + maxModels > 0 ? maxModels : candidates.length, + (entry) => entry.model.provider, + ); logProgress(`[live-models] selection=${useExplicit ? "explicit" : "modern"}`); - logProgress(`[live-models] running ${candidates.length} models`); - const total = candidates.length; + if (selectedCandidates.length < candidates.length) { + logProgress( + `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`, + ); + } + logProgress(`[live-models] running ${selectedCandidates.length} models`); + const total = selectedCandidates.length; - for (const [index, entry] of candidates.entries()) { + for (const [index, entry] of selectedCandidates.entries()) { const { model, apiKeyInfo } = entry; const id = `${model.provider}/${model.id}`; const progressLabel = `[live-models] ${index + 1}/${total} ${id}`; @@ -513,6 +587,11 @@ describeLive("live models (profile keys)", () => { logProgress(`${progressLabel}: skip (instructions required)`); break; } + if (allowNotFoundSkip && isModelTimeoutError(message)) { + skipped.push({ model: id, reason: message }); + logProgress(`${progressLabel}: skip (timeout)`); + break; + } logProgress(`${progressLabel}: failed`); failures.push({ model: id, error: message }); break; diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 0140a6569d9..f8cd415cfe0 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -55,6 +55,58 @@ function parseFilter(raw?: string): Set | null { return ids.length ? new Set(ids) : null; } +function toInt(value: string | undefined, fallback: number): number { + const trimmed = value?.trim(); + if (!trimmed) { + return fallback; + } + const parsed = Number.parseInt(trimmed, 10); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function capByProviderSpread( + items: T[], + maxItems: number, + providerOf: (item: T) => string, +): T[] { + if (maxItems <= 0 || items.length <= maxItems) { + return items; + } + const providerOrder: string[] = []; + const grouped = new Map(); + for (const item of items) { + const provider = providerOf(item); + const bucket = grouped.get(provider); + if (bucket) { + bucket.push(item); + continue; + } + providerOrder.push(provider); + grouped.set(provider, [item]); + } + + const selected: T[] = []; + while (selected.length < maxItems && grouped.size > 0) { + for (const provider of providerOrder) { + const bucket = grouped.get(provider); + if (!bucket || bucket.length === 0) { + continue; + } + const item = bucket.shift(); + if (item) { + selected.push(item); + } + if (bucket.length === 0) { + grouped.delete(provider); + } + if (selected.length >= maxItems) { + break; + } + } + } + return selected; +} + function logProgress(message: string): void { console.log(`[live] ${message}`); } @@ -1061,6 +1113,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { const useModern = !rawModels || rawModels === "modern" || rawModels === "all"; const useExplicit = Boolean(rawModels) && !useModern; const filter = useExplicit ? parseFilter(rawModels) : null; + const maxModels = toInt(process.env.OPENCLAW_LIVE_GATEWAY_MAX_MODELS, 0); const wanted = filter ? all.filter((m) => filter.has(`${m.provider}/${m.id}`)) : all.filter((m) => isModernModelRef({ provider: m.provider, id: m.id })); @@ -1091,21 +1144,31 @@ describeLive("gateway live (dev agent, profile keys)", () => { logProgress("[all-models] no API keys found; skipping"); return; } + const selectedCandidates = capByProviderSpread( + candidates, + maxModels > 0 ? maxModels : candidates.length, + (model) => model.provider, + ); logProgress(`[all-models] selection=${useExplicit ? "explicit" : "modern"}`); - const imageCandidates = candidates.filter((m) => m.input?.includes("image")); + if (selectedCandidates.length < candidates.length) { + logProgress( + `[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`, + ); + } + const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image")); if (imageCandidates.length === 0) { logProgress("[all-models] no image-capable models selected; image probe will be skipped"); } await runGatewayModelSuite({ label: "all-models", cfg, - candidates, + candidates: selectedCandidates, extraToolProbes: true, extraImageProbes: true, thinkingLevel: THINKING_LEVEL, }); - const minimaxCandidates = candidates.filter((model) => model.provider === "minimax"); + const minimaxCandidates = selectedCandidates.filter((model) => model.provider === "minimax"); if (minimaxCandidates.length === 0) { logProgress("[minimax] no candidates with keys; skipping dual endpoint probes"); return;