test: cap docker live model sweeps and harden timeouts

2026-03-07 14:34:21 +00:00 · 2026-02-25 02:48:25 +00:00
parent df9a474891
commit 7c59b78aee
4 changed files with 159 additions and 15 deletions
--- a/scripts/test-live-gateway-models-docker.sh
+++ b/scripts/test-live-gateway-models-docker.sh
@@ -22,8 +22,9 @@ docker run --rm -t \
  -e HOME=/home/node \
  -e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
  -e OPENCLAW_LIVE_TEST=1 \
-  -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}}" \
+  -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-modern}}" \
  -e OPENCLAW_LIVE_GATEWAY_PROVIDERS="${OPENCLAW_LIVE_GATEWAY_PROVIDERS:-${CLAWDBOT_LIVE_GATEWAY_PROVIDERS:-}}" \
+  -e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MAX_MODELS:-24}}" \
  -e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-}}" \
  -v "$CONFIG_DIR":/home/node/.openclaw \
  -v "$WORKSPACE_DIR":/home/node/.openclaw/workspace \
--- a/scripts/test-live-models-docker.sh
+++ b/scripts/test-live-models-docker.sh
@@ -22,8 +22,9 @@ docker run --rm -t \
  -e HOME=/home/node \
  -e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
  -e OPENCLAW_LIVE_TEST=1 \
-  -e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-all}}" \
+  -e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-modern}}" \
  -e OPENCLAW_LIVE_PROVIDERS="${OPENCLAW_LIVE_PROVIDERS:-${CLAWDBOT_LIVE_PROVIDERS:-}}" \
+  -e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-${CLAWDBOT_LIVE_MAX_MODELS:-48}}" \
  -e OPENCLAW_LIVE_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_MODEL_TIMEOUT_MS:-}}" \
  -e OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS="${OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS:-${CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS:-}}" \
  -v "$CONFIG_DIR":/home/node/.openclaw \
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -91,6 +91,10 @@ function isInstructionsRequiredError(raw: string): boolean {
  return /instructions are required/i.test(raw);
 }

+function isModelTimeoutError(raw: string): boolean { // True when `raw` contains "model call timed out after <N>ms" (case-insensitive), the message format rejected by the hard timeout in completeSimpleWithTimeout.
+  return /model call timed out after \d+ms/i.test(raw);
+}
+
 function toInt(value: string | undefined, fallback: number): number {
  const trimmed = value?.trim();
  if (!trimmed) {
@@ -100,6 +104,49 @@ function toInt(value: string | undefined, fallback: number): number {
  return Number.isFinite(parsed) ? parsed : fallback;
 }

+function capByProviderSpread<T>( // Caps `items` at `maxItems`, taking one item per provider per round so a single provider cannot fill the whole selection.
+  items: T[], // input list; never mutated here
+  maxItems: number, // maximum result length; <= 0 disables the cap
+  providerOf: (item: T) => string, // maps an item to its grouping key
+): T[] {
+  if (maxItems <= 0 || items.length <= maxItems) {
+    return items; // nothing to trim: the input array itself is returned, not a copy
+  }
+  const providerOrder: string[] = []; // providers in first-seen order; drives the round-robin below
+  const grouped = new Map<string, T[]>(); // provider -> remaining items, kept in input order
+  for (const item of items) {
+    const provider = providerOf(item);
+    const bucket = grouped.get(provider);
+    if (bucket) {
+      bucket.push(item);
+      continue;
+    }
+    providerOrder.push(provider); // first time this provider is seen
+    grouped.set(provider, [item]);
+  }
+
+  const selected: T[] = [];
+  while (selected.length < maxItems && grouped.size > 0) { // each pass takes at most one item per provider
+    for (const provider of providerOrder) {
+      const bucket = grouped.get(provider);
+      if (!bucket || bucket.length === 0) {
+        continue; // provider already exhausted
+      }
+      const item = bucket.shift(); // earliest remaining item for this provider
+      if (item) { // NOTE(review): truthiness check would also drop falsy items (0, "", null); the visible caller passes objects
+        selected.push(item);
+      }
+      if (bucket.length === 0) {
+        grouped.delete(provider); // emptied buckets leave the map so `grouped.size` can reach 0
+      }
+      if (selected.length >= maxItems) {
+        break; // cap reached mid-round
+      }
+    }
+  }
+  return selected;
+}
+
 function resolveTestReasoning(
  model: Model<Api>,
 ): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
@@ -122,16 +169,32 @@ async function completeSimpleWithTimeout<TApi extends Api>(
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
 ) {
+  const maxTimeoutMs = Math.max(1, timeoutMs);
  const controller = new AbortController();
-  const timer = setTimeout(() => controller.abort(), Math.max(1, timeoutMs));
-  timer.unref?.();
+  const abortTimer = setTimeout(() => {
+    controller.abort();
+  }, maxTimeoutMs);
+  abortTimer.unref?.();
+  let hardTimer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, reject) => {
+    hardTimer = setTimeout(() => {
+      reject(new Error(`model call timed out after ${maxTimeoutMs}ms`));
+    }, maxTimeoutMs);
+    hardTimer.unref?.();
+  });
  try {
-    return await completeSimple(model, context, {
-      ...options,
-      signal: controller.signal,
-    });
+    return await Promise.race([
+      completeSimple(model, context, {
+        ...options,
+        signal: controller.signal,
+      }),
+      timeout,
+    ]);
  } finally {
-    clearTimeout(timer);
+    clearTimeout(abortTimer);
+    if (hardTimer) {
+      clearTimeout(hardTimer);
+    }
  }
 }

@@ -205,6 +268,7 @@ describeLive("live models (profile keys)", () => {
      const allowNotFoundSkip = useModern;
      const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS);
      const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000);
+      const maxModels = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0);

      const failures: Array<{ model: string; error: string }> = [];
      const skipped: Array<{ model: string; reason: string }> = [];
@@ -246,11 +310,21 @@ describeLive("live models (profile keys)", () => {
        return;
      }

+      const selectedCandidates = capByProviderSpread(
+        candidates,
+        maxModels > 0 ? maxModels : candidates.length,
+        (entry) => entry.model.provider,
+      );
      logProgress(`[live-models] selection=${useExplicit ? "explicit" : "modern"}`);
-      logProgress(`[live-models] running ${candidates.length} models`);
-      const total = candidates.length;
+      if (selectedCandidates.length < candidates.length) {
+        logProgress(
+          `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,
+        );
+      }
+      logProgress(`[live-models] running ${selectedCandidates.length} models`);
+      const total = selectedCandidates.length;

-      for (const [index, entry] of candidates.entries()) {
+      for (const [index, entry] of selectedCandidates.entries()) {
        const { model, apiKeyInfo } = entry;
        const id = `${model.provider}/${model.id}`;
        const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
@@ -513,6 +587,11 @@ describeLive("live models (profile keys)", () => {
              logProgress(`${progressLabel}: skip (instructions required)`);
              break;
            }
+            if (allowNotFoundSkip && isModelTimeoutError(message)) {
+              skipped.push({ model: id, reason: message });
+              logProgress(`${progressLabel}: skip (timeout)`);
+              break;
+            }
            logProgress(`${progressLabel}: failed`);
            failures.push({ model: id, error: message });
            break;
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -55,6 +55,58 @@ function parseFilter(raw?: string): Set<string> | null {
  return ids.length ? new Set(ids) : null;
 }

+function toInt(value: string | undefined, fallback: number): number { // Parses a base-10 integer from `value`; returns `fallback` when it is undefined, blank, or has no leading digits.
+  const trimmed = value?.trim();
+  if (!trimmed) {
+    return fallback; // undefined, empty, or whitespace-only input
+  }
+  const parsed = Number.parseInt(trimmed, 10); // parseInt reads a leading numeric prefix, e.g. "24abc" -> 24
+  return Number.isFinite(parsed) ? parsed : fallback; // NaN (no parsable prefix) falls back
+}
+
+function capByProviderSpread<T>( // Caps `items` at `maxItems`, taking one item per provider per round so a single provider cannot fill the whole selection.
+  items: T[], // input list; never mutated here
+  maxItems: number, // maximum result length; <= 0 disables the cap
+  providerOf: (item: T) => string, // maps an item to its grouping key
+): T[] {
+  if (maxItems <= 0 || items.length <= maxItems) {
+    return items; // nothing to trim: the input array itself is returned, not a copy
+  }
+  const providerOrder: string[] = []; // providers in first-seen order; drives the round-robin below
+  const grouped = new Map<string, T[]>(); // provider -> remaining items, kept in input order
+  for (const item of items) {
+    const provider = providerOf(item);
+    const bucket = grouped.get(provider);
+    if (bucket) {
+      bucket.push(item);
+      continue;
+    }
+    providerOrder.push(provider); // first time this provider is seen
+    grouped.set(provider, [item]);
+  }
+
+  const selected: T[] = [];
+  while (selected.length < maxItems && grouped.size > 0) { // each pass takes at most one item per provider
+    for (const provider of providerOrder) {
+      const bucket = grouped.get(provider);
+      if (!bucket || bucket.length === 0) {
+        continue; // provider already exhausted
+      }
+      const item = bucket.shift(); // earliest remaining item for this provider
+      if (item) { // NOTE(review): truthiness check would also drop falsy items (0, "", null); the visible caller passes objects
+        selected.push(item);
+      }
+      if (bucket.length === 0) {
+        grouped.delete(provider); // emptied buckets leave the map so `grouped.size` can reach 0
+      }
+      if (selected.length >= maxItems) {
+        break; // cap reached mid-round
+      }
+    }
+  }
+  return selected;
+}
+
 function logProgress(message: string): void {
  console.log(`[live] ${message}`);
 }
@@ -1061,6 +1113,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
      const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
      const useExplicit = Boolean(rawModels) && !useModern;
      const filter = useExplicit ? parseFilter(rawModels) : null;
+      const maxModels = toInt(process.env.OPENCLAW_LIVE_GATEWAY_MAX_MODELS, 0);
      const wanted = filter
        ? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
        : all.filter((m) => isModernModelRef({ provider: m.provider, id: m.id }));
@@ -1091,21 +1144,31 @@ describeLive("gateway live (dev agent, profile keys)", () => {
        logProgress("[all-models] no API keys found; skipping");
        return;
      }
+      const selectedCandidates = capByProviderSpread(
+        candidates,
+        maxModels > 0 ? maxModels : candidates.length,
+        (model) => model.provider,
+      );
      logProgress(`[all-models] selection=${useExplicit ? "explicit" : "modern"}`);
-      const imageCandidates = candidates.filter((m) => m.input?.includes("image"));
+      if (selectedCandidates.length < candidates.length) {
+        logProgress(
+          `[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
+        );
+      }
+      const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
      if (imageCandidates.length === 0) {
        logProgress("[all-models] no image-capable models selected; image probe will be skipped");
      }
      await runGatewayModelSuite({
        label: "all-models",
        cfg,
-        candidates,
+        candidates: selectedCandidates,
        extraToolProbes: true,
        extraImageProbes: true,
        thinkingLevel: THINKING_LEVEL,
      });

-      const minimaxCandidates = candidates.filter((model) => model.provider === "minimax");
+      const minimaxCandidates = selectedCandidates.filter((model) => model.provider === "minimax");
      if (minimaxCandidates.length === 0) {
        logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
        return;