fix: stabilize live docker test handling

2026-03-21 16:41:56 +00:00 · 2026-02-25 04:34:59 +00:00
parent 9beec48e9c
commit 146c92069b
3 changed files with 90 additions and 21 deletions
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -45,6 +45,23 @@ function logProgress(message: string): void {
  console.log(`[live] ${message}`);
 }

+function formatFailurePreview( // Builds a numbered, newline-joined preview of the first `maxItems` failures.
+  failures: Array<{ model: string; error: string }>,
+  maxItems: number, // values below 1 are raised to 1 by the Math.max below
+): string {
+  const limit = Math.max(1, maxItems);
+  const lines = failures.slice(0, limit).map((failure, index) => {
+    const normalized = failure.error.replace(/\s+/g, " ").trim(); // collapse whitespace runs (newlines included) so each failure stays on one line
+    const clipped = normalized.length > 320 ? `${normalized.slice(0, 317)}...` : normalized; // cap at 320 chars total: 317 kept + "..."
+    return `${index + 1}. ${failure.model}: ${clipped}`; // 1-based numbering
+  });
+  const remaining = failures.length - limit;
+  if (remaining > 0) {
+    lines.push(`... and ${remaining} more`); // trailer counting the failures left out of the preview
+  }
+  return lines.join("\n");
+}
+
 function isGoogleModelNotFoundError(err: unknown): boolean {
  const msg = String(err);
  if (!/not found/i.test(msg)) {
@@ -95,6 +112,16 @@ function isModelTimeoutError(raw: string): boolean {
  return /model call timed out after \d+ms/i.test(raw);
 }

+function isProviderUnavailableErrorMessage(raw: string): boolean { // True when `raw` contains any of the phrases below.
+  const msg = raw.toLowerCase(); // lowercase once so the literal comparisons are case-insensitive
+  return (
+    msg.includes("no allowed providers are available") ||
+    msg.includes("provider unavailable") ||
+    msg.includes("upstream provider unavailable") || // NOTE(review): already matched by "provider unavailable" above (substring)
+    msg.includes("upstream error from google")
+  );
+}
+
 function toInt(value: string | undefined, fallback: number): number {
  const trimmed = value?.trim();
  if (!trimmed) {
@@ -592,6 +619,11 @@ describeLive("live models (profile keys)", () => {
              logProgress(`${progressLabel}: skip (timeout)`);
              break;
            }
+            if (allowNotFoundSkip && isProviderUnavailableErrorMessage(message)) {
+              skipped.push({ model: id, reason: message });
+              logProgress(`${progressLabel}: skip (provider unavailable)`);
+              break;
+            }
            logProgress(`${progressLabel}: failed`);
            failures.push({ model: id, error: message });
            break;
@@ -600,11 +632,10 @@ describeLive("live models (profile keys)", () => {
      }

      if (failures.length > 0) {
-        const preview = failures
-          .slice(0, 10)
-          .map((f) => `- ${f.model}: ${f.error}`)
-          .join("\n");
-        throw new Error(`live model failures (${failures.length}):\n${preview}`);
+        const preview = formatFailurePreview(failures, 20);
+        throw new Error(
+          `live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`,
+        );
      }

      void skipped;
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -111,6 +111,23 @@ function logProgress(message: string): void {
  console.log(`[live] ${message}`);
 }

+function formatFailurePreview( // Builds a numbered, newline-joined preview of the first `maxItems` failures.
+  failures: Array<{ model: string; error: string }>,
+  maxItems: number, // values below 1 are raised to 1 by the Math.max below
+): string {
+  const limit = Math.max(1, maxItems);
+  const lines = failures.slice(0, limit).map((failure, index) => {
+    const normalized = failure.error.replace(/\s+/g, " ").trim(); // collapse whitespace runs (newlines included) so each failure stays on one line
+    const clipped = normalized.length > 320 ? `${normalized.slice(0, 317)}...` : normalized; // cap at 320 chars total: 317 kept + "..."
+    return `${index + 1}. ${failure.model}: ${clipped}`; // 1-based numbering
+  });
+  const remaining = failures.length - limit;
+  if (remaining > 0) {
+    lines.push(`... and ${remaining} more`); // trailer counting the failures left out of the preview
+  }
+  return lines.join("\n");
+}
+
 function assertNoReasoningTags(params: {
  text: string;
  model: string;
@@ -179,6 +196,16 @@ function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
  return msg.includes("hit your chatgpt usage limit") && msg.includes("try again in");
 }

+function isProviderUnavailableErrorMessage(raw: string): boolean { // True when `raw` contains any of the phrases below.
+  const msg = raw.toLowerCase(); // lowercase once so the literal comparisons are case-insensitive
+  return (
+    msg.includes("no allowed providers are available") ||
+    msg.includes("provider unavailable") ||
+    msg.includes("upstream provider unavailable") || // NOTE(review): already matched by "provider unavailable" above (substring)
+    msg.includes("upstream error from google")
+  );
+}
+
 function isInstructionsRequiredError(error: string): boolean {
  return /instructions are required/i.test(error);
 }
@@ -1013,6 +1040,11 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
            logProgress(`${progressLabel}: skip (anthropic empty response)`);
            break;
          }
+          if (isProviderUnavailableErrorMessage(message)) {
+            skippedCount += 1;
+            logProgress(`${progressLabel}: skip (provider unavailable)`);
+            break;
+          }
          // OpenAI Codex refresh tokens can become single-use; skip instead of failing all live tests.
          if (model.provider === "openai-codex" && isRefreshTokenReused(message)) {
            logProgress(`${progressLabel}: skip (codex refresh token reused)`);
@@ -1061,11 +1093,10 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
    }

    if (failures.length > 0) {
-      const preview = failures
-        .slice(0, 20)
-        .map((f) => `- ${f.model}: ${f.error}`)
-        .join("\n");
-      throw new Error(`gateway live model failures (${failures.length}):\n${preview}`);
+      const preview = formatFailurePreview(failures, 20);
+      throw new Error(
+        `gateway live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`,
+      );
    }
    if (skippedCount === total) {
      logProgress(`[${params.label}] skipped all models (missing profiles)`);