test: cap docker live model sweeps and harden timeouts

This commit is contained in:
Peter Steinberger
2026-02-25 02:48:25 +00:00
parent df9a474891
commit 7c59b78aee
4 changed files with 159 additions and 15 deletions

View File

@@ -22,8 +22,9 @@ docker run --rm -t \
-e HOME=/home/node \
-e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
-e OPENCLAW_LIVE_TEST=1 \
-e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}}" \
-e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-modern}}" \
-e OPENCLAW_LIVE_GATEWAY_PROVIDERS="${OPENCLAW_LIVE_GATEWAY_PROVIDERS:-${CLAWDBOT_LIVE_GATEWAY_PROVIDERS:-}}" \
-e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MAX_MODELS:-24}}" \
-e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-}}" \
-v "$CONFIG_DIR":/home/node/.openclaw \
-v "$WORKSPACE_DIR":/home/node/.openclaw/workspace \

View File

@@ -22,8 +22,9 @@ docker run --rm -t \
-e HOME=/home/node \
-e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
-e OPENCLAW_LIVE_TEST=1 \
-e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-all}}" \
-e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-modern}}" \
-e OPENCLAW_LIVE_PROVIDERS="${OPENCLAW_LIVE_PROVIDERS:-${CLAWDBOT_LIVE_PROVIDERS:-}}" \
-e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-${CLAWDBOT_LIVE_MAX_MODELS:-48}}" \
-e OPENCLAW_LIVE_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_MODEL_TIMEOUT_MS:-}}" \
-e OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS="${OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS:-${CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS:-}}" \
-v "$CONFIG_DIR":/home/node/.openclaw \

View File

@@ -91,6 +91,10 @@ function isInstructionsRequiredError(raw: string): boolean {
return /instructions are required/i.test(raw);
}
/**
 * Reports whether an error message contains the phrase
 * "model call timed out after <digits>ms" (matched case-insensitively,
 * anywhere in the text).
 *
 * @param raw - Error text to inspect.
 * @returns `true` when the timeout phrase is present, otherwise `false`.
 */
function isModelTimeoutError(raw: string): boolean {
  const timeoutPattern = /model call timed out after \d+ms/i;
  return raw.search(timeoutPattern) !== -1;
}
function toInt(value: string | undefined, fallback: number): number {
const trimmed = value?.trim();
if (!trimmed) {
@@ -100,6 +104,49 @@ function toInt(value: string | undefined, fallback: number): number {
return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Caps a list to at most `maxItems` entries while spreading the picks across
 * providers in round-robin order.
 *
 * Providers take turns in the order they first appear in `items`; within a
 * provider, items keep their original relative order. So the result holds the
 * first item of every provider, then the second item of every provider, and
 * so on, until the cap is reached.
 *
 * Items are never tested for truthiness, so falsy values (`0`, `""`, `false`)
 * are selected like any other item.
 *
 * @param items - Candidates to choose from. Not mutated.
 * @param maxItems - Upper bound on the result size. A value `<= 0`, or one that
 *   is not smaller than `items.length`, disables capping. A fractional cap is
 *   rounded up.
 * @param providerOf - Returns the provider key for an item; called once per
 *   item, in input order.
 * @returns `items` itself (same reference) when no capping is needed,
 *   otherwise a new array with the selected items.
 */
function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
  providerOf: (item: T) => string,
): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
  }

  // Tag each item with its round (position inside its provider) and its lane
  // (the provider's first-seen index). Ordering by (round, lane) is exactly the
  // order a round-robin over the providers would visit the items.
  const lanes = new Map<string, { lane: number; seen: number }>();
  const ranked = items.map((item) => {
    const provider = providerOf(item);
    let state = lanes.get(provider);
    if (!state) {
      state = { lane: lanes.size, seen: 0 };
      lanes.set(provider, state);
    }
    const slot = { item, round: state.seen, lane: state.lane };
    state.seen += 1;
    return slot;
  });

  ranked.sort((a, b) => a.round - b.round || a.lane - b.lane);
  return ranked.slice(0, Math.ceil(maxItems)).map((slot) => slot.item);
}
function resolveTestReasoning(
model: Model<Api>,
): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
@@ -122,16 +169,32 @@ async function completeSimpleWithTimeout<TApi extends Api>(
options: Parameters<typeof completeSimple<TApi>>[2],
timeoutMs: number,
) {
const maxTimeoutMs = Math.max(1, timeoutMs);
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), Math.max(1, timeoutMs));
timer.unref?.();
const abortTimer = setTimeout(() => {
controller.abort();
}, maxTimeoutMs);
abortTimer.unref?.();
let hardTimer: ReturnType<typeof setTimeout> | undefined;
const timeout = new Promise<never>((_, reject) => {
hardTimer = setTimeout(() => {
reject(new Error(`model call timed out after ${maxTimeoutMs}ms`));
}, maxTimeoutMs);
hardTimer.unref?.();
});
try {
return await completeSimple(model, context, {
...options,
signal: controller.signal,
});
return await Promise.race([
completeSimple(model, context, {
...options,
signal: controller.signal,
}),
timeout,
]);
} finally {
clearTimeout(timer);
clearTimeout(abortTimer);
if (hardTimer) {
clearTimeout(hardTimer);
}
}
}
@@ -205,6 +268,7 @@ describeLive("live models (profile keys)", () => {
const allowNotFoundSkip = useModern;
const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS);
const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000);
const maxModels = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0);
const failures: Array<{ model: string; error: string }> = [];
const skipped: Array<{ model: string; reason: string }> = [];
@@ -246,11 +310,21 @@ describeLive("live models (profile keys)", () => {
return;
}
const selectedCandidates = capByProviderSpread(
candidates,
maxModels > 0 ? maxModels : candidates.length,
(entry) => entry.model.provider,
);
logProgress(`[live-models] selection=${useExplicit ? "explicit" : "modern"}`);
logProgress(`[live-models] running ${candidates.length} models`);
const total = candidates.length;
if (selectedCandidates.length < candidates.length) {
logProgress(
`[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,
);
}
logProgress(`[live-models] running ${selectedCandidates.length} models`);
const total = selectedCandidates.length;
for (const [index, entry] of candidates.entries()) {
for (const [index, entry] of selectedCandidates.entries()) {
const { model, apiKeyInfo } = entry;
const id = `${model.provider}/${model.id}`;
const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
@@ -513,6 +587,11 @@ describeLive("live models (profile keys)", () => {
logProgress(`${progressLabel}: skip (instructions required)`);
break;
}
if (allowNotFoundSkip && isModelTimeoutError(message)) {
skipped.push({ model: id, reason: message });
logProgress(`${progressLabel}: skip (timeout)`);
break;
}
logProgress(`${progressLabel}: failed`);
failures.push({ model: id, error: message });
break;

View File

@@ -55,6 +55,58 @@ function parseFilter(raw?: string): Set<string> | null {
return ids.length ? new Set(ids) : null;
}
/**
 * Parses a base-10 integer from an optional string, returning `fallback` when
 * the value is missing, blank, or does not parse to a finite number.
 *
 * Parsing uses `Number.parseInt`, so leading digits are accepted even when
 * followed by other characters (for example `"12abc"` yields `12`).
 *
 * @param value - Raw text, typically an environment variable; may be undefined.
 * @param fallback - Value returned when no usable integer is found.
 * @returns The parsed integer, or `fallback`.
 */
function toInt(value: string | undefined, fallback: number): number {
  const text = (value ?? "").trim();
  if (text.length === 0) {
    return fallback;
  }
  const candidate = Number.parseInt(text, 10);
  if (!Number.isFinite(candidate)) {
    return fallback;
  }
  return candidate;
}
/**
 * Caps a list to at most `maxItems` entries while spreading the picks across
 * providers in round-robin order.
 *
 * Providers take turns in the order they first appear in `items`; within a
 * provider, items keep their original relative order. So the result holds the
 * first item of every provider, then the second item of every provider, and
 * so on, until the cap is reached.
 *
 * Items are never tested for truthiness, so falsy values (`0`, `""`, `false`)
 * are selected like any other item.
 *
 * @param items - Candidates to choose from. Not mutated.
 * @param maxItems - Upper bound on the result size. A value `<= 0`, or one that
 *   is not smaller than `items.length`, disables capping. A fractional cap is
 *   rounded up.
 * @param providerOf - Returns the provider key for an item; called once per
 *   item, in input order.
 * @returns `items` itself (same reference) when no capping is needed,
 *   otherwise a new array with the selected items.
 */
function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
  providerOf: (item: T) => string,
): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
  }

  // Tag each item with its round (position inside its provider) and its lane
  // (the provider's first-seen index). Ordering by (round, lane) is exactly the
  // order a round-robin over the providers would visit the items.
  const lanes = new Map<string, { lane: number; seen: number }>();
  const ranked = items.map((item) => {
    const provider = providerOf(item);
    let state = lanes.get(provider);
    if (!state) {
      state = { lane: lanes.size, seen: 0 };
      lanes.set(provider, state);
    }
    const slot = { item, round: state.seen, lane: state.lane };
    state.seen += 1;
    return slot;
  });

  ranked.sort((a, b) => a.round - b.round || a.lane - b.lane);
  return ranked.slice(0, Math.ceil(maxItems)).map((slot) => slot.item);
}
/**
 * Writes a progress line to stdout via `console.log`, prefixed with `[live] `.
 *
 * @param message - Text to print after the prefix.
 */
function logProgress(message: string): void {
  const line = `[live] ${message}`;
  console.log(line);
}
@@ -1061,6 +1113,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
const useExplicit = Boolean(rawModels) && !useModern;
const filter = useExplicit ? parseFilter(rawModels) : null;
const maxModels = toInt(process.env.OPENCLAW_LIVE_GATEWAY_MAX_MODELS, 0);
const wanted = filter
? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
: all.filter((m) => isModernModelRef({ provider: m.provider, id: m.id }));
@@ -1091,21 +1144,31 @@ describeLive("gateway live (dev agent, profile keys)", () => {
logProgress("[all-models] no API keys found; skipping");
return;
}
const selectedCandidates = capByProviderSpread(
candidates,
maxModels > 0 ? maxModels : candidates.length,
(model) => model.provider,
);
logProgress(`[all-models] selection=${useExplicit ? "explicit" : "modern"}`);
const imageCandidates = candidates.filter((m) => m.input?.includes("image"));
if (selectedCandidates.length < candidates.length) {
logProgress(
`[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
);
}
const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
if (imageCandidates.length === 0) {
logProgress("[all-models] no image-capable models selected; image probe will be skipped");
}
await runGatewayModelSuite({
label: "all-models",
cfg,
candidates,
candidates: selectedCandidates,
extraToolProbes: true,
extraImageProbes: true,
thinkingLevel: THINKING_LEVEL,
});
const minimaxCandidates = candidates.filter((model) => model.provider === "minimax");
const minimaxCandidates = selectedCandidates.filter((model) => model.provider === "minimax");
if (minimaxCandidates.length === 0) {
logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
return;