mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-07 14:34:21 +00:00
test: cap docker live model sweeps and harden timeouts
This commit is contained in:
@@ -22,8 +22,9 @@ docker run --rm -t \
|
||||
-e HOME=/home/node \
|
||||
-e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
|
||||
-e OPENCLAW_LIVE_TEST=1 \
|
||||
-e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}}" \
|
||||
-e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MODELS:-modern}}" \
|
||||
-e OPENCLAW_LIVE_GATEWAY_PROVIDERS="${OPENCLAW_LIVE_GATEWAY_PROVIDERS:-${CLAWDBOT_LIVE_GATEWAY_PROVIDERS:-}}" \
|
||||
-e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-${CLAWDBOT_LIVE_GATEWAY_MAX_MODELS:-24}}" \
|
||||
-e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-}}" \
|
||||
-v "$CONFIG_DIR":/home/node/.openclaw \
|
||||
-v "$WORKSPACE_DIR":/home/node/.openclaw/workspace \
|
||||
|
||||
@@ -22,8 +22,9 @@ docker run --rm -t \
|
||||
-e HOME=/home/node \
|
||||
-e NODE_OPTIONS=--disable-warning=ExperimentalWarning \
|
||||
-e OPENCLAW_LIVE_TEST=1 \
|
||||
-e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-all}}" \
|
||||
-e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-${CLAWDBOT_LIVE_MODELS:-modern}}" \
|
||||
-e OPENCLAW_LIVE_PROVIDERS="${OPENCLAW_LIVE_PROVIDERS:-${CLAWDBOT_LIVE_PROVIDERS:-}}" \
|
||||
-e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-${CLAWDBOT_LIVE_MAX_MODELS:-48}}" \
|
||||
-e OPENCLAW_LIVE_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_MODEL_TIMEOUT_MS:-${CLAWDBOT_LIVE_MODEL_TIMEOUT_MS:-}}" \
|
||||
-e OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS="${OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS:-${CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS:-}}" \
|
||||
-v "$CONFIG_DIR":/home/node/.openclaw \
|
||||
|
||||
@@ -91,6 +91,10 @@ function isInstructionsRequiredError(raw: string): boolean {
|
||||
return /instructions are required/i.test(raw);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `raw` contains a "model call timed out after <N>ms" message.
 *
 * The match is case-insensitive and may occur anywhere inside `raw`.
 *
 * @param raw - Error text to inspect.
 * @returns `true` when the timeout phrase is present, otherwise `false`.
 */
function isModelTimeoutError(raw: string): boolean {
  return /model call timed out after \d+ms/i.test(raw);
}
|
||||
|
||||
function toInt(value: string | undefined, fallback: number): number {
|
||||
const trimmed = value?.trim();
|
||||
if (!trimmed) {
|
||||
@@ -100,6 +104,49 @@ function toInt(value: string | undefined, fallback: number): number {
|
||||
return Number.isFinite(parsed) ? parsed : fallback;
|
||||
}
|
||||
|
||||
/**
 * Caps `items` to at most `maxItems` entries while spreading the selection across providers.
 *
 * Items are grouped by `providerOf(item)`. Groups are visited round-robin in the order in which
 * each provider first appears, taking one item per group per pass (items keep their relative
 * order within a group) until the cap is reached.
 *
 * @param items - Candidate items; the array is not mutated.
 * @param maxItems - Maximum number of items to keep. Values `<= 0` disable the cap.
 * @param providerOf - Returns the grouping key for an item.
 * @returns `items` itself when no capping is needed, otherwise a new array of selected items.
 */
function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
  providerOf: (item: T) => string,
): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
  }

  const grouped = new Map<string, T[]>();
  for (const item of items) {
    const provider = providerOf(item);
    const bucket = grouped.get(provider);
    if (bucket) {
      bucket.push(item);
    } else {
      grouped.set(provider, [item]);
    }
  }

  // Map iteration follows insertion order, i.e. the order providers first appeared.
  const buckets = [...grouped.values()];
  const selected: T[] = [];

  // Pass `round` takes the item at index `round` from every bucket that still has one.
  // Indexing (rather than shift + truthiness check) keeps falsy items such as 0 or "".
  for (let round = 0; selected.length < maxItems; round += 1) {
    let tookAny = false;
    for (const bucket of buckets) {
      if (round >= bucket.length) {
        continue;
      }
      // In range by the guard above, so the element is a real T.
      selected.push(bucket[round] as T);
      tookAny = true;
      if (selected.length >= maxItems) {
        break;
      }
    }
    if (!tookAny) {
      // Every bucket is exhausted; nothing more to select.
      break;
    }
  }
  return selected;
}
|
||||
|
||||
function resolveTestReasoning(
|
||||
model: Model<Api>,
|
||||
): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
|
||||
@@ -122,16 +169,32 @@ async function completeSimpleWithTimeout<TApi extends Api>(
|
||||
options: Parameters<typeof completeSimple<TApi>>[2],
|
||||
timeoutMs: number,
|
||||
) {
|
||||
const maxTimeoutMs = Math.max(1, timeoutMs);
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), Math.max(1, timeoutMs));
|
||||
timer.unref?.();
|
||||
const abortTimer = setTimeout(() => {
|
||||
controller.abort();
|
||||
}, maxTimeoutMs);
|
||||
abortTimer.unref?.();
|
||||
let hardTimer: ReturnType<typeof setTimeout> | undefined;
|
||||
const timeout = new Promise<never>((_, reject) => {
|
||||
hardTimer = setTimeout(() => {
|
||||
reject(new Error(`model call timed out after ${maxTimeoutMs}ms`));
|
||||
}, maxTimeoutMs);
|
||||
hardTimer.unref?.();
|
||||
});
|
||||
try {
|
||||
return await completeSimple(model, context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
});
|
||||
return await Promise.race([
|
||||
completeSimple(model, context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
}),
|
||||
timeout,
|
||||
]);
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
clearTimeout(abortTimer);
|
||||
if (hardTimer) {
|
||||
clearTimeout(hardTimer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,6 +268,7 @@ describeLive("live models (profile keys)", () => {
|
||||
const allowNotFoundSkip = useModern;
|
||||
const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS);
|
||||
const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000);
|
||||
const maxModels = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0);
|
||||
|
||||
const failures: Array<{ model: string; error: string }> = [];
|
||||
const skipped: Array<{ model: string; reason: string }> = [];
|
||||
@@ -246,11 +310,21 @@ describeLive("live models (profile keys)", () => {
|
||||
return;
|
||||
}
|
||||
|
||||
const selectedCandidates = capByProviderSpread(
|
||||
candidates,
|
||||
maxModels > 0 ? maxModels : candidates.length,
|
||||
(entry) => entry.model.provider,
|
||||
);
|
||||
logProgress(`[live-models] selection=${useExplicit ? "explicit" : "modern"}`);
|
||||
logProgress(`[live-models] running ${candidates.length} models`);
|
||||
const total = candidates.length;
|
||||
if (selectedCandidates.length < candidates.length) {
|
||||
logProgress(
|
||||
`[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,
|
||||
);
|
||||
}
|
||||
logProgress(`[live-models] running ${selectedCandidates.length} models`);
|
||||
const total = selectedCandidates.length;
|
||||
|
||||
for (const [index, entry] of candidates.entries()) {
|
||||
for (const [index, entry] of selectedCandidates.entries()) {
|
||||
const { model, apiKeyInfo } = entry;
|
||||
const id = `${model.provider}/${model.id}`;
|
||||
const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
|
||||
@@ -513,6 +587,11 @@ describeLive("live models (profile keys)", () => {
|
||||
logProgress(`${progressLabel}: skip (instructions required)`);
|
||||
break;
|
||||
}
|
||||
if (allowNotFoundSkip && isModelTimeoutError(message)) {
|
||||
skipped.push({ model: id, reason: message });
|
||||
logProgress(`${progressLabel}: skip (timeout)`);
|
||||
break;
|
||||
}
|
||||
logProgress(`${progressLabel}: failed`);
|
||||
failures.push({ model: id, error: message });
|
||||
break;
|
||||
|
||||
@@ -55,6 +55,58 @@ function parseFilter(raw?: string): Set<string> | null {
|
||||
return ids.length ? new Set(ids) : null;
|
||||
}
|
||||
|
||||
/**
 * Parses a base-10 integer from an optional string, falling back when it is unusable.
 *
 * The trimmed value must consist solely of an optional sign followed by decimal digits.
 * Inputs such as `"1e4"`, `"1.5"` or `"12abc"` yield `fallback` rather than the numeric
 * prefix that `parseInt` alone would return.
 *
 * @param value - Raw text, e.g. an environment variable; may be `undefined`.
 * @param fallback - Returned when `value` is missing, blank, or not a whole decimal integer.
 * @returns The parsed integer, or `fallback`.
 */
function toInt(value: string | undefined, fallback: number): number {
  const trimmed = value?.trim();
  if (!trimmed || !/^[+-]?\d+$/.test(trimmed)) {
    return fallback;
  }
  const parsed = Number.parseInt(trimmed, 10);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
||||
|
||||
/**
 * Caps `items` to at most `maxItems` entries while spreading the selection across providers.
 *
 * Items are grouped by `providerOf(item)`. Groups are visited round-robin in the order in which
 * each provider first appears, taking one item per group per pass (items keep their relative
 * order within a group) until the cap is reached.
 *
 * @param items - Candidate items; the array is not mutated.
 * @param maxItems - Maximum number of items to keep. Values `<= 0` disable the cap.
 * @param providerOf - Returns the grouping key for an item.
 * @returns `items` itself when no capping is needed, otherwise a new array of selected items.
 */
function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
  providerOf: (item: T) => string,
): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
  }

  const grouped = new Map<string, T[]>();
  for (const item of items) {
    const provider = providerOf(item);
    const bucket = grouped.get(provider);
    if (bucket) {
      bucket.push(item);
    } else {
      grouped.set(provider, [item]);
    }
  }

  // Map iteration follows insertion order, i.e. the order providers first appeared.
  const buckets = [...grouped.values()];
  const selected: T[] = [];

  // Pass `round` takes the item at index `round` from every bucket that still has one.
  // Indexing (rather than shift + truthiness check) keeps falsy items such as 0 or "".
  for (let round = 0; selected.length < maxItems; round += 1) {
    let tookAny = false;
    for (const bucket of buckets) {
      if (round >= bucket.length) {
        continue;
      }
      // In range by the guard above, so the element is a real T.
      selected.push(bucket[round] as T);
      tookAny = true;
      if (selected.length >= maxItems) {
        break;
      }
    }
    if (!tookAny) {
      // Every bucket is exhausted; nothing more to select.
      break;
    }
  }
  return selected;
}
|
||||
|
||||
/**
 * Writes a progress line to standard output via `console.log`, prefixed with `[live] `.
 *
 * @param message - Text to print after the prefix.
 */
function logProgress(message: string): void {
  console.log(`[live] ${message}`);
}
|
||||
@@ -1061,6 +1113,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
|
||||
const useExplicit = Boolean(rawModels) && !useModern;
|
||||
const filter = useExplicit ? parseFilter(rawModels) : null;
|
||||
const maxModels = toInt(process.env.OPENCLAW_LIVE_GATEWAY_MAX_MODELS, 0);
|
||||
const wanted = filter
|
||||
? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
|
||||
: all.filter((m) => isModernModelRef({ provider: m.provider, id: m.id }));
|
||||
@@ -1091,21 +1144,31 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
logProgress("[all-models] no API keys found; skipping");
|
||||
return;
|
||||
}
|
||||
const selectedCandidates = capByProviderSpread(
|
||||
candidates,
|
||||
maxModels > 0 ? maxModels : candidates.length,
|
||||
(model) => model.provider,
|
||||
);
|
||||
logProgress(`[all-models] selection=${useExplicit ? "explicit" : "modern"}`);
|
||||
const imageCandidates = candidates.filter((m) => m.input?.includes("image"));
|
||||
if (selectedCandidates.length < candidates.length) {
|
||||
logProgress(
|
||||
`[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
|
||||
);
|
||||
}
|
||||
const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
|
||||
if (imageCandidates.length === 0) {
|
||||
logProgress("[all-models] no image-capable models selected; image probe will be skipped");
|
||||
}
|
||||
await runGatewayModelSuite({
|
||||
label: "all-models",
|
||||
cfg,
|
||||
candidates,
|
||||
candidates: selectedCandidates,
|
||||
extraToolProbes: true,
|
||||
extraImageProbes: true,
|
||||
thinkingLevel: THINKING_LEVEL,
|
||||
});
|
||||
|
||||
const minimaxCandidates = candidates.filter((model) => model.provider === "minimax");
|
||||
const minimaxCandidates = selectedCandidates.filter((model) => model.provider === "minimax");
|
||||
if (minimaxCandidates.length === 0) {
|
||||
logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user