mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-27 00:17:29 +00:00
fix: stabilize docker test suite
This commit is contained in:
@@ -51,4 +51,40 @@ describe("syncExternalCliCredentials", () => {
|
||||
});
|
||||
expect(store.profiles[CODEX_CLI_PROFILE_ID]).toBeUndefined();
|
||||
});
|
||||
|
||||
it("refreshes stored Codex expiry from external CLI even when the cached profile looks fresh", () => {
|
||||
const staleExpiry = Date.now() + 30 * 60_000;
|
||||
const freshExpiry = Date.now() + 5 * 24 * 60 * 60_000;
|
||||
mocks.readCodexCliCredentialsCached.mockReturnValue({
|
||||
type: "oauth",
|
||||
provider: "openai-codex",
|
||||
access: "new-access-token",
|
||||
refresh: "new-refresh-token",
|
||||
expires: freshExpiry,
|
||||
accountId: "acct_456",
|
||||
});
|
||||
|
||||
const store: AuthProfileStore = {
|
||||
version: 1,
|
||||
profiles: {
|
||||
[OPENAI_CODEX_DEFAULT_PROFILE_ID]: {
|
||||
type: "oauth",
|
||||
provider: "openai-codex",
|
||||
access: "old-access-token",
|
||||
refresh: "old-refresh-token",
|
||||
expires: staleExpiry,
|
||||
accountId: "acct_456",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const mutated = syncExternalCliCredentials(store);
|
||||
|
||||
expect(mutated).toBe(true);
|
||||
expect(store.profiles[OPENAI_CODEX_DEFAULT_PROFILE_ID]).toMatchObject({
|
||||
access: "new-access-token",
|
||||
refresh: "new-refresh-token",
|
||||
expires: freshExpiry,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,13 +4,12 @@ import {
|
||||
readMiniMaxCliCredentialsCached,
|
||||
} from "../cli-credentials.js";
|
||||
import {
|
||||
EXTERNAL_CLI_NEAR_EXPIRY_MS,
|
||||
EXTERNAL_CLI_SYNC_TTL_MS,
|
||||
QWEN_CLI_PROFILE_ID,
|
||||
MINIMAX_CLI_PROFILE_ID,
|
||||
log,
|
||||
} from "./constants.js";
|
||||
import type { AuthProfileCredential, AuthProfileStore, OAuthCredential } from "./types.js";
|
||||
import type { AuthProfileStore, OAuthCredential } from "./types.js";
|
||||
|
||||
const OPENAI_CODEX_DEFAULT_PROFILE_ID = "openai-codex:default";
|
||||
|
||||
@@ -37,62 +36,33 @@ function shallowEqualOAuthCredentials(a: OAuthCredential | undefined, b: OAuthCr
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Decides whether a stored credential that came from an external CLI can
 * still be considered fresh at time `now`.
 *
 * @param cred - Stored profile credential, or `undefined` when none exists.
 * @param now - Reference time in epoch milliseconds.
 * @returns `false` when there is no credential, when its type is neither
 *   "oauth" nor "token", or when its provider is not one of "qwen-portal",
 *   "minimax-portal" or "openai-codex". Otherwise `true` when the credential
 *   has no numeric `expires`, or when `expires` lies more than
 *   `EXTERNAL_CLI_NEAR_EXPIRY_MS` after `now`.
 */
function isExternalProfileFresh(cred: AuthProfileCredential | undefined, now: number): boolean {
  if (!cred) {
    return false;
  }
  if (cred.type !== "oauth" && cred.type !== "token") {
    return false;
  }
  if (
    cred.provider !== "qwen-portal" &&
    cred.provider !== "minimax-portal" &&
    cred.provider !== "openai-codex"
  ) {
    return false;
  }
  // A credential without a numeric expiry is treated as fresh.
  if (typeof cred.expires !== "number") {
    return true;
  }
  return cred.expires > now + EXTERNAL_CLI_NEAR_EXPIRY_MS;
}
|
||||
|
||||
/** Sync external CLI credentials into the store for a given provider. */
|
||||
function syncExternalCliCredentialsForProvider(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
provider: string,
|
||||
readCredentials: () => OAuthCredential | null,
|
||||
now: number,
|
||||
options: ExternalCliSyncOptions,
|
||||
): boolean {
|
||||
const existing = store.profiles[profileId];
|
||||
const shouldSync =
|
||||
!existing || existing.provider !== provider || !isExternalProfileFresh(existing, now);
|
||||
const creds = shouldSync ? readCredentials() : null;
|
||||
const creds = readCredentials();
|
||||
if (!creds) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const existingOAuth = existing?.type === "oauth" ? existing : undefined;
|
||||
const shouldUpdate =
|
||||
!existingOAuth ||
|
||||
existingOAuth.provider !== provider ||
|
||||
existingOAuth.expires <= now ||
|
||||
creds.expires > existingOAuth.expires;
|
||||
|
||||
if (shouldUpdate && !shallowEqualOAuthCredentials(existingOAuth, creds)) {
|
||||
store.profiles[profileId] = creds;
|
||||
if (options.log !== false) {
|
||||
log.info(`synced ${provider} credentials from external cli`, {
|
||||
profileId,
|
||||
expires: new Date(creds.expires).toISOString(),
|
||||
});
|
||||
}
|
||||
return true;
|
||||
if (shallowEqualOAuthCredentials(existingOAuth, creds)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
store.profiles[profileId] = creds;
|
||||
if (options.log !== false) {
|
||||
log.info(`synced ${provider} credentials from external cli`, {
|
||||
profileId,
|
||||
expires: new Date(creds.expires).toISOString(),
|
||||
});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -106,46 +76,24 @@ export function syncExternalCliCredentials(
|
||||
options: ExternalCliSyncOptions = {},
|
||||
): boolean {
|
||||
let mutated = false;
|
||||
const now = Date.now();
|
||||
|
||||
// Sync from Qwen Code CLI
|
||||
const existingQwen = store.profiles[QWEN_CLI_PROFILE_ID];
|
||||
const shouldSyncQwen =
|
||||
!existingQwen ||
|
||||
existingQwen.provider !== "qwen-portal" ||
|
||||
!isExternalProfileFresh(existingQwen, now);
|
||||
const qwenCreds = shouldSyncQwen
|
||||
? readQwenCliCredentialsCached({ ttlMs: EXTERNAL_CLI_SYNC_TTL_MS })
|
||||
: null;
|
||||
if (qwenCreds) {
|
||||
const existing = store.profiles[QWEN_CLI_PROFILE_ID];
|
||||
const existingOAuth = existing?.type === "oauth" ? existing : undefined;
|
||||
const shouldUpdate =
|
||||
!existingOAuth ||
|
||||
existingOAuth.provider !== "qwen-portal" ||
|
||||
existingOAuth.expires <= now ||
|
||||
qwenCreds.expires > existingOAuth.expires;
|
||||
|
||||
if (shouldUpdate && !shallowEqualOAuthCredentials(existingOAuth, qwenCreds)) {
|
||||
store.profiles[QWEN_CLI_PROFILE_ID] = qwenCreds;
|
||||
mutated = true;
|
||||
if (options.log !== false) {
|
||||
log.info("synced qwen credentials from qwen cli", {
|
||||
profileId: QWEN_CLI_PROFILE_ID,
|
||||
expires: new Date(qwenCreds.expires).toISOString(),
|
||||
});
|
||||
}
|
||||
}
|
||||
if (
|
||||
syncExternalCliCredentialsForProvider(
|
||||
store,
|
||||
QWEN_CLI_PROFILE_ID,
|
||||
"qwen-portal",
|
||||
() => readQwenCliCredentialsCached({ ttlMs: EXTERNAL_CLI_SYNC_TTL_MS }),
|
||||
options,
|
||||
)
|
||||
) {
|
||||
mutated = true;
|
||||
}
|
||||
|
||||
// Sync from MiniMax Portal CLI
|
||||
if (
|
||||
syncExternalCliCredentialsForProvider(
|
||||
store,
|
||||
MINIMAX_CLI_PROFILE_ID,
|
||||
"minimax-portal",
|
||||
() => readMiniMaxCliCredentialsCached({ ttlMs: EXTERNAL_CLI_SYNC_TTL_MS }),
|
||||
now,
|
||||
options,
|
||||
)
|
||||
) {
|
||||
@@ -157,7 +105,6 @@ export function syncExternalCliCredentials(
|
||||
OPENAI_CODEX_DEFAULT_PROFILE_ID,
|
||||
"openai-codex",
|
||||
() => readCodexCliCredentialsCached({ ttlMs: EXTERNAL_CLI_SYNC_TTL_MS }),
|
||||
now,
|
||||
options,
|
||||
)
|
||||
) {
|
||||
|
||||
@@ -46,6 +46,12 @@ async function readCachedClaudeCliCredentials(allowKeychainPrompt: boolean) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds an unsigned, JWT-shaped string whose payload carries only an `exp` claim.
 *
 * Header and payload are base64url-encoded JSON; the third segment is the
 * literal text "signature", so the result is only usable as a fixture.
 *
 * @param expSeconds - Value written to the payload's `exp` claim.
 * @returns A `header.payload.signature` string.
 */
function createJwtWithExp(expSeconds: number): string {
  const encode = (value: Record<string, unknown>): string =>
    Buffer.from(JSON.stringify(value)).toString("base64url");
  const header = encode({ alg: "RS256", typ: "JWT" });
  const payload = encode({ exp: expSeconds });
  return `${header}.${payload}.signature`;
}
|
||||
|
||||
describe("cli credentials", () => {
|
||||
beforeAll(async () => {
|
||||
({
|
||||
@@ -229,6 +235,7 @@ describe("cli credentials", () => {
|
||||
it("reads Codex credentials from keychain when available", async () => {
|
||||
const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-codex-"));
|
||||
process.env.CODEX_HOME = tempHome;
|
||||
const expSeconds = Math.floor(Date.parse("2026-03-23T00:48:49Z") / 1000);
|
||||
|
||||
const accountHash = "cli|";
|
||||
|
||||
@@ -238,7 +245,7 @@ describe("cli credentials", () => {
|
||||
expect(cmd).toContain(accountHash);
|
||||
return JSON.stringify({
|
||||
tokens: {
|
||||
access_token: "keychain-access",
|
||||
access_token: createJwtWithExp(expSeconds),
|
||||
refresh_token: "keychain-refresh",
|
||||
},
|
||||
last_refresh: "2026-01-01T00:00:00Z",
|
||||
@@ -248,15 +255,17 @@ describe("cli credentials", () => {
|
||||
const creds = readCodexCliCredentials({ platform: "darwin", execSync: execSyncMock });
|
||||
|
||||
expect(creds).toMatchObject({
|
||||
access: "keychain-access",
|
||||
access: createJwtWithExp(expSeconds),
|
||||
refresh: "keychain-refresh",
|
||||
provider: "openai-codex",
|
||||
expires: expSeconds * 1000,
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to Codex auth.json when keychain is unavailable", async () => {
|
||||
const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-codex-"));
|
||||
process.env.CODEX_HOME = tempHome;
|
||||
const expSeconds = Math.floor(Date.parse("2026-03-24T12:34:56Z") / 1000);
|
||||
execSyncMock.mockImplementation(() => {
|
||||
throw new Error("not found");
|
||||
});
|
||||
@@ -267,7 +276,7 @@ describe("cli credentials", () => {
|
||||
authPath,
|
||||
JSON.stringify({
|
||||
tokens: {
|
||||
access_token: "file-access",
|
||||
access_token: createJwtWithExp(expSeconds),
|
||||
refresh_token: "file-refresh",
|
||||
},
|
||||
}),
|
||||
@@ -277,9 +286,10 @@ describe("cli credentials", () => {
|
||||
const creds = readCodexCliCredentials({ execSync: execSyncMock });
|
||||
|
||||
expect(creds).toMatchObject({
|
||||
access: "file-access",
|
||||
access: createJwtWithExp(expSeconds),
|
||||
refresh: "file-refresh",
|
||||
provider: "openai-codex",
|
||||
expires: expSeconds * 1000,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -153,6 +153,22 @@ function computeCodexKeychainAccount(codexHome: string) {
|
||||
return `cli|${hash.slice(0, 16)}`;
|
||||
}
|
||||
|
||||
/**
 * Reads the `exp` claim from a JWT-shaped token and converts it to milliseconds.
 *
 * The signature is NOT verified: the second dot-separated segment is only
 * base64url-decoded and parsed as JSON.
 *
 * @param token - Dot-separated token; the second segment is treated as the payload.
 * @returns `exp * 1000` when `exp` is a finite, positive number; otherwise
 *   `null` (fewer than two segments, unparsable payload, non-object payload,
 *   or a missing/invalid `exp`).
 */
function decodeJwtExpiryMs(token: string): number | null {
  const payloadSegment = token.split(".")[1];
  if (payloadSegment === undefined) {
    return null;
  }

  let payload: unknown;
  try {
    payload = JSON.parse(Buffer.from(payloadSegment, "base64url").toString("utf8"));
  } catch {
    return null;
  }

  // JSON such as `null`, `42` or `"text"` is valid but carries no claims.
  if (typeof payload !== "object" || payload === null) {
    return null;
  }

  const exp = (payload as { exp?: unknown }).exp;
  return typeof exp === "number" && Number.isFinite(exp) && exp > 0 ? exp * 1000 : null;
}
|
||||
|
||||
function readCodexKeychainCredentials(options?: {
|
||||
platform?: NodeJS.Platform;
|
||||
execSync?: ExecSyncFn;
|
||||
@@ -193,9 +209,10 @@ function readCodexKeychainCredentials(options?: {
|
||||
typeof lastRefreshRaw === "string" || typeof lastRefreshRaw === "number"
|
||||
? new Date(lastRefreshRaw).getTime()
|
||||
: Date.now();
|
||||
const expires = Number.isFinite(lastRefresh)
|
||||
const fallbackExpiry = Number.isFinite(lastRefresh)
|
||||
? lastRefresh + 60 * 60 * 1000
|
||||
: Date.now() + 60 * 60 * 1000;
|
||||
const expires = decodeJwtExpiryMs(accessToken) ?? fallbackExpiry;
|
||||
const accountId = typeof tokens?.account_id === "string" ? tokens.account_id : undefined;
|
||||
|
||||
log.info("read codex credentials from keychain", {
|
||||
@@ -483,13 +500,14 @@ export function readCodexCliCredentials(options?: {
|
||||
return null;
|
||||
}
|
||||
|
||||
let expires: number;
|
||||
let fallbackExpiry: number;
|
||||
try {
|
||||
const stat = fs.statSync(authPath);
|
||||
expires = stat.mtimeMs + 60 * 60 * 1000;
|
||||
fallbackExpiry = stat.mtimeMs + 60 * 60 * 1000;
|
||||
} catch {
|
||||
expires = Date.now() + 60 * 60 * 1000;
|
||||
fallbackExpiry = Date.now() + 60 * 60 * 1000;
|
||||
}
|
||||
const expires = decodeJwtExpiryMs(accessToken) ?? fallbackExpiry;
|
||||
|
||||
return {
|
||||
type: "oauth",
|
||||
|
||||
@@ -117,6 +117,10 @@ function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
|
||||
return msg.includes("hit your chatgpt usage limit") && msg.includes("try again in");
|
||||
}
|
||||
|
||||
/**
 * Reports whether an error message contains `refresh_token_reused`
 * (matched case-insensitively, anywhere in the text).
 *
 * @param raw - Raw error message text.
 */
function isRefreshTokenReused(raw: string): boolean {
  return /refresh_token_reused/i.test(raw);
}
|
||||
|
||||
/**
 * Reports whether an error message contains "instructions are required"
 * (matched case-insensitively, anywhere in the text).
 *
 * @param raw - Raw error message text.
 */
function isInstructionsRequiredError(raw: string): boolean {
  return /instructions are required/i.test(raw);
}
|
||||
@@ -643,6 +647,15 @@ describeLive("live models (profile keys)", () => {
|
||||
logProgress(`${progressLabel}: skip (rate limit)`);
|
||||
break;
|
||||
}
|
||||
if (
|
||||
allowNotFoundSkip &&
|
||||
model.provider === "openai-codex" &&
|
||||
isRefreshTokenReused(message)
|
||||
) {
|
||||
skipped.push({ model: id, reason: message });
|
||||
logProgress(`${progressLabel}: skip (codex refresh token reused)`);
|
||||
break;
|
||||
}
|
||||
if (
|
||||
allowNotFoundSkip &&
|
||||
model.provider === "openai-codex" &&
|
||||
|
||||
@@ -24,7 +24,7 @@ import { shouldSuppressBuiltInModel } from "../agents/model-suppression.js";
|
||||
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
|
||||
import { isRateLimitErrorMessage } from "../agents/pi-embedded-helpers/errors.js";
|
||||
import { discoverAuthStorage, discoverModels } from "../agents/pi-model-discovery.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { clearRuntimeConfigSnapshot, loadConfig } from "../config/config.js";
|
||||
import type { ModelsConfig, OpenClawConfig, ModelProviderConfig } from "../config/types.js";
|
||||
import { isTruthyEnvValue } from "../infra/env.js";
|
||||
import { DEFAULT_AGENT_ID } from "../routing/session-key.js";
|
||||
@@ -38,7 +38,7 @@ import {
|
||||
shouldRetryToolReadProbe,
|
||||
} from "./live-tool-probe-utils.js";
|
||||
import { startGatewayServer } from "./server.js";
|
||||
import { extractPayloadText } from "./test-helpers.agent-results.js";
|
||||
import { loadSessionEntry, readSessionMessages } from "./session-utils.js";
|
||||
|
||||
const LIVE = isTruthyEnvValue(process.env.LIVE) || isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST);
|
||||
const GATEWAY_LIVE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_GATEWAY);
|
||||
@@ -171,6 +171,32 @@ function logProgress(message: string): void {
|
||||
console.log(`[live] ${message}`);
|
||||
}
|
||||
|
||||
/**
 * Switches the process environment to a production-like state: removes
 * `VITEST` and sets `NODE_ENV` to "production".
 *
 * @returns The values `VITEST` and `NODE_ENV` had before the change
 *   (`undefined` when a variable was unset), so the caller can restore them.
 */
function enterProductionEnvForLiveRun(): {
  vitest: string | undefined;
  nodeEnv: string | undefined;
} {
  const previous = {
    vitest: process.env.VITEST,
    nodeEnv: process.env.NODE_ENV,
  };
  delete process.env.VITEST;
  process.env.NODE_ENV = "production";
  return previous;
}
|
||||
|
||||
/**
 * Restores `VITEST` and `NODE_ENV` to previously captured values.
 *
 * @param previous - Captured values; `undefined` means the variable was
 *   unset and is removed again.
 */
function restoreProductionEnvForLiveRun(previous: {
  vitest: string | undefined;
  nodeEnv: string | undefined;
}): void {
  const restore = (key: "VITEST" | "NODE_ENV", value: string | undefined): void => {
    if (value === undefined) {
      // Assigning `undefined` would store the string "undefined"; delete instead.
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  };
  restore("VITEST", previous.vitest);
  restore("NODE_ENV", previous.nodeEnv);
}
|
||||
|
||||
function formatFailurePreview(
|
||||
failures: Array<{ model: string; error: string }>,
|
||||
maxItems: number,
|
||||
@@ -319,25 +345,14 @@ async function runAnthropicRefusalProbe(params: {
|
||||
}): Promise<void> {
|
||||
logProgress(`${params.label}: refusal-probe`);
|
||||
const magic = buildAnthropicRefusalToken();
|
||||
const runId = randomUUID();
|
||||
const probe = await withGatewayLiveProbeTimeout(
|
||||
params.client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey: params.sessionKey,
|
||||
idempotencyKey: `idem-${runId}-refusal`,
|
||||
message: `Reply with the single word ok. Test token: ${magic}`,
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${params.label}: refusal-probe`,
|
||||
);
|
||||
if (probe?.status !== "ok") {
|
||||
throw new Error(`refusal probe failed: status=${String(probe?.status)}`);
|
||||
}
|
||||
const probeText = extractPayloadText(probe?.result);
|
||||
const probeText = await requestGatewayAgentText({
|
||||
client: params.client,
|
||||
sessionKey: params.sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-refusal`,
|
||||
message: `Reply with the single word ok. Test token: ${magic}`,
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${params.label}: refusal-probe`,
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: probeText,
|
||||
model: params.modelKey,
|
||||
@@ -348,25 +363,14 @@ async function runAnthropicRefusalProbe(params: {
|
||||
throw new Error(`refusal probe missing ok: ${probeText}`);
|
||||
}
|
||||
|
||||
const followupId = randomUUID();
|
||||
const followup = await withGatewayLiveProbeTimeout(
|
||||
params.client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey: params.sessionKey,
|
||||
idempotencyKey: `idem-${followupId}-refusal-followup`,
|
||||
message: "Now reply with exactly: still ok.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${params.label}: refusal-followup`,
|
||||
);
|
||||
if (followup?.status !== "ok") {
|
||||
throw new Error(`refusal followup failed: status=${String(followup?.status)}`);
|
||||
}
|
||||
const followupText = extractPayloadText(followup?.result);
|
||||
const followupText = await requestGatewayAgentText({
|
||||
client: params.client,
|
||||
sessionKey: params.sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-refusal-followup`,
|
||||
message: "Now reply with exactly: still ok.",
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${params.label}: refusal-followup`,
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: followupText,
|
||||
model: params.modelKey,
|
||||
@@ -475,11 +479,6 @@ async function getFreeGatewayPort(): Promise<number> {
|
||||
throw new Error("failed to acquire a free gateway port block");
|
||||
}
|
||||
|
||||
/**
 * Loosely typed shape of a final response to an "agent" request.
 * Both fields are `unknown` and must be narrowed before use.
 */
type AgentFinalPayload = {
  status?: unknown;
  result?: unknown;
};
|
||||
|
||||
async function connectClient(params: { url: string; token: string }) {
|
||||
return await new Promise<GatewayClient>((resolve, reject) => {
|
||||
let settled = false;
|
||||
@@ -513,6 +512,115 @@ async function connectClient(params: { url: string; token: string }) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Extracts the trimmed text of a transcript message.
 *
 * Lookup order:
 *  1. `message.text` when it is a non-blank string;
 *  2. `message.content` when it is a non-blank string;
 *  3. when `message.content` is an array, the non-blank `text` strings of its
 *     object entries, each trimmed and joined with "\n".
 *
 * @param message - Arbitrary value read from a session transcript.
 * @returns The extracted text, or "" when nothing usable is found.
 */
function extractTranscriptMessageText(message: unknown): string {
  if (!message || typeof message !== "object") {
    return "";
  }

  // Trimmed string, or "" for non-strings and blank strings.
  const trimmedOrEmpty = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const { text, content } = message as { text?: unknown; content?: unknown };
  const direct = trimmedOrEmpty(text) || trimmedOrEmpty(content);
  if (direct) {
    return direct;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  return content
    .map((entry: unknown) =>
      entry && typeof entry === "object"
        ? trimmedOrEmpty((entry as { text?: unknown }).text)
        : "",
    )
    .filter(Boolean)
    .join("\n")
    .trim();
}
|
||||
|
||||
/**
 * Collects the text of every assistant message in a session transcript.
 *
 * @param sessionKey - Key passed to `loadSessionEntry` to locate the session.
 * @returns One string per message whose `role` is "assistant", in transcript
 *   order. A string is "" when that message yields no extractable text. The
 *   result is empty when the session entry has no `sessionId`.
 */
function readSessionAssistantTexts(sessionKey: string): string[] {
  const { storePath, entry } = loadSessionEntry(sessionKey);
  if (!entry?.sessionId) {
    return [];
  }

  const messages = readSessionMessages(entry.sessionId, storePath, entry.sessionFile);
  const assistantTexts: string[] = [];
  for (const message of messages) {
    if (!message || typeof message !== "object") {
      continue;
    }
    if ((message as { role?: unknown }).role !== "assistant") {
      continue;
    }
    // Blank results are pushed too, so the array length equals the number of
    // assistant messages, not the number of non-empty texts.
    assistantTexts.push(extractTranscriptMessageText(message));
  }
  return assistantTexts;
}
|
||||
|
||||
/**
 * Polls the session transcript until an assistant message beyond the baseline
 * count contains non-empty text.
 *
 * Polling starts at 50 ms and doubles after each miss, capped at 250 ms, until
 * `GATEWAY_LIVE_PROBE_TIMEOUT_MS` has elapsed.
 *
 * @param params.sessionKey - Session whose transcript is read.
 * @param params.baselineAssistantCount - Number of assistant messages that
 *   already existed; only messages after this index are considered.
 * @param params.context - Label included in the timeout error message.
 * @returns The last non-empty text among the new assistant messages.
 * @throws Error when no such text appears before the timeout.
 */
async function waitForSessionAssistantText(params: {
  sessionKey: string;
  baselineAssistantCount: number;
  context: string;
}) {
  const startedAt = Date.now();
  let delayMs = 50;
  while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) {
    const assistantTexts = readSessionAssistantTexts(params.sessionKey);
    if (assistantTexts.length > params.baselineAssistantCount) {
      // Prefer the most recent non-empty message among those added since the baseline.
      const freshText = assistantTexts
        .slice(params.baselineAssistantCount)
        .map((text) => text.trim())
        .findLast((text) => text.length > 0);
      if (freshText) {
        return freshText;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    delayMs = Math.min(delayMs * 2, 250);
  }
  throw new Error(`probe timeout after ${GATEWAY_LIVE_PROBE_TIMEOUT_MS}ms (${params.context})`);
}
|
||||
|
||||
/**
 * Sends an "agent" request through the gateway client and returns the
 * assistant text that subsequently appears in the session transcript.
 *
 * The number of assistant messages is recorded before the request is sent.
 * After the gateway answers with status "accepted", the transcript is polled
 * for a newer non-empty assistant message.
 *
 * NOTE(review): the baseline is a plain message count, so this presumably
 * assumes no other run is appending to the same session concurrently — confirm.
 *
 * @param params.client - Gateway client used to issue the request.
 * @param params.sessionKey - Session the request targets and whose transcript is read.
 * @param params.message - Prompt text sent to the agent.
 * @param params.thinkingLevel - Forwarded as the request's `thinking` field.
 * @param params.context - Label used in timeout messages.
 * @param params.idempotencyKey - Forwarded as the request's idempotency key.
 * @param params.attachments - Optional attachments forwarded unchanged.
 * @returns The new assistant text read from the transcript.
 * @throws Error when the request is not answered with status "accepted", or
 *   when a wait times out.
 */
async function requestGatewayAgentText(params: {
  client: GatewayClient;
  sessionKey: string;
  message: string;
  thinkingLevel: string;
  context: string;
  idempotencyKey: string;
  attachments?: Array<{
    mimeType: string;
    fileName: string;
    content: string;
  }>;
}) {
  // Capture the baseline before sending so only replies produced after this
  // request are considered.
  const baselineAssistantCount = readSessionAssistantTexts(params.sessionKey).length;
  const accepted = await withGatewayLiveProbeTimeout(
    params.client.request<{ runId?: unknown; status?: unknown }>("agent", {
      sessionKey: params.sessionKey,
      idempotencyKey: params.idempotencyKey,
      message: params.message,
      thinking: params.thinkingLevel,
      deliver: false,
      attachments: params.attachments,
    }),
    `${params.context}: agent-accept`,
  );
  if (accepted?.status !== "accepted") {
    throw new Error(`agent status=${String(accepted?.status)}`);
  }
  return await waitForSessionAssistantText({
    sessionKey: params.sessionKey,
    baselineAssistantCount,
    context: `${params.context}: transcript-final`,
  });
}
|
||||
|
||||
type GatewayModelSuiteParams = {
|
||||
label: string;
|
||||
cfg: OpenClawConfig;
|
||||
@@ -636,6 +744,8 @@ function buildMinimaxProviderOverride(params: {
|
||||
}
|
||||
|
||||
async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
clearRuntimeConfigSnapshot();
|
||||
const runtimeEnv = enterProductionEnvForLiveRun();
|
||||
const previous = {
|
||||
configPath: process.env.OPENCLAW_CONFIG_PATH,
|
||||
token: process.env.OPENCLAW_GATEWAY_TOKEN,
|
||||
@@ -793,48 +903,26 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
);
|
||||
|
||||
logProgress(`${progressLabel}: prompt`);
|
||||
const runId = randomUUID();
|
||||
const payload = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId}`,
|
||||
message:
|
||||
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: prompt`,
|
||||
);
|
||||
|
||||
if (payload?.status !== "ok") {
|
||||
throw new Error(`agent status=${String(payload?.status)}`);
|
||||
}
|
||||
let text = extractPayloadText(payload?.result);
|
||||
let text = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}`,
|
||||
message:
|
||||
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: prompt`,
|
||||
});
|
||||
if (!text) {
|
||||
logProgress(`${progressLabel}: empty response, retrying`);
|
||||
const retry = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-retry`,
|
||||
message:
|
||||
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: prompt-retry`,
|
||||
);
|
||||
if (retry?.status !== "ok") {
|
||||
throw new Error(`agent status=${String(retry?.status)}`);
|
||||
}
|
||||
text = extractPayloadText(retry?.result);
|
||||
text = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-retry`,
|
||||
message:
|
||||
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: prompt-retry`,
|
||||
});
|
||||
}
|
||||
if (!text && isGoogleishProvider(model.provider)) {
|
||||
logProgress(`${progressLabel}: skip (google empty response)`);
|
||||
@@ -881,36 +969,20 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
toolReadAttempt += 1
|
||||
) {
|
||||
const strictReply = toolReadAttempt > 0;
|
||||
const toolProbe = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-tool-${toolReadAttempt + 1}`,
|
||||
message: strictReply
|
||||
? "OpenClaw live tool probe (local, safe): " +
|
||||
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`
|
||||
: "OpenClaw live tool probe (local, safe): " +
|
||||
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
"Then reply with the two nonce values you read (include both).",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: tool-read`,
|
||||
);
|
||||
if (toolProbe?.status !== "ok") {
|
||||
if (toolReadAttempt + 1 < maxToolReadAttempts) {
|
||||
logProgress(
|
||||
`${progressLabel}: tool-read retry (${toolReadAttempt + 2}/${maxToolReadAttempts}) status=${String(toolProbe?.status)}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new Error(`tool probe failed: status=${String(toolProbe?.status)}`);
|
||||
}
|
||||
toolText = extractPayloadText(toolProbe?.result);
|
||||
toolText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-tool-${toolReadAttempt + 1}`,
|
||||
message: strictReply
|
||||
? "OpenClaw live tool probe (local, safe): " +
|
||||
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`
|
||||
: "OpenClaw live tool probe (local, safe): " +
|
||||
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
"Then reply with the two nonce values you read (include both).",
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: tool-read`,
|
||||
});
|
||||
if (
|
||||
isEmptyStreamText(toolText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
@@ -960,40 +1032,24 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
execReadAttempt += 1
|
||||
) {
|
||||
const strictReply = execReadAttempt > 0;
|
||||
const execReadProbe = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
|
||||
message: strictReply
|
||||
? "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceC}. No extra text.`
|
||||
: "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
"Finally reply including the nonce text you read back.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: tool-exec`,
|
||||
);
|
||||
if (execReadProbe?.status !== "ok") {
|
||||
if (execReadAttempt + 1 < maxExecReadAttempts) {
|
||||
logProgress(
|
||||
`${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) status=${String(execReadProbe?.status)}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new Error(`exec+read probe failed: status=${String(execReadProbe?.status)}`);
|
||||
}
|
||||
execReadText = extractPayloadText(execReadProbe?.result);
|
||||
execReadText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
|
||||
message: strictReply
|
||||
? "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceC}. No extra text.`
|
||||
: "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
"Finally reply including the nonce text you read back.",
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: tool-exec`,
|
||||
});
|
||||
if (
|
||||
isEmptyStreamText(execReadText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
@@ -1040,62 +1096,51 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
const imageBase64 = renderCatNoncePngBase64(imageCode);
|
||||
const runIdImage = randomUUID();
|
||||
|
||||
const imageProbe = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
const imageText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdImage}-image`,
|
||||
message:
|
||||
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
|
||||
"(1) the animal shown or written in the image, lowercase; " +
|
||||
"(2) the code printed in the image, uppercase. No extra text.",
|
||||
attachments: [
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdImage}-image`,
|
||||
message:
|
||||
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
|
||||
"(1) the animal shown or written in the image, lowercase; " +
|
||||
"(2) the code printed in the image, uppercase. No extra text.",
|
||||
attachments: [
|
||||
{
|
||||
mimeType: "image/png",
|
||||
fileName: `probe-${runIdImage}.png`,
|
||||
content: imageBase64,
|
||||
},
|
||||
],
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
mimeType: "image/png",
|
||||
fileName: `probe-${runIdImage}.png`,
|
||||
content: imageBase64,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: image`,
|
||||
);
|
||||
],
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: image`,
|
||||
});
|
||||
// Best-effort: do not fail the whole live suite on flaky image handling.
|
||||
// (We still keep prompt + tool probes as hard checks.)
|
||||
if (imageProbe?.status !== "ok") {
|
||||
logProgress(`${progressLabel}: image skip (status=${String(imageProbe?.status)})`);
|
||||
if (
|
||||
isEmptyStreamText(imageText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
) {
|
||||
logProgress(`${progressLabel}: image skip (${model.provider} empty response)`);
|
||||
} else {
|
||||
const imageText = extractPayloadText(imageProbe?.result);
|
||||
if (
|
||||
isEmptyStreamText(imageText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
) {
|
||||
logProgress(`${progressLabel}: image skip (${model.provider} empty response)`);
|
||||
assertNoReasoningTags({
|
||||
text: imageText,
|
||||
model: modelKey,
|
||||
phase: "image",
|
||||
label: params.label,
|
||||
});
|
||||
if (!/\bcat\b/i.test(imageText)) {
|
||||
logProgress(`${progressLabel}: image skip (missing 'cat')`);
|
||||
} else {
|
||||
assertNoReasoningTags({
|
||||
text: imageText,
|
||||
model: modelKey,
|
||||
phase: "image",
|
||||
label: params.label,
|
||||
});
|
||||
if (!/\bcat\b/i.test(imageText)) {
|
||||
logProgress(`${progressLabel}: image skip (missing 'cat')`);
|
||||
} else {
|
||||
const candidates = imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
|
||||
const bestDistance = candidates.reduce((best, cand) => {
|
||||
if (Math.abs(cand.length - imageCode.length) > 2) {
|
||||
return best;
|
||||
}
|
||||
return Math.min(best, editDistance(cand, imageCode));
|
||||
}, Number.POSITIVE_INFINITY);
|
||||
// OCR / image-read flake: allow a small edit distance, but still require the "cat" token above.
|
||||
if (!(bestDistance <= 3)) {
|
||||
logProgress(`${progressLabel}: image skip (code mismatch)`);
|
||||
const candidates = imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
|
||||
const bestDistance = candidates.reduce((best, cand) => {
|
||||
if (Math.abs(cand.length - imageCode.length) > 2) {
|
||||
return best;
|
||||
}
|
||||
return Math.min(best, editDistance(cand, imageCode));
|
||||
}, Number.POSITIVE_INFINITY);
|
||||
// OCR / image-read flake: allow a small edit distance, but still require the "cat" token above.
|
||||
if (!(bestDistance <= 3)) {
|
||||
logProgress(`${progressLabel}: image skip (code mismatch)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1108,24 +1153,14 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
) {
|
||||
logProgress(`${progressLabel}: tool-only regression`);
|
||||
const runId2 = randomUUID();
|
||||
const first = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId2}-1`,
|
||||
message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: tool-only-regression-first`,
|
||||
);
|
||||
if (first?.status !== "ok") {
|
||||
throw new Error(`tool-only turn failed: status=${String(first?.status)}`);
|
||||
}
|
||||
const firstText = extractPayloadText(first?.result);
|
||||
const firstText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId2}-1`,
|
||||
message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: tool-only-regression-first`,
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: firstText,
|
||||
model: modelKey,
|
||||
@@ -1133,24 +1168,14 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
label: params.label,
|
||||
});
|
||||
|
||||
const second = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId2}-2`,
|
||||
message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
`${progressLabel}: tool-only-regression-second`,
|
||||
);
|
||||
if (second?.status !== "ok") {
|
||||
throw new Error(`post-tool message failed: status=${String(second?.status)}`);
|
||||
}
|
||||
const reply = extractPayloadText(second?.result);
|
||||
const reply = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId2}-2`,
|
||||
message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
|
||||
thinkingLevel: params.thinkingLevel,
|
||||
context: `${progressLabel}: tool-only-regression-second`,
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: reply,
|
||||
model: modelKey,
|
||||
@@ -1290,6 +1315,8 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
logProgress(`[${params.label}] skipped all models (missing profiles)`);
|
||||
}
|
||||
} finally {
|
||||
clearRuntimeConfigSnapshot();
|
||||
restoreProductionEnvForLiveRun(runtimeEnv);
|
||||
client.stop();
|
||||
await server.close({ reason: "live test complete" });
|
||||
await fs.rm(toolProbePath, { force: true });
|
||||
@@ -1317,6 +1344,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
it(
|
||||
"runs meaningful prompts across models with available keys",
|
||||
async () => {
|
||||
clearRuntimeConfigSnapshot();
|
||||
const cfg = loadConfig();
|
||||
await ensureOpenClawModelsJson(cfg);
|
||||
|
||||
@@ -1422,6 +1450,8 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
if (!ZAI_FALLBACK) {
|
||||
return;
|
||||
}
|
||||
clearRuntimeConfigSnapshot();
|
||||
const runtimeEnv = enterProductionEnvForLiveRun();
|
||||
const previous = {
|
||||
configPath: process.env.OPENCLAW_CONFIG_PATH,
|
||||
token: process.env.OPENCLAW_GATEWAY_TOKEN,
|
||||
@@ -1520,27 +1550,16 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
"zai-fallback: sessions-reset",
|
||||
);
|
||||
|
||||
const runId = randomUUID();
|
||||
const toolProbe = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runId}-tool`,
|
||||
message:
|
||||
`Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
|
||||
thinking: THINKING_LEVEL,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
"zai-fallback: tool-probe",
|
||||
);
|
||||
if (toolProbe?.status !== "ok") {
|
||||
throw new Error(`anthropic tool probe failed: status=${String(toolProbe?.status)}`);
|
||||
}
|
||||
const toolText = extractPayloadText(toolProbe?.result);
|
||||
const toolText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-tool`,
|
||||
message:
|
||||
`Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
|
||||
thinkingLevel: THINKING_LEVEL,
|
||||
context: "zai-fallback: tool-probe",
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: toolText,
|
||||
model: "anthropic/claude-opus-4-5",
|
||||
@@ -1559,27 +1578,16 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
"zai-fallback: sessions-patch-zai",
|
||||
);
|
||||
|
||||
const followupId = randomUUID();
|
||||
const followup = await withGatewayLiveProbeTimeout(
|
||||
client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${followupId}-followup`,
|
||||
message:
|
||||
`What are the values of nonceA and nonceB in "${toolProbePath}"? ` +
|
||||
`Reply with exactly: ${nonceA} ${nonceB}.`,
|
||||
thinking: THINKING_LEVEL,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
),
|
||||
"zai-fallback: followup",
|
||||
);
|
||||
if (followup?.status !== "ok") {
|
||||
throw new Error(`zai followup failed: status=${String(followup?.status)}`);
|
||||
}
|
||||
const followupText = extractPayloadText(followup?.result);
|
||||
const followupText = await requestGatewayAgentText({
|
||||
client,
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}-followup`,
|
||||
message:
|
||||
`What are the values of nonceA and nonceB in "${toolProbePath}"? ` +
|
||||
`Reply with exactly: ${nonceA} ${nonceB}.`,
|
||||
thinkingLevel: THINKING_LEVEL,
|
||||
context: "zai-fallback: followup",
|
||||
});
|
||||
assertNoReasoningTags({
|
||||
text: followupText,
|
||||
model: "zai/glm-4.7",
|
||||
@@ -1590,6 +1598,8 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
throw new Error(`zai followup missing nonce: ${followupText}`);
|
||||
}
|
||||
} finally {
|
||||
clearRuntimeConfigSnapshot();
|
||||
restoreProductionEnvForLiveRun(runtimeEnv);
|
||||
client.stop();
|
||||
await server.close({ reason: "live test complete" });
|
||||
await fs.rm(toolProbePath, { force: true });
|
||||
|
||||
Reference in New Issue
Block a user