diff --git a/CHANGELOG.md b/CHANGELOG.md index 32de2c31f6c..b9654c1c218 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Docs: https://docs.openclaw.ai - Security/Hooks: normalize hook session-key classification with trim/lowercase plus Unicode NFKC folding (for example full-width `HOOK:...`) so external-content wrapping cannot be bypassed by mixed-case or lookalike prefixes. (#25750) Thanks @bmendonca3. - Security/Voice Call: add Telnyx webhook replay detection and canonicalize replay-key signature encoding (Base64/Base64URL equivalent forms dedupe together), so duplicate signed webhook deliveries no longer re-trigger side effects. (#25832) Thanks @bmendonca3. - Providers/OpenRouter/Auth profiles: bypass auth-profile cooldown/disable windows for OpenRouter, so provider failures no longer put OpenRouter profiles into local cooldown and stale legacy cooldown markers are ignored in fallback and status selection paths. (#25892) Thanks @alexanderatallah for raising this and @vincentkoc for the fix. +- Providers/Google reasoning: sanitize invalid negative `thinkingBudget` values in `google-generative-ai` request payloads by dropping negative budgets (for example `-1`) and, for Gemini 3.1 models, mapping the configured reasoning effort to `thinkingLevel`, preventing malformed reasoning payloads. (#25900) - WhatsApp/Web reconnect: treat close status `440` as non-retryable (including string-form status values), stop reconnect loops immediately, and emit operator guidance to relink after resolving session conflicts. (#25858) Thanks @markmusson. - WhatsApp/Reasoning safety: suppress outbound payloads marked as reasoning and hard-drop text payloads that begin with `Reasoning:` before WhatsApp delivery, preventing hidden thinking blocks from leaking to end users through final-message paths. 
(#25804, #25214, #24328) - Onboarding/Telegram: keep core-channel onboarding available when plugin registry population is missing by falling back to built-in adapters and continuing wizard setup with actionable recovery guidance. (#25803) Thanks @Suko. diff --git a/src/agents/pi-embedded-runner-extraparams.live.test.ts b/src/agents/pi-embedded-runner-extraparams.live.test.ts index 38c500cf60d..8da5bef6f57 100644 --- a/src/agents/pi-embedded-runner-extraparams.live.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.live.test.ts @@ -6,9 +6,13 @@ import { isTruthyEnvValue } from "../infra/env.js"; import { applyExtraParamsToAgent } from "./pi-embedded-runner.js"; const OPENAI_KEY = process.env.OPENAI_API_KEY ?? ""; +const GEMINI_KEY = process.env.GEMINI_API_KEY ?? ""; const LIVE = isTruthyEnvValue(process.env.OPENAI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE); +const GEMINI_LIVE = + isTruthyEnvValue(process.env.GEMINI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE); const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip; +const describeGeminiLive = GEMINI_LIVE && GEMINI_KEY ? describe : describe.skip; describeLive("pi embedded extra params (live)", () => { it("applies config maxTokens to openai streamFn", async () => { @@ -62,3 +66,170 @@ describeLive("pi embedded extra params (live)", () => { expect(outputTokens ?? 0).toBeLessThanOrEqual(20); }, 30_000); }); + +describeGeminiLive("pi embedded extra params (gemini live)", () => { + function isGoogleModelUnavailableError(raw: string | undefined): boolean { + const msg = (raw ?? "").toLowerCase(); + if (!msg) { + return false; + } + return ( + msg.includes("not found") || + msg.includes("404") || + msg.includes("not_available") || + msg.includes("permission denied") || + msg.includes("unsupported model") + ); + } + + function isGoogleImageProcessingError(raw: string | undefined): boolean { + const msg = (raw ?? 
"").toLowerCase(); + if (!msg) { + return false; + } + return ( + msg.includes("unable to process input image") || + msg.includes("invalid_argument") || + msg.includes("bad request") + ); + } + + async function runGeminiProbe(params: { + agentStreamFn: typeof streamSimple; + model: Model<"google-generative-ai">; + apiKey: string; + oneByOneRedPngBase64: string; + includeImage?: boolean; + prompt: string; + onPayload?: (payload: Record) => void; + }): Promise<{ sawDone: boolean; stopReason?: string; errorMessage?: string }> { + const userContent: Array< + { type: "text"; text: string } | { type: "image"; mimeType: string; data: string } + > = [{ type: "text", text: params.prompt }]; + if (params.includeImage ?? true) { + userContent.push({ + type: "image", + mimeType: "image/png", + data: params.oneByOneRedPngBase64, + }); + } + + const stream = params.agentStreamFn( + params.model, + { + messages: [ + { + role: "user", + content: userContent, + timestamp: Date.now(), + }, + ], + }, + { + apiKey: params.apiKey, + reasoning: "high", + maxTokens: 64, + onPayload: (payload) => { + params.onPayload?.(payload as Record); + }, + }, + ); + + let sawDone = false; + let stopReason: string | undefined; + let errorMessage: string | undefined; + + for await (const event of stream) { + if (event.type === "done") { + sawDone = true; + stopReason = event.reason; + } else if (event.type === "error") { + stopReason = event.reason; + errorMessage = event.error?.errorMessage; + } + } + + return { sawDone, stopReason, errorMessage }; + } + + it("sanitizes Gemini 3.1 thinking payload and keeps image parts with reasoning enabled", async () => { + const model = getModel( + "google", + "gemini-3.1-pro-preview", + ) as unknown as Model<"google-generative-ai">; + + const agent = { streamFn: streamSimple }; + applyExtraParamsToAgent(agent, undefined, "google", model.id, undefined, "high"); + + const oneByOneRedPngBase64 = + 
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4zwAAAgIBAJBzWgkAAAAASUVORK5CYII="; + + let capturedPayload: Record | undefined; + const imageResult = await runGeminiProbe({ + agentStreamFn: agent.streamFn, + model, + apiKey: GEMINI_KEY, + oneByOneRedPngBase64, + includeImage: true, + prompt: "What color is this image? Reply with one word.", + onPayload: (payload) => { + capturedPayload = payload; + }, + }); + + expect(capturedPayload).toBeDefined(); + const thinkingConfig = ( + capturedPayload?.config as { thinkingConfig?: Record } | undefined + )?.thinkingConfig; + expect(thinkingConfig?.thinkingBudget).toBeUndefined(); + expect(thinkingConfig?.thinkingLevel).toBe("HIGH"); + + const imagePart = ( + capturedPayload?.contents as + | Array<{ parts?: Array<{ inlineData?: { mimeType?: string; data?: string } }> }> + | undefined + )?.[0]?.parts?.find((part) => part.inlineData !== undefined)?.inlineData; + expect(imagePart).toEqual({ + mimeType: "image/png", + data: oneByOneRedPngBase64, + }); + + if (!imageResult.sawDone && !isGoogleModelUnavailableError(imageResult.errorMessage)) { + expect(isGoogleImageProcessingError(imageResult.errorMessage)).toBe(true); + } + + const textResult = await runGeminiProbe({ + agentStreamFn: agent.streamFn, + model, + apiKey: GEMINI_KEY, + oneByOneRedPngBase64, + includeImage: false, + prompt: "Reply with exactly OK.", + }); + + if (!textResult.sawDone && isGoogleModelUnavailableError(textResult.errorMessage)) { + // Some keys/regions do not expose Gemini 3.1 preview. Fall back to a + // stable model to keep live reasoning verification active. 
+ const fallbackModel = getModel( + "google", + "gemini-2.5-pro", + ) as unknown as Model<"google-generative-ai">; + const fallback = await runGeminiProbe({ + agentStreamFn: agent.streamFn, + model: fallbackModel, + apiKey: GEMINI_KEY, + oneByOneRedPngBase64, + includeImage: false, + prompt: "Reply with exactly OK.", + }); + expect(fallback.sawDone).toBe(true); + expect(fallback.stopReason).toBeDefined(); + expect(fallback.stopReason).not.toBe("error"); + return; + } + + expect(textResult.sawDone).toBe(true); + expect(textResult.stopReason).toBeDefined(); + expect(textResult.stopReason).not.toBe("error"); + }, 45_000); +}); diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 4392edfb3e1..404d4439da4 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -372,6 +372,102 @@ describe("applyExtraParamsToAgent", () => { expect(payloads[0]?.thinking).toBe("off"); }); + it("removes invalid negative Google thinkingBudget and maps Gemini 3.1 to thinkingLevel", () => { + const payloads: Record[] = []; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload: Record = { + contents: [ + { + role: "user", + parts: [ + { text: "describe image" }, + { + inlineData: { + mimeType: "image/png", + data: "ZmFrZQ==", + }, + }, + ], + }, + ], + config: { + thinkingConfig: { + includeThoughts: true, + thinkingBudget: -1, + }, + }, + }; + options?.onPayload?.(payload); + payloads.push(payload); + return {} as ReturnType; + }; + const agent = { streamFn: baseStreamFn }; + + applyExtraParamsToAgent(agent, undefined, "atproxy", "gemini-3.1-pro-high", undefined, "high"); + + const model = { + api: "google-generative-ai", + provider: "atproxy", + id: "gemini-3.1-pro-high", + } as Model<"google-generative-ai">; + const context: Context = { messages: [] }; + void agent.streamFn?.(model, context, {}); + + expect(payloads).toHaveLength(1); 
+ const thinkingConfig = ( + payloads[0]?.config as { thinkingConfig?: Record } | undefined + )?.thinkingConfig; + expect(thinkingConfig).toEqual({ + includeThoughts: true, + thinkingLevel: "HIGH", + }); + expect( + ( + payloads[0]?.contents as + | Array<{ parts?: Array<{ inlineData?: { mimeType?: string; data?: string } }> }> + | undefined + )?.[0]?.parts?.[1]?.inlineData, + ).toEqual({ + mimeType: "image/png", + data: "ZmFrZQ==", + }); + }); + + it("keeps valid Google thinkingBudget unchanged", () => { + const payloads: Record[] = []; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload: Record = { + config: { + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 2048, + }, + }, + }; + options?.onPayload?.(payload); + payloads.push(payload); + return {} as ReturnType; + }; + const agent = { streamFn: baseStreamFn }; + + applyExtraParamsToAgent(agent, undefined, "atproxy", "gemini-3.1-pro-high", undefined, "high"); + + const model = { + api: "google-generative-ai", + provider: "atproxy", + id: "gemini-3.1-pro-high", + } as Model<"google-generative-ai">; + const context: Context = { messages: [] }; + void agent.streamFn?.(model, context, {}); + + expect(payloads).toHaveLength(1); + expect(payloads[0]?.config).toEqual({ + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 2048, + }, + }); + }); it("adds OpenRouter attribution headers to stream options", () => { const { calls, agent } = createOptionsCaptureAgent(); diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 05c764d15c7..2e87dcee608 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -504,6 +504,94 @@ function createOpenRouterWrapper( }; } +function isGemini31Model(modelId: string): boolean { + const normalized = modelId.toLowerCase(); + return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash"); +} + +function 
mapThinkLevelToGoogleThinkingLevel( + thinkingLevel: ThinkLevel, +): "MINIMAL" | "LOW" | "MEDIUM" | "HIGH" | undefined { + switch (thinkingLevel) { + case "minimal": + return "MINIMAL"; + case "low": + return "LOW"; + case "medium": + return "MEDIUM"; + case "high": + case "xhigh": + return "HIGH"; + default: + return undefined; + } +} + +function sanitizeGoogleThinkingPayload(params: { + payload: unknown; + modelId?: string; + thinkingLevel?: ThinkLevel; +}): void { + if (!params.payload || typeof params.payload !== "object") { + return; + } + const payloadObj = params.payload as Record; + const config = payloadObj.config; + if (!config || typeof config !== "object") { + return; + } + const configObj = config as Record; + const thinkingConfig = configObj.thinkingConfig; + if (!thinkingConfig || typeof thinkingConfig !== "object") { + return; + } + const thinkingConfigObj = thinkingConfig as Record; + const thinkingBudget = thinkingConfigObj.thinkingBudget; + if (typeof thinkingBudget !== "number" || thinkingBudget >= 0) { + return; + } + + // pi-ai can emit thinkingBudget=-1 for some Gemini 3.1 IDs; a negative budget + // is invalid for Google-compatible backends and can lead to malformed handling. + delete thinkingConfigObj.thinkingBudget; + + if ( + typeof params.modelId === "string" && + isGemini31Model(params.modelId) && + params.thinkingLevel && + params.thinkingLevel !== "off" && + thinkingConfigObj.thinkingLevel === undefined + ) { + const mappedLevel = mapThinkLevelToGoogleThinkingLevel(params.thinkingLevel); + if (mappedLevel) { + thinkingConfigObj.thinkingLevel = mappedLevel; + } + } +} + +function createGoogleThinkingPayloadWrapper( + baseStreamFn: StreamFn | undefined, + thinkingLevel?: ThinkLevel, +): StreamFn { + const underlying = baseStreamFn ?? 
streamSimple; + return (model, context, options) => { + const onPayload = options?.onPayload; + return underlying(model, context, { + ...options, + onPayload: (payload) => { + if (model.api === "google-generative-ai") { + sanitizeGoogleThinkingPayload({ + payload, + modelId: model.id, + thinkingLevel, + }); + } + onPayload?.(payload); + }, + }); + }; +} + /** * Create a streamFn wrapper that injects tool_stream=true for Z.AI providers. * @@ -615,6 +703,10 @@ export function applyExtraParamsToAgent( } } + // Guard Google payloads against invalid negative thinking budgets emitted by + // upstream model-ID heuristics for Gemini 3.1 variants. + agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel); + // Work around upstream pi-ai hardcoding `store: false` for Responses API. // Force `store=true` for direct OpenAI/OpenAI Codex providers so multi-turn // server-side conversation state is preserved.