From bd213cf2ad05a5443df96364c4ff0afa7c8a4bf0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 25 Feb 2026 01:11:20 +0000 Subject: [PATCH] fix(agents): normalize SiliconFlow Pro thinking=off payload (#25435) Land PR #25435 from @Zjianru. Changelog: add 2026.2.24 fix entry with contributor credit. Co-authored-by: codez --- CHANGELOG.md | 1 + .../pi-embedded-runner-extraparams.test.ts | 62 +++++++++++++++++++ src/agents/pi-embedded-runner/extra-params.ts | 43 +++++++++++++ 3 files changed, 106 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53a58d8ae4a..72be42da620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai - WhatsApp/Web reconnect: treat close status `440` as non-retryable (including string-form status values), stop reconnect loops immediately, and emit operator guidance to relink after resolving session conflicts. (#25858) Thanks @markmusson. - Onboarding/Telegram: keep core-channel onboarding available when plugin registry population is missing by falling back to built-in adapters and continuing wizard setup with actionable recovery guidance. (#25803) Thanks @Suko. - Models/Bedrock auth: normalize additional Bedrock provider aliases (`bedrock`, `aws-bedrock`, `aws_bedrock`, `amazon bedrock`) to canonical `amazon-bedrock`, ensuring auth-mode resolution consistently selects AWS SDK fallback. (#25756) Thanks @fwhite13. +- Providers/SiliconFlow: normalize `thinking="off"` to `thinking: null` for `Pro/*` model payloads to avoid provider-side 400 loops and misleading compaction retries. (#25435) Thanks @Zjianru. - Gateway/Models: honor explicit `agents.defaults.models` allowlist refs even when bundled model catalog data is stale, synthesize missing allowlist entries in `models.list`, and allow `sessions.patch`/`/model` selection for those refs without false `model not allowed` errors. (#20291) Thanks @kensipe, @nikolasdehor, and @vincentkoc. - Automation/Subagent/Cron reliability: honor `ANNOUNCE_SKIP` in `sessions_spawn` completion/direct announce flows (no user-visible token leaks), add transient direct-announce retries for channel unavailability (for example WhatsApp listener reconnect windows), and include `cron` in the `coding` tool profile so `/tools/invoke` can execute cron actions when explicitly allowed by gateway policy. (#25800, #25656, #25842, #25813, #25822, #25821) Thanks @astra-fer, @aaajiao, @dwight11232-coder, @kevinWangSheng, @widingmarcus-cyber, and @stakeswky. - Discord/Proxy + reactions + model picker: thread channel proxy fetch into inbound media/sticker downloads, use proxy-aware gateway metadata fetch for WSL/corporate proxy setups, wire `messages.statusReactions.{emojis,timing}` into Discord reaction lifecycle control, and compact model-picker `custom_id` keys to stay under Discord's 100-char limit while keeping backward-compatible parsing. (#25232, #25507, #25564, #25695) Thanks @openperf, @chilu18, @Yipsh, @lbo728, and @s1korrrr. diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 1e47be3ee1f..4392edfb3e1 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -310,6 +310,68 @@ describe("applyExtraParamsToAgent", () => { expect(payloads[0]).toEqual({ reasoning: { max_tokens: 256 } }); }); + it("normalizes thinking=off to null for SiliconFlow Pro models", () => { + const payloads: Record[] = []; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload: Record = { thinking: "off" }; + options?.onPayload?.(payload); + payloads.push(payload); + return {} as ReturnType; + }; + const agent = { streamFn: baseStreamFn }; + + applyExtraParamsToAgent( + agent, + undefined, + "siliconflow", + "Pro/MiniMaxAI/MiniMax-M2.1", + undefined, + "off", + ); + + const model = { + api: "openai-completions", + provider: "siliconflow", + id: "Pro/MiniMaxAI/MiniMax-M2.1", + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + void agent.streamFn?.(model, context, {}); + + expect(payloads).toHaveLength(1); + expect(payloads[0]?.thinking).toBeNull(); + }); + + it("keeps thinking=off unchanged for non-Pro SiliconFlow model IDs", () => { + const payloads: Record[] = []; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload: Record = { thinking: "off" }; + options?.onPayload?.(payload); + payloads.push(payload); + return {} as ReturnType; + }; + const agent = { streamFn: baseStreamFn }; + + applyExtraParamsToAgent( + agent, + undefined, + "siliconflow", + "deepseek-ai/DeepSeek-V3.2", + undefined, + "off", + ); + + const model = { + api: "openai-completions", + provider: "siliconflow", + id: "deepseek-ai/DeepSeek-V3.2", + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + void agent.streamFn?.(model, context, {}); + + expect(payloads).toHaveLength(1); + expect(payloads[0]?.thinking).toBe("off"); + }); + it("adds OpenRouter attribution headers to stream options", () => { const { calls, agent } = createOptionsCaptureAgent(); diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 0d88bdf08f3..05c764d15c7 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -408,6 +408,42 @@ function mapThinkingLevelToOpenRouterReasoningEffort( return thinkingLevel; } +function shouldApplySiliconFlowThinkingOffCompat(params: { + provider: string; + modelId: string; + thinkingLevel?: ThinkLevel; +}): boolean { + return ( + params.provider === "siliconflow" && + params.thinkingLevel === "off" && + params.modelId.startsWith("Pro/") + ); +} + +/** + * SiliconFlow's Pro/* models reject string thinking modes (including "off") + * with HTTP 400 invalid-parameter errors. Normalize to `thinking: null` to + * preserve "thinking disabled" intent without sending an invalid enum value. + */ +function createSiliconFlowThinkingWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => { + const originalOnPayload = options?.onPayload; + return underlying(model, context, { + ...options, + onPayload: (payload) => { + if (payload && typeof payload === "object") { + const payloadObj = payload as Record; + if (payloadObj.thinking === "off") { + payloadObj.thinking = null; + } + } + originalOnPayload?.(payload); + }, + }); + }; +} + /** * Create a streamFn wrapper that adds OpenRouter app attribution headers * and injects reasoning.effort based on the configured thinking level. @@ -544,6 +580,13 @@ export function applyExtraParamsToAgent( agent.streamFn = createAnthropicBetaHeadersWrapper(agent.streamFn, anthropicBetas); } + if (shouldApplySiliconFlowThinkingOffCompat({ provider, modelId, thinkingLevel })) { + log.debug( + `normalizing thinking=off to thinking=null for SiliconFlow compatibility (${provider}/${modelId})`, + ); + agent.streamFn = createSiliconFlowThinkingWrapper(agent.streamFn); + } + if (provider === "openrouter") { log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`); // "auto" is a dynamic routing model — we don't know which underlying model