From 252c63429ecf392e24dade8f72855fff5f63f89e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 13:27:37 +0100 Subject: [PATCH] fix(providers): map native reasoning efforts --- CHANGELOG.md | 1 + docs/providers/groq.md | 8 ++ docs/providers/lmstudio.md | 10 +- extensions/groq/api.ts | 60 ++++++++++ extensions/groq/index.test.ts | 51 +++++++++ extensions/groq/index.ts | 14 ++- extensions/lmstudio/index.test.ts | 12 +- extensions/lmstudio/index.ts | 2 +- extensions/lmstudio/src/models.fetch.ts | 2 +- extensions/lmstudio/src/models.test.ts | 47 +++++++- extensions/lmstudio/src/models.ts | 125 ++++++++++++++++++++- src/agents/openai-reasoning-effort.test.ts | 41 +++++++ src/agents/openai-reasoning-effort.ts | 28 ++--- src/agents/openai-transport-stream.test.ts | 71 ++++++++++++ src/agents/openai-transport-stream.ts | 18 +-- src/config/types.models.ts | 1 + src/config/zod-schema.core.ts | 2 + src/model-catalog/normalize.ts | 11 ++ 18 files changed, 461 insertions(+), 43 deletions(-) create mode 100644 extensions/groq/api.ts create mode 100644 extensions/groq/index.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 5274f4794fc..9844d7d98bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai - Nodes/CLI: add `openclaw nodes remove --node ` and `node.pair.remove` so stale gateway-owned node pairing records can be cleaned without hand-editing state files. Thanks @openclaw. - Docker: install the CA certificate bundle in the slim runtime image so HTTPS calls from containerized gateways no longer fail TLS setup after the `bookworm-slim` base switch. Fixes #72787. Thanks @ryuhaneul. +- Providers/reasoning: let Groq and LM Studio declare provider-native reasoning effort values, so Qwen thinking models receive `none`/`default` or `off`/`on` instead of OpenAI-only `low`/`medium` values. Fixes #32638. Thanks @Aqu1bp, @mgoulart, @Norpps, and @BSTail. 
- Local models: default custom providers with only `baseUrl` to the Chat Completions adapter and trust loopback model requests automatically, so local OpenAI-compatible proxies receive `/v1/chat/completions` without timing out. Fixes #40024. Thanks @parachuteshe. - Channels/message tool: surface Discord, Slack, and Mattermost `user:`/`channel:` target syntax in the shared message target schema and Discord ambiguity errors, so DM sends by numeric id stop burning retries before finding `user:`. Fixes #72401. Thanks @garyd9, @hclsys, and @praveen9354. - Agents/tools: scope tool-loop detection history to the active run when available, so scheduled heartbeat cycles no longer inherit stale repeated-call counts from previous runs. Fixes #40144. Thanks @mattbrown319. diff --git a/docs/providers/groq.md b/docs/providers/groq.md index aa87c5693c5..c6e49118bfa 100644 --- a/docs/providers/groq.md +++ b/docs/providers/groq.md @@ -71,6 +71,14 @@ Use `openclaw models list --provider groq` for the most up-to-date list of models available on your account. +## Reasoning models + +OpenClaw maps its shared `/think` levels to Groq's model-specific +`reasoning_effort` values. For `qwen/qwen3-32b`, disabled thinking sends +`none` and enabled thinking sends `default`. For Groq GPT-OSS reasoning models, +OpenClaw sends `low`, `medium`, or `high`; disabled thinking omits +`reasoning_effort` because those models do not support a disabled value. + ## Audio transcription Groq also provides fast Whisper-based audio transcription. When configured as a diff --git a/docs/providers/lmstudio.md b/docs/providers/lmstudio.md index d335fde69a6..5af70dc7de0 100644 --- a/docs/providers/lmstudio.md +++ b/docs/providers/lmstudio.md @@ -104,7 +104,7 @@ LM Studio is streaming-usage compatible. When it does not emit an OpenAI-shaped `usage` object, OpenClaw recovers token counts from llama.cpp-style `timings.prompt_n` / `timings.predicted_n` metadata instead. 
-Same behavior applies to these OpenAI-compatible local backends: +Same streaming usage behavior applies to these OpenAI-compatible local backends: - vLLM - SGLang @@ -114,6 +114,14 @@ Same behavior applies to these OpenAI-compatible local backends: - TabbyAPI - text-generation-webui +### Thinking compatibility + +When LM Studio's `/api/v1/models` discovery reports model-specific reasoning +options, OpenClaw preserves those native values in model compat metadata. For +binary thinking models that advertise `allowed_options: ["off", "on"]`, +OpenClaw maps disabled thinking to `off` and enabled `/think` levels to `on` +instead of sending OpenAI-only values such as `low` or `medium`. + ### Explicit configuration ```json5 diff --git a/extensions/groq/api.ts b/extensions/groq/api.ts new file mode 100644 index 00000000000..0557d906c4a --- /dev/null +++ b/extensions/groq/api.ts @@ -0,0 +1,60 @@ +import type { ModelCompatConfig } from "openclaw/plugin-sdk/provider-model-shared"; + +const GROQ_QWEN3_32B_ID = "qwen/qwen3-32b"; +const GROQ_GPT_OSS_REASONING_IDS = new Set([ + "openai/gpt-oss-20b", + "openai/gpt-oss-120b", + "openai/gpt-oss-safeguard-20b", +]); + +export const GROQ_QWEN_REASONING_EFFORTS = ["none", "default"] as const; +export const GROQ_GPT_OSS_REASONING_EFFORTS = ["low", "medium", "high"] as const; + +export const GROQ_QWEN_REASONING_EFFORT_MAP: Record<string, string> = { + off: "none", + none: "none", + minimal: "default", + low: "default", + medium: "default", + high: "default", + xhigh: "default", + adaptive: "default", + max: "default", +}; + +function normalizeGroqModelId(modelId: string | undefined): string { + return modelId?.trim().toLowerCase() ?? 
""; +} + +export function resolveGroqReasoningCompatPatch( + modelId: string, +): Pick< + ModelCompatConfig, + "supportsReasoningEffort" | "supportedReasoningEfforts" | "reasoningEffortMap" +> | null { + const normalized = normalizeGroqModelId(modelId); + if (normalized === GROQ_QWEN3_32B_ID) { + return { + supportsReasoningEffort: true, + supportedReasoningEfforts: [...GROQ_QWEN_REASONING_EFFORTS], + reasoningEffortMap: GROQ_QWEN_REASONING_EFFORT_MAP, + }; + } + if (GROQ_GPT_OSS_REASONING_IDS.has(normalized)) { + return { + supportsReasoningEffort: true, + supportedReasoningEfforts: [...GROQ_GPT_OSS_REASONING_EFFORTS], + }; + } + return null; +} + +export function contributeGroqResolvedModelCompat(params: { + modelId: string; + model: { api?: unknown; provider?: unknown }; +}): Partial<ModelCompatConfig> | undefined { + if (params.model.api !== "openai-completions" || params.model.provider !== "groq") { + return undefined; + } + return resolveGroqReasoningCompatPatch(params.modelId) ?? undefined; +} diff --git a/extensions/groq/index.test.ts b/extensions/groq/index.test.ts new file mode 100644 index 00000000000..5bf876e2b99 --- /dev/null +++ b/extensions/groq/index.test.ts @@ -0,0 +1,51 @@ +import { capturePluginRegistration } from "openclaw/plugin-sdk/testing"; +import { describe, expect, it } from "vitest"; +import { contributeGroqResolvedModelCompat, resolveGroqReasoningCompatPatch } from "./api.js"; +import plugin from "./index.js"; + +describe("groq provider compat", () => { + it("maps Groq Qwen 3 reasoning to provider-native none/default values", () => { + expect(resolveGroqReasoningCompatPatch("qwen/qwen3-32b")).toEqual({ + supportsReasoningEffort: true, + supportedReasoningEfforts: ["none", "default"], + reasoningEffortMap: expect.objectContaining({ + off: "none", + low: "default", + medium: "default", + high: "default", + }), + }); + }); + + it("keeps GPT-OSS reasoning on the Groq low/medium/high contract", () => { + 
expect(resolveGroqReasoningCompatPatch("openai/gpt-oss-120b")).toEqual({ + supportsReasoningEffort: true, + supportedReasoningEfforts: ["low", "medium", "high"], + }); + }); + + it("contributes compat only for Groq OpenAI-compatible chat models", () => { + expect( + contributeGroqResolvedModelCompat({ + modelId: "qwen/qwen3-32b", + model: { api: "openai-completions", provider: "groq" }, + }), + ).toMatchObject({ supportedReasoningEfforts: ["none", "default"] }); + expect( + contributeGroqResolvedModelCompat({ + modelId: "qwen/qwen3-32b", + model: { api: "openai-completions", provider: "openrouter" }, + }), + ).toBeUndefined(); + }); + + it("registers Groq model and media providers", () => { + const captured = capturePluginRegistration(plugin); + expect(captured.providers[0]).toMatchObject({ + id: "groq", + label: "Groq", + envVars: ["GROQ_API_KEY"], + }); + expect(captured.mediaUnderstandingProviders[0]?.id).toBe("groq"); + }); +}); diff --git a/extensions/groq/index.ts b/extensions/groq/index.ts index 7900519262a..34fe3ea50e0 100644 --- a/extensions/groq/index.ts +++ b/extensions/groq/index.ts @@ -1,11 +1,21 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +import { contributeGroqResolvedModelCompat } from "./api.js"; import { groqMediaUnderstandingProvider } from "./media-understanding-provider.js"; export default definePluginEntry({ id: "groq", - name: "Groq Media Understanding", - description: "Bundled Groq audio transcription provider", + name: "Groq Provider", + description: "Bundled Groq provider plugin", register(api) { + api.registerProvider({ + id: "groq", + label: "Groq", + docsPath: "/providers/groq", + envVars: ["GROQ_API_KEY"], + auth: [], + contributeResolvedModelCompat: ({ modelId, model }) => + contributeGroqResolvedModelCompat({ modelId, model }), + }); api.registerMediaUnderstandingProvider(groqMediaUnderstandingProvider); }, }); diff --git a/extensions/lmstudio/index.test.ts b/extensions/lmstudio/index.test.ts index 
ed6bea91561..6942ad12fb3 100644 --- a/extensions/lmstudio/index.test.ts +++ b/extensions/lmstudio/index.test.ts @@ -147,6 +147,11 @@ describe("lmstudio plugin", () => { contextTokens: 8192, reasoning: true, input: ["text", "image"], + compat: { + supportsReasoningEffort: true, + supportedReasoningEfforts: ["off", "on"], + reasoningEffortMap: { off: "off", high: "on" }, + }, }, { id: "phi-4", @@ -173,7 +178,12 @@ describe("lmstudio plugin", () => { provider: "lmstudio", id: "qwen3-8b-instruct", name: "Qwen 3 8B Instruct", - compat: { supportsUsageInStreaming: true }, + compat: { + supportsUsageInStreaming: true, + supportsReasoningEffort: true, + supportedReasoningEfforts: ["off", "on"], + reasoningEffortMap: { off: "off", high: "on" }, + }, contextWindow: 32768, contextTokens: 8192, reasoning: true, diff --git a/extensions/lmstudio/index.ts b/extensions/lmstudio/index.ts index 2492251b4b3..ff932ecc370 100644 --- a/extensions/lmstudio/index.ts +++ b/extensions/lmstudio/index.ts @@ -34,7 +34,7 @@ function resolveLmstudioAugmentedCatalogEntries(config: OpenClawConfig | undefin provider: PROVIDER_ID, id: entry.id, name: entry.name ?? 
entry.id, - compat: { supportsUsageInStreaming: true }, + compat: { ...entry.compat, supportsUsageInStreaming: true }, contextWindow: entry.contextWindow, contextTokens: entry.contextTokens, reasoning: entry.reasoning, diff --git a/extensions/lmstudio/src/models.fetch.ts b/extensions/lmstudio/src/models.fetch.ts index a2397665f01..5ffb0d2c1e5 100644 --- a/extensions/lmstudio/src/models.fetch.ts +++ b/extensions/lmstudio/src/models.fetch.ts @@ -163,7 +163,7 @@ export async function discoverLmstudioModels( reasoning: base.reasoning, input: base.input, cost: SELF_HOSTED_DEFAULT_COST, - compat: { supportsUsageInStreaming: true }, + compat: { ...base.compat, supportsUsageInStreaming: true }, contextWindow: base.contextWindow, contextTokens: base.contextTokens, maxTokens: base.maxTokens, diff --git a/extensions/lmstudio/src/models.test.ts b/extensions/lmstudio/src/models.test.ts index a4ace071cbe..dad5b33a262 100644 --- a/extensions/lmstudio/src/models.test.ts +++ b/extensions/lmstudio/src/models.test.ts @@ -8,6 +8,7 @@ import { discoverLmstudioModels, ensureLmstudioModelLoaded } from "./models.fetc import { normalizeLmstudioProviderConfig, resolveLmstudioInferenceBase, + resolveLmstudioReasoningCompat, resolveLmstudioReasoningCapability, resolveLmstudioServerBase, } from "./models.js"; @@ -145,6 +146,40 @@ describe("lmstudio-models", () => { ).toBe(false); }); + it("maps LM Studio native reasoning options into OpenAI-compatible effort compat", () => { + expect( + resolveLmstudioReasoningCompat({ + capabilities: { + reasoning: { + allowed_options: ["off", "on"], + default: "on", + }, + }, + }), + ).toEqual({ + supportsReasoningEffort: true, + supportedReasoningEfforts: ["off", "on"], + reasoningEffortMap: expect.objectContaining({ + off: "off", + none: "off", + low: "on", + medium: "on", + high: "on", + }), + }); + + expect( + resolveLmstudioReasoningCompat({ + capabilities: { + reasoning: { + allowed_options: ["off"], + default: "off", + }, + }, + }), + 
).toBeUndefined(); + }); + it("discovers llm models and maps metadata", async () => { const fetchMock = vi.fn(async (_url: string | URL) => ({ ok: true, @@ -205,7 +240,17 @@ describe("lmstudio-models", () => { reasoning: true, input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - compat: { supportsUsageInStreaming: true }, + compat: { + supportsUsageInStreaming: true, + supportsReasoningEffort: true, + supportedReasoningEfforts: ["off", "on"], + reasoningEffortMap: expect.objectContaining({ + off: "off", + none: "off", + medium: "on", + high: "on", + }), + }, contextWindow: 262144, contextTokens: LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH, maxTokens: SELF_HOSTED_DEFAULT_MAX_TOKENS, diff --git a/extensions/lmstudio/src/models.ts b/extensions/lmstudio/src/models.ts index 8873c63b424..db190ee53b0 100644 --- a/extensions/lmstudio/src/models.ts +++ b/extensions/lmstudio/src/models.ts @@ -40,6 +40,7 @@ type LmstudioConfiguredCatalogEntry = { contextTokens?: number; reasoning?: boolean; input?: ("text" | "image" | "document")[]; + compat?: ModelDefinitionConfig["compat"]; }; function normalizeReasoningOption(value: unknown): string | null { @@ -58,6 +59,83 @@ function isReasoningEnabledOption(value: unknown): boolean { return normalized !== "off"; } +function normalizeReasoningOptions(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return [ + ...new Set( + value + .map((option) => normalizeReasoningOption(option)) + .filter((option): option is string => option !== null), + ), + ]; +} + +function resolveLmstudioReasoningDefault( + reasoning: LmstudioReasoningCapabilityWire, +): string | null { + const normalizedDefault = normalizeReasoningOption(reasoning.default); + return normalizedDefault && isReasoningEnabledOption(normalizedDefault) + ? 
normalizedDefault + : null; +} + +function resolveLmstudioEnabledReasoningOption( + allowedOptions: readonly string[], + reasoning: LmstudioReasoningCapabilityWire, +): string | undefined { + const normalizedDefault = resolveLmstudioReasoningDefault(reasoning); + if (normalizedDefault && allowedOptions.includes(normalizedDefault)) { + return normalizedDefault; + } + return ( + allowedOptions.find((option) => option === "on" || option === "default") ?? + allowedOptions.find((option) => isReasoningEnabledOption(option)) + ); +} + +function resolveLmstudioDisabledReasoningOption( + allowedOptions: readonly string[], +): string | undefined { + return ( + allowedOptions.find((option) => option === "off") ?? + allowedOptions.find((option) => option === "none") + ); +} + +export function resolveLmstudioReasoningCompat( + entry: Pick, +): ModelDefinitionConfig["compat"] | undefined { + const reasoning = entry.capabilities?.reasoning; + if (reasoning === undefined || reasoning === null) { + return undefined; + } + const allowedOptions = normalizeReasoningOptions(reasoning.allowed_options); + if (allowedOptions.length === 0) { + return undefined; + } + const enabled = resolveLmstudioEnabledReasoningOption(allowedOptions, reasoning); + if (!enabled) { + return undefined; + } + const disabled = resolveLmstudioDisabledReasoningOption(allowedOptions); + return { + supportsReasoningEffort: true, + supportedReasoningEfforts: allowedOptions, + reasoningEffortMap: { + ...(disabled ? { off: disabled, none: disabled } : {}), + minimal: enabled, + low: enabled, + medium: enabled, + high: enabled, + xhigh: enabled, + adaptive: enabled, + max: enabled, + }, + }; +} + /** * Resolves LM Studio reasoning support from capabilities payloads. * Defaults to false when the server omits reasoning metadata. 
@@ -69,12 +147,7 @@ export function resolveLmstudioReasoningCapability( if (reasoning === undefined || reasoning === null) { return false; } - const allowedOptionsRaw = reasoning.allowed_options; - const allowedOptions = Array.isArray(allowedOptionsRaw) ? allowedOptionsRaw .map((option) => normalizeReasoningOption(option)) .filter((option): option is string => option !== null) : []; + const allowedOptions = normalizeReasoningOptions(reasoning.allowed_options); if (allowedOptions.length > 0) { return allowedOptions.some((option) => isReasoningEnabledOption(option)); } @@ -130,6 +203,41 @@ function isLikelyHostBaseUrl(value: string): boolean { ); } +function normalizeConfiguredReasoningEffortMap(value: unknown): Record<string, string> | undefined { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + const normalized = Object.fromEntries( + Object.entries(value) + .map(([key, mapped]) => [key.trim(), typeof mapped === "string" ? mapped.trim() : ""]) + .filter(([key, mapped]) => key.length > 0 && mapped.length > 0), + ); + return Object.keys(normalized).length > 0 ? 
normalized : undefined; +} + +function normalizeLmstudioConfiguredCompat(value: unknown): ModelDefinitionConfig["compat"] { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + const record = value as Record<string, unknown>; + const supportedReasoningEfforts = normalizeReasoningOptions(record.supportedReasoningEfforts); + const reasoningEffortMap = normalizeConfiguredReasoningEffortMap(record.reasoningEffortMap); + const compat: NonNullable<ModelDefinitionConfig["compat"]> = {}; + if (typeof record.supportsUsageInStreaming === "boolean") { + compat.supportsUsageInStreaming = record.supportsUsageInStreaming; + } + if (typeof record.supportsReasoningEffort === "boolean") { + compat.supportsReasoningEffort = record.supportsReasoningEffort; + } + if (supportedReasoningEfforts.length > 0) { + compat.supportedReasoningEfforts = supportedReasoningEfforts; + } + if (reasoningEffortMap) { + compat.reasoningEffortMap = reasoningEffortMap; + } + return Object.keys(compat).length > 0 ? compat : undefined; +} + function toFetchableLmstudioBaseUrl(value: string): string { if (hasExplicitHttpScheme(value) || !isLikelyHostBaseUrl(value)) { return value; } @@ -226,6 +334,7 @@ export function normalizeLmstudioConfiguredCatalogEntry( item === "text" || item === "image" || item === "document", ) : undefined; + const compat = normalizeLmstudioConfiguredCompat(record.compat); return { id, name, contextWindow, contextTokens, reasoning, input: input && input.length > 0 ? 
input : undefined, + compat, }; } @@ -290,6 +400,7 @@ export type LmstudioModelBase = { reasoning: boolean; input: Array<"text" | "image">; cost: ModelDefinitionConfig["cost"]; + compat?: ModelDefinitionConfig["compat"]; contextWindow: number; contextTokens: number; maxTokens: number; @@ -335,6 +446,7 @@ export function mapLmstudioWireEntry(entry: LmstudioModelWire): LmstudioModelBas reasoning: resolveLmstudioReasoningCapability(entry), input: entry.capabilities?.vision ? ["text", "image"] : ["text"], cost: SELF_HOSTED_DEFAULT_COST, + compat: resolveLmstudioReasoningCompat(entry), contextWindow, contextTokens, maxTokens: Math.max(1, Math.min(contextWindow, SELF_HOSTED_DEFAULT_MAX_TOKENS)), @@ -361,6 +473,7 @@ export function mapLmstudioWireModelsToConfig( reasoning: base.reasoning, input: base.input, cost: base.cost, + ...(base.compat ? { compat: base.compat } : {}), contextWindow: base.contextWindow, contextTokens: base.contextTokens, maxTokens: base.maxTokens, diff --git a/src/agents/openai-reasoning-effort.test.ts b/src/agents/openai-reasoning-effort.test.ts index 09e9af74e5a..ab5c0afa4d5 100644 --- a/src/agents/openai-reasoning-effort.test.ts +++ b/src/agents/openai-reasoning-effort.test.ts @@ -24,4 +24,45 @@ describe("OpenAI reasoning effort support", () => { expect(resolveOpenAIReasoningEffortForModel({ model, effort: "xhigh" })).toBe("xhigh"); }); + + it("allows provider-native compat values when explicitly declared", () => { + const model = { + provider: "groq", + id: "qwen/qwen3-32b", + compat: { + supportedReasoningEfforts: ["none", "default"], + reasoningEffortMap: { + off: "none", + low: "default", + medium: "default", + high: "default", + }, + }, + }; + + expect(resolveOpenAISupportedReasoningEfforts(model)).toEqual(["none", "default"]); + expect( + resolveOpenAIReasoningEffortForModel({ + model, + effort: "medium", + fallbackMap: model.compat.reasoningEffortMap, + }), + ).toBe("default"); + expect( + resolveOpenAIReasoningEffortForModel({ + model, + 
effort: "off", + fallbackMap: model.compat.reasoningEffortMap, + }), + ).toBe("none"); + }); + + it("omits unsupported disabled reasoning instead of falling back to enabled effort", () => { + expect( + resolveOpenAIReasoningEffortForModel({ + model: { provider: "groq", id: "openai/gpt-oss-120b" }, + effort: "off", + }), + ).toBeUndefined(); + }); }); diff --git a/src/agents/openai-reasoning-effort.ts b/src/agents/openai-reasoning-effort.ts index 97d1f8feefa..1bb2148ea79 100644 --- a/src/agents/openai-reasoning-effort.ts +++ b/src/agents/openai-reasoning-effort.ts @@ -2,7 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; export type OpenAIReasoningEffort = "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; -export type OpenAIApiReasoningEffort = OpenAIReasoningEffort; +export type OpenAIApiReasoningEffort = OpenAIReasoningEffort | (string & {}); type OpenAIReasoningModel = { provider?: unknown; @@ -12,15 +12,6 @@ type OpenAIReasoningModel = { compat?: unknown; }; -const ALL_OPENAI_REASONING_EFFORTS = [ - "none", - "minimal", - "low", - "medium", - "high", - "xhigh", -] as const satisfies readonly OpenAIApiReasoningEffort[]; - const GPT_5_REASONING_EFFORTS = ["minimal", "low", "medium", "high"] as const; const GPT_51_REASONING_EFFORTS = ["none", "low", "medium", "high"] as const; const GPT_52_REASONING_EFFORTS = ["none", "low", "medium", "high", "xhigh"] as const; @@ -47,12 +38,21 @@ function readCompatReasoningEfforts(compat: unknown): OpenAIApiReasoningEffort[] if (!Array.isArray(raw)) { return undefined; } - const supported = raw.filter((value): value is OpenAIApiReasoningEffort => - ALL_OPENAI_REASONING_EFFORTS.includes(value as OpenAIApiReasoningEffort), - ); + const supported = [ + ...new Set( + raw + .filter((value): value is string => typeof value === "string") + .map((value) => value.trim()) + .filter(Boolean), + ), + ]; return supported.length > 0 ? 
supported : undefined; } +function isDisabledReasoningEffort(effort: string): boolean { + return effort === "none" || effort === "off"; +} + export function resolveOpenAISupportedReasoningEfforts( model: OpenAIReasoningModel, ): readonly OpenAIApiReasoningEffort[] { @@ -113,7 +113,7 @@ export function resolveOpenAIReasoningEffortForModel(params: { if (supported.includes(normalized as OpenAIApiReasoningEffort)) { return normalized as OpenAIApiReasoningEffort; } - if (requested === "none") { + if (isDisabledReasoningEffort(requested) || isDisabledReasoningEffort(normalized)) { return undefined; } if (requested === "minimal" && supported.includes("low")) { diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index dd143d0a905..6776d56a3b1 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1791,6 +1791,77 @@ describe("openai transport stream", () => { expect(params.reasoning_effort).toBe("high"); }); + it("uses provider-native reasoning effort values declared by model compat", () => { + const baseModel = { + id: "qwen/qwen3-32b", + name: "Qwen 3 32B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 8192, + compat: { + supportsReasoningEffort: true, + supportedReasoningEfforts: ["none", "default"], + reasoningEffortMap: { + off: "none", + low: "default", + medium: "default", + high: "default", + }, + }, + } as unknown as Model<"openai-completions">; + const context = { + systemPrompt: "system", + messages: [], + tools: [], + } as never; + + const enabled = buildOpenAICompletionsParams(baseModel, context, { + reasoning: "medium", + } as never) as { reasoning_effort?: unknown }; + const disabled = buildOpenAICompletionsParams(baseModel, context, { + reasoning: "off", + } as never) as { 
reasoning_effort?: unknown }; + + expect(enabled.reasoning_effort).toBe("default"); + expect(disabled.reasoning_effort).toBe("none"); + }); + + it("omits unsupported disabled reasoning for completions providers", () => { + const params = buildOpenAICompletionsParams( + { + id: "openai/gpt-oss-120b", + name: "GPT OSS 120B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 8192, + compat: { + supportsReasoningEffort: true, + supportedReasoningEfforts: ["low", "medium", "high"], + }, + } as unknown as Model<"openai-completions">, + { + systemPrompt: "system", + messages: [], + tools: [], + } as never, + { + reasoning: "off", + } as never, + ) as { reasoning_effort?: unknown }; + + expect(params).not.toHaveProperty("reasoning_effort"); + }); + it("uses system role and streaming usage compat for native Qwen completions providers", () => { const params = buildOpenAICompletionsParams( { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 7ae8ee6f453..c966ca3c1ff 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -1526,26 +1526,12 @@ function getCompletionsReasoningDeltas( } function detectCompat(model: OpenAIModeModel) { - const provider = model.provider; - const { capabilities, defaults: compatDefaults } = detectOpenAICompletionsCompat(model); - const endpointClass = capabilities.endpointClass; - const isDefaultRoute = endpointClass === "default"; - const isGroq = endpointClass === "groq-native" || (isDefaultRoute && provider === "groq"); - const reasoningEffortMap: Record<string, string> = - isGroq && model.id === "qwen/qwen3-32b" - ? 
{ - minimal: "default", - low: "default", - medium: "default", - high: "default", - xhigh: "default", - } - : {}; + const { defaults: compatDefaults } = detectOpenAICompletionsCompat(model); return { supportsStore: compatDefaults.supportsStore, supportsDeveloperRole: compatDefaults.supportsDeveloperRole, supportsReasoningEffort: compatDefaults.supportsReasoningEffort, - reasoningEffortMap, + reasoningEffortMap: {}, supportsUsageInStreaming: compatDefaults.supportsUsageInStreaming, maxTokensField: compatDefaults.maxTokensField, requiresToolResultName: false, diff --git a/src/config/types.models.ts b/src/config/types.models.ts index dcb4474dfcf..9d33bbe333f 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -59,6 +59,7 @@ export type ModelCompatConfig = SupportedOpenAICompatFields & SupportedAnthropicMessagesCompatFields & { thinkingFormat?: SupportedThinkingFormat; supportedReasoningEfforts?: string[]; + reasoningEffortMap?: Record; visibleReasoningDetailTypes?: string[]; supportsTools?: boolean; supportsPromptCacheKey?: boolean; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 5d6aa3631f4..cd4d2ca2d2b 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -195,6 +195,8 @@ export const ModelCompatSchema = z supportsStrictMode: z.boolean().optional(), requiresStringContent: z.boolean().optional(), visibleReasoningDetailTypes: z.array(z.string().min(1)).optional(), + supportedReasoningEfforts: z.array(z.string().min(1)).optional(), + reasoningEffortMap: z.record(z.string().min(1), z.string().min(1)).optional(), maxTokensField: z .union([z.literal("max_completion_tokens"), z.literal("max_tokens")]) .optional(), diff --git a/src/model-catalog/normalize.ts b/src/model-catalog/normalize.ts index ff6b72c7489..300b67a72b3 100644 --- a/src/model-catalog/normalize.ts +++ b/src/model-catalog/normalize.ts @@ -202,6 +202,17 @@ function normalizeModelCatalogCompat(value: unknown): 
ModelCompatConfig | undefi } } + if (isRecord(value.reasoningEffortMap)) { + const reasoningEffortMap = Object.fromEntries( + Object.entries(value.reasoningEffortMap) + .map(([key, mapped]) => [key.trim(), typeof mapped === "string" ? mapped.trim() : ""]) + .filter(([key, mapped]) => key.length > 0 && mapped.length > 0), + ); + if (Object.keys(reasoningEffortMap).length > 0) { + compat.reasoningEffortMap = reasoningEffortMap; + } + } + const maxTokensField = normalizeOptionalString(value.maxTokensField) ?? ""; if (maxTokensField === "max_completion_tokens" || maxTokensField === "max_tokens") { compat.maxTokensField = maxTokensField;