mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-06 23:55:12 +00:00
fix(providers): map native reasoning efforts
This commit is contained in:
@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Nodes/CLI: add `openclaw nodes remove --node <id|name|ip>` and `node.pair.remove` so stale gateway-owned node pairing records can be cleaned without hand-editing state files. Thanks @openclaw.
|
||||
- Docker: install the CA certificate bundle in the slim runtime image so HTTPS calls from containerized gateways no longer fail TLS setup after the `bookworm-slim` base switch. Fixes #72787. Thanks @ryuhaneul.
|
||||
- Providers/reasoning: let Groq and LM Studio declare provider-native reasoning effort values, so Qwen thinking models receive `none`/`default` or `off`/`on` instead of OpenAI-only `low`/`medium` values. Fixes #32638. Thanks @Aqu1bp, @mgoulart, @Norpps, and @BSTail.
|
||||
- Local models: default custom providers with only `baseUrl` to the Chat Completions adapter and trust loopback model requests automatically, so local OpenAI-compatible proxies receive `/v1/chat/completions` without timing out. Fixes #40024. Thanks @parachuteshe.
|
||||
- Channels/message tool: surface Discord, Slack, and Mattermost `user:`/`channel:` target syntax in the shared message target schema and Discord ambiguity errors, so DM sends by numeric id stop burning retries before finding `user:<id>`. Fixes #72401. Thanks @garyd9, @hclsys, and @praveen9354.
|
||||
- Agents/tools: scope tool-loop detection history to the active run when available, so scheduled heartbeat cycles no longer inherit stale repeated-call counts from previous runs. Fixes #40144. Thanks @mattbrown319.
|
||||
|
||||
@@ -71,6 +71,14 @@ Use `openclaw models list --provider groq` for the most up-to-date list of
|
||||
models available on your account.
|
||||
</Tip>
|
||||
|
||||
## Reasoning models
|
||||
|
||||
OpenClaw maps its shared `/think` levels to Groq's model-specific
|
||||
`reasoning_effort` values. For `qwen/qwen3-32b`, disabled thinking sends
|
||||
`none` and enabled thinking sends `default`. For Groq GPT-OSS reasoning models,
|
||||
OpenClaw sends `low`, `medium`, or `high`; disabled thinking omits
|
||||
`reasoning_effort` because those models do not support a disabled value.
|
||||
|
||||
## Audio transcription
|
||||
|
||||
Groq also provides fast Whisper-based audio transcription. When configured as a
|
||||
|
||||
@@ -104,7 +104,7 @@ LM Studio is streaming-usage compatible. When it does not emit an OpenAI-shaped
|
||||
`usage` object, OpenClaw recovers token counts from llama.cpp-style
|
||||
`timings.prompt_n` / `timings.predicted_n` metadata instead.
|
||||
|
||||
Same behavior applies to these OpenAI-compatible local backends:
|
||||
Same streaming usage behavior applies to these OpenAI-compatible local backends:
|
||||
|
||||
- vLLM
|
||||
- SGLang
|
||||
@@ -114,6 +114,14 @@ Same behavior applies to these OpenAI-compatible local backends:
|
||||
- TabbyAPI
|
||||
- text-generation-webui
|
||||
|
||||
### Thinking compatibility
|
||||
|
||||
When LM Studio's `/api/v1/models` discovery reports model-specific reasoning
|
||||
options, OpenClaw preserves those native values in model compat metadata. For
|
||||
binary thinking models that advertise `allowed_options: ["off", "on"]`,
|
||||
OpenClaw maps disabled thinking to `off` and enabled `/think` levels to `on`
|
||||
instead of sending OpenAI-only values such as `low` or `medium`.
|
||||
|
||||
### Explicit configuration
|
||||
|
||||
```json5
|
||||
|
||||
60
extensions/groq/api.ts
Normal file
60
extensions/groq/api.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import type { ModelCompatConfig } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
|
||||
/** Groq model id whose `reasoning_effort` accepts only `none` / `default`. */
const GROQ_QWEN3_32B_ID = "qwen/qwen3-32b";

/** Groq GPT-OSS model ids that use the `low` / `medium` / `high` effort contract. */
const GROQ_GPT_OSS_REASONING_IDS = new Set([
  "openai/gpt-oss-20b",
  "openai/gpt-oss-120b",
  "openai/gpt-oss-safeguard-20b",
]);

/** Native `reasoning_effort` values advertised for Groq's Qwen 3 32B. */
export const GROQ_QWEN_REASONING_EFFORTS = ["none", "default"] as const;

/** Native `reasoning_effort` values advertised for Groq's GPT-OSS models. */
export const GROQ_GPT_OSS_REASONING_EFFORTS = ["low", "medium", "high"] as const;

/**
 * Translates shared thinking levels into Groq Qwen's binary toggle:
 * the two "disabled" spellings become `none`, every enabled level becomes
 * `default`. Key order is: off, none, minimal, low, medium, high, xhigh,
 * adaptive, max.
 */
export const GROQ_QWEN_REASONING_EFFORT_MAP: Record<string, string> = Object.fromEntries([
  ...["off", "none"].map((level): [string, string] => [level, "none"]),
  ...["minimal", "low", "medium", "high", "xhigh", "adaptive", "max"].map(
    (level): [string, string] => [level, "default"],
  ),
]);
|
||||
|
||||
/**
 * Canonicalizes a Groq model id for lookup: surrounding whitespace is removed
 * and the id is lower-cased. A missing id yields the empty string.
 */
function normalizeGroqModelId(modelId: string | undefined): string {
  if (modelId == null) {
    return "";
  }
  const trimmed = modelId.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * Resolves the reasoning-effort compat fields for a Groq model id.
 *
 * - `qwen/qwen3-32b` gets the `none` / `default` effort list plus the map that
 *   translates shared thinking levels to those two values.
 * - Groq GPT-OSS reasoning models get the `low` / `medium` / `high` effort list
 *   and no map.
 * - Any other id yields `null`.
 *
 * The id is matched case-insensitively after trimming. Every call returns
 * fresh array/object instances, so callers may mutate the result without
 * affecting the module-level constants or other callers.
 *
 * @param modelId Groq model id as configured or discovered.
 * @returns The compat patch for the model, or `null` when no patch applies.
 */
export function resolveGroqReasoningCompatPatch(
  modelId: string,
): Pick<
  ModelCompatConfig,
  "supportsReasoningEffort" | "supportedReasoningEfforts" | "reasoningEffortMap"
> | null {
  const normalized = normalizeGroqModelId(modelId);
  if (normalized === GROQ_QWEN3_32B_ID) {
    return {
      supportsReasoningEffort: true,
      supportedReasoningEfforts: [...GROQ_QWEN_REASONING_EFFORTS],
      // Copy the shared map (like the efforts array above) so a caller that
      // edits the returned compat cannot corrupt the exported constant.
      reasoningEffortMap: { ...GROQ_QWEN_REASONING_EFFORT_MAP },
    };
  }
  if (GROQ_GPT_OSS_REASONING_IDS.has(normalized)) {
    return {
      supportsReasoningEffort: true,
      supportedReasoningEfforts: [...GROQ_GPT_OSS_REASONING_EFFORTS],
    };
  }
  return null;
}
|
||||
|
||||
/**
 * Provider hook that contributes Groq reasoning compat for a resolved model.
 *
 * Only models whose `api` is `"openai-completions"` and whose `provider` is
 * `"groq"` are considered; everything else, and Groq models without a known
 * reasoning patch, yields `undefined`.
 *
 * @param params.modelId Model id used to look up the reasoning patch.
 * @param params.model Resolved model; only `api` and `provider` are inspected.
 * @returns The compat fields to merge, or `undefined` when nothing applies.
 */
export function contributeGroqResolvedModelCompat(params: {
  modelId: string;
  model: { api?: unknown; provider?: unknown };
}): Partial<ModelCompatConfig> | undefined {
  const { modelId, model } = params;
  const isGroqChatCompletions = model.api === "openai-completions" && model.provider === "groq";
  if (!isGroqChatCompletions) {
    return undefined;
  }
  const patch = resolveGroqReasoningCompatPatch(modelId);
  return patch == null ? undefined : patch;
}
|
||||
51
extensions/groq/index.test.ts
Normal file
51
extensions/groq/index.test.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { capturePluginRegistration } from "openclaw/plugin-sdk/testing";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { contributeGroqResolvedModelCompat, resolveGroqReasoningCompatPatch } from "./api.js";
|
||||
import plugin from "./index.js";
|
||||
|
||||
// Covers the Groq reasoning compat helpers and the plugin registration shape.
describe("groq provider compat", () => {
  const qwenModelId = "qwen/qwen3-32b";

  it("maps Groq Qwen 3 reasoning to provider-native none/default values", () => {
    const patch = resolveGroqReasoningCompatPatch(qwenModelId);

    expect(patch).toEqual({
      supportsReasoningEffort: true,
      supportedReasoningEfforts: ["none", "default"],
      reasoningEffortMap: expect.objectContaining({
        off: "none",
        low: "default",
        medium: "default",
        high: "default",
      }),
    });
  });

  it("keeps GPT-OSS reasoning on the Groq low/medium/high contract", () => {
    const patch = resolveGroqReasoningCompatPatch("openai/gpt-oss-120b");

    expect(patch).toEqual({
      supportsReasoningEffort: true,
      supportedReasoningEfforts: ["low", "medium", "high"],
    });
  });

  it("contributes compat only for Groq OpenAI-compatible chat models", () => {
    // Same model id and api; only the provider differs between the two calls.
    const contributeFor = (provider: string) =>
      contributeGroqResolvedModelCompat({
        modelId: qwenModelId,
        model: { api: "openai-completions", provider },
      });

    expect(contributeFor("groq")).toMatchObject({
      supportedReasoningEfforts: ["none", "default"],
    });
    expect(contributeFor("openrouter")).toBeUndefined();
  });

  it("registers Groq model and media providers", () => {
    const { providers, mediaUnderstandingProviders } = capturePluginRegistration(plugin);

    expect(providers[0]).toMatchObject({
      id: "groq",
      label: "Groq",
      envVars: ["GROQ_API_KEY"],
    });
    expect(mediaUnderstandingProviders[0]?.id).toBe("groq");
  });
});
|
||||
@@ -1,11 +1,21 @@
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { contributeGroqResolvedModelCompat } from "./api.js";
|
||||
import { groqMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
|
||||
/**
 * Bundled Groq plugin entry.
 *
 * Registers two things under the `groq` id:
 * - a model provider whose `contributeResolvedModelCompat` hook delegates to
 *   `contributeGroqResolvedModelCompat`, and
 * - the Groq media-understanding provider.
 */
export default definePluginEntry({
  id: "groq",
  // The entry previously carried two `name` / `description` pairs; only the
  // later pair ever took effect, so the shadowed first pair is removed.
  name: "Groq Provider",
  description: "Bundled Groq provider plugin",
  register(api) {
    api.registerProvider({
      id: "groq",
      label: "Groq",
      docsPath: "/providers/groq",
      envVars: ["GROQ_API_KEY"],
      auth: [],
      contributeResolvedModelCompat: ({ modelId, model }) =>
        contributeGroqResolvedModelCompat({ modelId, model }),
    });
    api.registerMediaUnderstandingProvider(groqMediaUnderstandingProvider);
  },
});
|
||||
|
||||
@@ -147,6 +147,11 @@ describe("lmstudio plugin", () => {
|
||||
contextTokens: 8192,
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
compat: {
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["off", "on"],
|
||||
reasoningEffortMap: { off: "off", high: "on" },
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "phi-4",
|
||||
@@ -173,7 +178,12 @@ describe("lmstudio plugin", () => {
|
||||
provider: "lmstudio",
|
||||
id: "qwen3-8b-instruct",
|
||||
name: "Qwen 3 8B Instruct",
|
||||
compat: { supportsUsageInStreaming: true },
|
||||
compat: {
|
||||
supportsUsageInStreaming: true,
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["off", "on"],
|
||||
reasoningEffortMap: { off: "off", high: "on" },
|
||||
},
|
||||
contextWindow: 32768,
|
||||
contextTokens: 8192,
|
||||
reasoning: true,
|
||||
|
||||
@@ -34,7 +34,7 @@ function resolveLmstudioAugmentedCatalogEntries(config: OpenClawConfig | undefin
|
||||
provider: PROVIDER_ID,
|
||||
id: entry.id,
|
||||
name: entry.name ?? entry.id,
|
||||
compat: { supportsUsageInStreaming: true },
|
||||
compat: { ...entry.compat, supportsUsageInStreaming: true },
|
||||
contextWindow: entry.contextWindow,
|
||||
contextTokens: entry.contextTokens,
|
||||
reasoning: entry.reasoning,
|
||||
|
||||
@@ -163,7 +163,7 @@ export async function discoverLmstudioModels(
|
||||
reasoning: base.reasoning,
|
||||
input: base.input,
|
||||
cost: SELF_HOSTED_DEFAULT_COST,
|
||||
compat: { supportsUsageInStreaming: true },
|
||||
compat: { ...base.compat, supportsUsageInStreaming: true },
|
||||
contextWindow: base.contextWindow,
|
||||
contextTokens: base.contextTokens,
|
||||
maxTokens: base.maxTokens,
|
||||
|
||||
@@ -8,6 +8,7 @@ import { discoverLmstudioModels, ensureLmstudioModelLoaded } from "./models.fetc
|
||||
import {
|
||||
normalizeLmstudioProviderConfig,
|
||||
resolveLmstudioInferenceBase,
|
||||
resolveLmstudioReasoningCompat,
|
||||
resolveLmstudioReasoningCapability,
|
||||
resolveLmstudioServerBase,
|
||||
} from "./models.js";
|
||||
@@ -145,6 +146,40 @@ describe("lmstudio-models", () => {
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("maps LM Studio native reasoning options into OpenAI-compatible effort compat", () => {
|
||||
expect(
|
||||
resolveLmstudioReasoningCompat({
|
||||
capabilities: {
|
||||
reasoning: {
|
||||
allowed_options: ["off", "on"],
|
||||
default: "on",
|
||||
},
|
||||
},
|
||||
}),
|
||||
).toEqual({
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["off", "on"],
|
||||
reasoningEffortMap: expect.objectContaining({
|
||||
off: "off",
|
||||
none: "off",
|
||||
low: "on",
|
||||
medium: "on",
|
||||
high: "on",
|
||||
}),
|
||||
});
|
||||
|
||||
expect(
|
||||
resolveLmstudioReasoningCompat({
|
||||
capabilities: {
|
||||
reasoning: {
|
||||
allowed_options: ["off"],
|
||||
default: "off",
|
||||
},
|
||||
},
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("discovers llm models and maps metadata", async () => {
|
||||
const fetchMock = vi.fn(async (_url: string | URL) => ({
|
||||
ok: true,
|
||||
@@ -205,7 +240,17 @@ describe("lmstudio-models", () => {
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
compat: { supportsUsageInStreaming: true },
|
||||
compat: {
|
||||
supportsUsageInStreaming: true,
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["off", "on"],
|
||||
reasoningEffortMap: expect.objectContaining({
|
||||
off: "off",
|
||||
none: "off",
|
||||
medium: "on",
|
||||
high: "on",
|
||||
}),
|
||||
},
|
||||
contextWindow: 262144,
|
||||
contextTokens: LMSTUDIO_DEFAULT_LOAD_CONTEXT_LENGTH,
|
||||
maxTokens: SELF_HOSTED_DEFAULT_MAX_TOKENS,
|
||||
|
||||
@@ -40,6 +40,7 @@ type LmstudioConfiguredCatalogEntry = {
|
||||
contextTokens?: number;
|
||||
reasoning?: boolean;
|
||||
input?: ("text" | "image" | "document")[];
|
||||
compat?: ModelDefinitionConfig["compat"];
|
||||
};
|
||||
|
||||
function normalizeReasoningOption(value: unknown): string | null {
|
||||
@@ -58,6 +59,83 @@ function isReasoningEnabledOption(value: unknown): boolean {
|
||||
return normalized !== "off";
|
||||
}
|
||||
|
||||
/**
 * Normalizes a raw list of reasoning options.
 *
 * Non-array input yields an empty list. Each element is passed through
 * `normalizeReasoningOption`; elements that normalize to `null` are dropped
 * and duplicates are removed, keeping first-seen order.
 */
function normalizeReasoningOptions(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const unique = new Set<string>();
  value.forEach((candidate) => {
    const normalized = normalizeReasoningOption(candidate);
    if (normalized !== null) {
      unique.add(normalized);
    }
  });
  return Array.from(unique);
}
|
||||
|
||||
/**
 * Returns the normalized `default` reasoning option when it is present and
 * counts as an enabled option per `isReasoningEnabledOption`; otherwise `null`.
 */
function resolveLmstudioReasoningDefault(
  reasoning: LmstudioReasoningCapabilityWire,
): string | null {
  const candidate = normalizeReasoningOption(reasoning.default);
  if (!candidate) {
    return null;
  }
  return isReasoningEnabledOption(candidate) ? candidate : null;
}
|
||||
|
||||
/**
 * Picks the native value to send when thinking is enabled.
 *
 * Preference order:
 * 1. the model's own enabled default, if it is one of the allowed options;
 * 2. the first allowed option equal to `"on"` or `"default"`;
 * 3. the first allowed option that `isReasoningEnabledOption` accepts.
 *
 * @returns The chosen option, or `undefined` when no allowed option is enabled.
 */
function resolveLmstudioEnabledReasoningOption(
  allowedOptions: readonly string[],
  reasoning: LmstudioReasoningCapabilityWire,
): string | undefined {
  const preferred = resolveLmstudioReasoningDefault(reasoning);
  if (preferred && allowedOptions.includes(preferred)) {
    return preferred;
  }

  const canonical = allowedOptions.find((option) => option === "on" || option === "default");
  if (canonical !== undefined) {
    return canonical;
  }

  return allowedOptions.find((option) => isReasoningEnabledOption(option));
}
|
||||
|
||||
/**
 * Picks the native value to send when thinking is disabled: `"off"` if the
 * model allows it, else `"none"` if allowed, else `undefined`.
 */
function resolveLmstudioDisabledReasoningOption(
  allowedOptions: readonly string[],
): string | undefined {
  // Ordered by preference; the first spelling the model allows wins.
  for (const disabledSpelling of ["off", "none"]) {
    if (allowedOptions.includes(disabledSpelling)) {
      return disabledSpelling;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Builds reasoning-effort compat metadata from an LM Studio model's
 * `capabilities.reasoning` payload.
 *
 * Returns `undefined` when the payload is missing, when it lists no usable
 * `allowed_options`, or when none of the allowed options represents enabled
 * thinking. Otherwise the allowed options are exposed as the supported
 * efforts, and the effort map sends every enabled thinking level to the chosen
 * enabled option. `off` / `none` are mapped only when the model allows a
 * disabled option.
 *
 * @param entry Wire entry; only `capabilities` is read.
 */
export function resolveLmstudioReasoningCompat(
  entry: Pick<LmstudioModelWire, "capabilities">,
): ModelDefinitionConfig["compat"] | undefined {
  const reasoning = entry.capabilities?.reasoning;
  if (reasoning == null) {
    return undefined;
  }

  const allowedOptions = normalizeReasoningOptions(reasoning.allowed_options);
  if (allowedOptions.length === 0) {
    return undefined;
  }

  const enabled = resolveLmstudioEnabledReasoningOption(allowedOptions, reasoning);
  if (!enabled) {
    return undefined;
  }

  const reasoningEffortMap: Record<string, string> = {};
  const disabled = resolveLmstudioDisabledReasoningOption(allowedOptions);
  if (disabled) {
    reasoningEffortMap.off = disabled;
    reasoningEffortMap.none = disabled;
  }
  for (const level of ["minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]) {
    reasoningEffortMap[level] = enabled;
  }

  return {
    supportsReasoningEffort: true,
    supportedReasoningEfforts: allowedOptions,
    reasoningEffortMap,
  };
}
|
||||
|
||||
/**
|
||||
* Resolves LM Studio reasoning support from capabilities payloads.
|
||||
* Defaults to false when the server omits reasoning metadata.
|
||||
@@ -69,12 +147,7 @@ export function resolveLmstudioReasoningCapability(
|
||||
if (reasoning === undefined || reasoning === null) {
|
||||
return false;
|
||||
}
|
||||
const allowedOptionsRaw = reasoning.allowed_options;
|
||||
const allowedOptions = Array.isArray(allowedOptionsRaw)
|
||||
? allowedOptionsRaw
|
||||
.map((option) => normalizeReasoningOption(option))
|
||||
.filter((option): option is string => option !== null)
|
||||
: [];
|
||||
const allowedOptions = normalizeReasoningOptions(reasoning.allowed_options);
|
||||
if (allowedOptions.length > 0) {
|
||||
return allowedOptions.some((option) => isReasoningEnabledOption(option));
|
||||
}
|
||||
@@ -130,6 +203,41 @@ function isLikelyHostBaseUrl(value: string): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Sanitizes a user-configured reasoning effort map.
 *
 * Accepts only plain (non-array) objects. Keys and string values are trimmed;
 * entries with an empty key, a non-string value, or an empty value are
 * dropped. Returns `undefined` when nothing usable remains.
 */
function normalizeConfiguredReasoningEffortMap(value: unknown): Record<string, string> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }

  const kept: Array<[string, string]> = [];
  for (const [rawKey, rawMapped] of Object.entries(value)) {
    const key = rawKey.trim();
    const mapped = typeof rawMapped === "string" ? rawMapped.trim() : "";
    if (key.length > 0 && mapped.length > 0) {
      kept.push([key, mapped]);
    }
  }

  return kept.length > 0 ? Object.fromEntries(kept) : undefined;
}
|
||||
|
||||
/**
 * Sanitizes the `compat` section of a configured LM Studio catalog entry.
 *
 * Only four fields are carried over, each only when well-formed:
 * `supportsUsageInStreaming` and `supportsReasoningEffort` (booleans),
 * `supportedReasoningEfforts` (non-empty after normalization) and
 * `reasoningEffortMap` (non-empty after normalization). Returns `undefined`
 * for non-object input or when no field survives.
 */
function normalizeLmstudioConfiguredCompat(value: unknown): ModelDefinitionConfig["compat"] {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }

  const raw = value as Record<string, unknown>;
  const efforts = normalizeReasoningOptions(raw.supportedReasoningEfforts);
  const effortMap = normalizeConfiguredReasoningEffortMap(raw.reasoningEffortMap);
  const usageInStreaming = raw.supportsUsageInStreaming;
  const reasoningEffort = raw.supportsReasoningEffort;

  // Spread order fixes the key order of the resulting object.
  const compat: NonNullable<ModelDefinitionConfig["compat"]> = {
    ...(typeof usageInStreaming === "boolean" ? { supportsUsageInStreaming: usageInStreaming } : {}),
    ...(typeof reasoningEffort === "boolean" ? { supportsReasoningEffort: reasoningEffort } : {}),
    ...(efforts.length > 0 ? { supportedReasoningEfforts: efforts } : {}),
    ...(effortMap ? { reasoningEffortMap: effortMap } : {}),
  };

  return Object.keys(compat).length > 0 ? compat : undefined;
}
|
||||
|
||||
function toFetchableLmstudioBaseUrl(value: string): string {
|
||||
if (hasExplicitHttpScheme(value) || !isLikelyHostBaseUrl(value)) {
|
||||
return value;
|
||||
@@ -226,6 +334,7 @@ export function normalizeLmstudioConfiguredCatalogEntry(
|
||||
item === "text" || item === "image" || item === "document",
|
||||
)
|
||||
: undefined;
|
||||
const compat = normalizeLmstudioConfiguredCompat(record.compat);
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
@@ -233,6 +342,7 @@ export function normalizeLmstudioConfiguredCatalogEntry(
|
||||
contextTokens,
|
||||
reasoning,
|
||||
input: input && input.length > 0 ? input : undefined,
|
||||
compat,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -290,6 +400,7 @@ export type LmstudioModelBase = {
|
||||
reasoning: boolean;
|
||||
input: Array<"text" | "image">;
|
||||
cost: ModelDefinitionConfig["cost"];
|
||||
compat?: ModelDefinitionConfig["compat"];
|
||||
contextWindow: number;
|
||||
contextTokens: number;
|
||||
maxTokens: number;
|
||||
@@ -335,6 +446,7 @@ export function mapLmstudioWireEntry(entry: LmstudioModelWire): LmstudioModelBas
|
||||
reasoning: resolveLmstudioReasoningCapability(entry),
|
||||
input: entry.capabilities?.vision ? ["text", "image"] : ["text"],
|
||||
cost: SELF_HOSTED_DEFAULT_COST,
|
||||
compat: resolveLmstudioReasoningCompat(entry),
|
||||
contextWindow,
|
||||
contextTokens,
|
||||
maxTokens: Math.max(1, Math.min(contextWindow, SELF_HOSTED_DEFAULT_MAX_TOKENS)),
|
||||
@@ -361,6 +473,7 @@ export function mapLmstudioWireModelsToConfig(
|
||||
reasoning: base.reasoning,
|
||||
input: base.input,
|
||||
cost: base.cost,
|
||||
...(base.compat ? { compat: base.compat } : {}),
|
||||
contextWindow: base.contextWindow,
|
||||
contextTokens: base.contextTokens,
|
||||
maxTokens: base.maxTokens,
|
||||
|
||||
@@ -24,4 +24,45 @@ describe("OpenAI reasoning effort support", () => {
|
||||
|
||||
expect(resolveOpenAIReasoningEffortForModel({ model, effort: "xhigh" })).toBe("xhigh");
|
||||
});
|
||||
|
||||
it("allows provider-native compat values when explicitly declared", () => {
|
||||
const model = {
|
||||
provider: "groq",
|
||||
id: "qwen/qwen3-32b",
|
||||
compat: {
|
||||
supportedReasoningEfforts: ["none", "default"],
|
||||
reasoningEffortMap: {
|
||||
off: "none",
|
||||
low: "default",
|
||||
medium: "default",
|
||||
high: "default",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
expect(resolveOpenAISupportedReasoningEfforts(model)).toEqual(["none", "default"]);
|
||||
expect(
|
||||
resolveOpenAIReasoningEffortForModel({
|
||||
model,
|
||||
effort: "medium",
|
||||
fallbackMap: model.compat.reasoningEffortMap,
|
||||
}),
|
||||
).toBe("default");
|
||||
expect(
|
||||
resolveOpenAIReasoningEffortForModel({
|
||||
model,
|
||||
effort: "off",
|
||||
fallbackMap: model.compat.reasoningEffortMap,
|
||||
}),
|
||||
).toBe("none");
|
||||
});
|
||||
|
||||
it("omits unsupported disabled reasoning instead of falling back to enabled effort", () => {
|
||||
expect(
|
||||
resolveOpenAIReasoningEffortForModel({
|
||||
model: { provider: "groq", id: "openai/gpt-oss-120b" },
|
||||
effort: "off",
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,7 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
|
||||
|
||||
export type OpenAIReasoningEffort = "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
|
||||
export type OpenAIApiReasoningEffort = OpenAIReasoningEffort;
|
||||
export type OpenAIApiReasoningEffort = OpenAIReasoningEffort | (string & {});
|
||||
|
||||
type OpenAIReasoningModel = {
|
||||
provider?: unknown;
|
||||
@@ -12,15 +12,6 @@ type OpenAIReasoningModel = {
|
||||
compat?: unknown;
|
||||
};
|
||||
|
||||
const ALL_OPENAI_REASONING_EFFORTS = [
|
||||
"none",
|
||||
"minimal",
|
||||
"low",
|
||||
"medium",
|
||||
"high",
|
||||
"xhigh",
|
||||
] as const satisfies readonly OpenAIApiReasoningEffort[];
|
||||
|
||||
const GPT_5_REASONING_EFFORTS = ["minimal", "low", "medium", "high"] as const;
|
||||
const GPT_51_REASONING_EFFORTS = ["none", "low", "medium", "high"] as const;
|
||||
const GPT_52_REASONING_EFFORTS = ["none", "low", "medium", "high", "xhigh"] as const;
|
||||
@@ -47,12 +38,21 @@ function readCompatReasoningEfforts(compat: unknown): OpenAIApiReasoningEffort[]
|
||||
if (!Array.isArray(raw)) {
|
||||
return undefined;
|
||||
}
|
||||
const supported = raw.filter((value): value is OpenAIApiReasoningEffort =>
|
||||
ALL_OPENAI_REASONING_EFFORTS.includes(value as OpenAIApiReasoningEffort),
|
||||
);
|
||||
const supported = [
|
||||
...new Set(
|
||||
raw
|
||||
.filter((value): value is string => typeof value === "string")
|
||||
.map((value) => value.trim())
|
||||
.filter(Boolean),
|
||||
),
|
||||
];
|
||||
return supported.length > 0 ? supported : undefined;
|
||||
}
|
||||
|
||||
/** True when the effort string is exactly `"none"` or `"off"` (case-sensitive). */
function isDisabledReasoningEffort(effort: string): boolean {
  switch (effort) {
    case "none":
    case "off":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
export function resolveOpenAISupportedReasoningEfforts(
|
||||
model: OpenAIReasoningModel,
|
||||
): readonly OpenAIApiReasoningEffort[] {
|
||||
@@ -113,7 +113,7 @@ export function resolveOpenAIReasoningEffortForModel(params: {
|
||||
if (supported.includes(normalized as OpenAIApiReasoningEffort)) {
|
||||
return normalized as OpenAIApiReasoningEffort;
|
||||
}
|
||||
if (requested === "none") {
|
||||
if (isDisabledReasoningEffort(requested) || isDisabledReasoningEffort(normalized)) {
|
||||
return undefined;
|
||||
}
|
||||
if (requested === "minimal" && supported.includes("low")) {
|
||||
|
||||
@@ -1791,6 +1791,77 @@ describe("openai transport stream", () => {
|
||||
expect(params.reasoning_effort).toBe("high");
|
||||
});
|
||||
|
||||
it("uses provider-native reasoning effort values declared by model compat", () => {
|
||||
const baseModel = {
|
||||
id: "qwen/qwen3-32b",
|
||||
name: "Qwen 3 32B",
|
||||
api: "openai-completions",
|
||||
provider: "groq",
|
||||
baseUrl: "https://api.groq.com/openai/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
compat: {
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["none", "default"],
|
||||
reasoningEffortMap: {
|
||||
off: "none",
|
||||
low: "default",
|
||||
medium: "default",
|
||||
high: "default",
|
||||
},
|
||||
},
|
||||
} as unknown as Model<"openai-completions">;
|
||||
const context = {
|
||||
systemPrompt: "system",
|
||||
messages: [],
|
||||
tools: [],
|
||||
} as never;
|
||||
|
||||
const enabled = buildOpenAICompletionsParams(baseModel, context, {
|
||||
reasoning: "medium",
|
||||
} as never) as { reasoning_effort?: unknown };
|
||||
const disabled = buildOpenAICompletionsParams(baseModel, context, {
|
||||
reasoning: "off",
|
||||
} as never) as { reasoning_effort?: unknown };
|
||||
|
||||
expect(enabled.reasoning_effort).toBe("default");
|
||||
expect(disabled.reasoning_effort).toBe("none");
|
||||
});
|
||||
|
||||
it("omits unsupported disabled reasoning for completions providers", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
id: "openai/gpt-oss-120b",
|
||||
name: "GPT OSS 120B",
|
||||
api: "openai-completions",
|
||||
provider: "groq",
|
||||
baseUrl: "https://api.groq.com/openai/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
compat: {
|
||||
supportsReasoningEffort: true,
|
||||
supportedReasoningEfforts: ["low", "medium", "high"],
|
||||
},
|
||||
} as unknown as Model<"openai-completions">,
|
||||
{
|
||||
systemPrompt: "system",
|
||||
messages: [],
|
||||
tools: [],
|
||||
} as never,
|
||||
{
|
||||
reasoning: "off",
|
||||
} as never,
|
||||
) as { reasoning_effort?: unknown };
|
||||
|
||||
expect(params).not.toHaveProperty("reasoning_effort");
|
||||
});
|
||||
|
||||
it("uses system role and streaming usage compat for native Qwen completions providers", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
|
||||
@@ -1526,26 +1526,12 @@ function getCompletionsReasoningDeltas(
|
||||
}
|
||||
|
||||
function detectCompat(model: OpenAIModeModel) {
|
||||
const provider = model.provider;
|
||||
const { capabilities, defaults: compatDefaults } = detectOpenAICompletionsCompat(model);
|
||||
const endpointClass = capabilities.endpointClass;
|
||||
const isDefaultRoute = endpointClass === "default";
|
||||
const isGroq = endpointClass === "groq-native" || (isDefaultRoute && provider === "groq");
|
||||
const reasoningEffortMap: Record<string, string> =
|
||||
isGroq && model.id === "qwen/qwen3-32b"
|
||||
? {
|
||||
minimal: "default",
|
||||
low: "default",
|
||||
medium: "default",
|
||||
high: "default",
|
||||
xhigh: "default",
|
||||
}
|
||||
: {};
|
||||
const { defaults: compatDefaults } = detectOpenAICompletionsCompat(model);
|
||||
return {
|
||||
supportsStore: compatDefaults.supportsStore,
|
||||
supportsDeveloperRole: compatDefaults.supportsDeveloperRole,
|
||||
supportsReasoningEffort: compatDefaults.supportsReasoningEffort,
|
||||
reasoningEffortMap,
|
||||
reasoningEffortMap: {},
|
||||
supportsUsageInStreaming: compatDefaults.supportsUsageInStreaming,
|
||||
maxTokensField: compatDefaults.maxTokensField,
|
||||
requiresToolResultName: false,
|
||||
|
||||
@@ -59,6 +59,7 @@ export type ModelCompatConfig = SupportedOpenAICompatFields &
|
||||
SupportedAnthropicMessagesCompatFields & {
|
||||
thinkingFormat?: SupportedThinkingFormat;
|
||||
supportedReasoningEfforts?: string[];
|
||||
reasoningEffortMap?: Record<string, string>;
|
||||
visibleReasoningDetailTypes?: string[];
|
||||
supportsTools?: boolean;
|
||||
supportsPromptCacheKey?: boolean;
|
||||
|
||||
@@ -195,6 +195,8 @@ export const ModelCompatSchema = z
|
||||
supportsStrictMode: z.boolean().optional(),
|
||||
requiresStringContent: z.boolean().optional(),
|
||||
visibleReasoningDetailTypes: z.array(z.string().min(1)).optional(),
|
||||
supportedReasoningEfforts: z.array(z.string().min(1)).optional(),
|
||||
reasoningEffortMap: z.record(z.string().min(1), z.string().min(1)).optional(),
|
||||
maxTokensField: z
|
||||
.union([z.literal("max_completion_tokens"), z.literal("max_tokens")])
|
||||
.optional(),
|
||||
|
||||
@@ -202,6 +202,17 @@ function normalizeModelCatalogCompat(value: unknown): ModelCompatConfig | undefi
|
||||
}
|
||||
}
|
||||
|
||||
if (isRecord(value.reasoningEffortMap)) {
|
||||
const reasoningEffortMap = Object.fromEntries(
|
||||
Object.entries(value.reasoningEffortMap)
|
||||
.map(([key, mapped]) => [key.trim(), typeof mapped === "string" ? mapped.trim() : ""])
|
||||
.filter(([key, mapped]) => key.length > 0 && mapped.length > 0),
|
||||
);
|
||||
if (Object.keys(reasoningEffortMap).length > 0) {
|
||||
compat.reasoningEffortMap = reasoningEffortMap;
|
||||
}
|
||||
}
|
||||
|
||||
const maxTokensField = normalizeOptionalString(value.maxTokensField) ?? "";
|
||||
if (maxTokensField === "max_completion_tokens" || maxTokensField === "max_tokens") {
|
||||
compat.maxTokensField = maxTokensField;
|
||||
|
||||
Reference in New Issue
Block a user