fix(models): honor provider context defaults

Peter Steinberger
2026-04-27 06:32:15 +01:00
parent 5e9a96fafb
commit 422fa99197
16 changed files with 264 additions and 29 deletions

View File

@@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
- Providers/Ollama: skip ambient localhost discovery unless Ollama auth or meaningful config opts in, preventing unexpected probes to `127.0.0.1:11434` for users who are not using Ollama. Fixes #56939; supersedes #57116. Thanks @IanxDev and @tsukhani.
- Providers/Ollama: skip implicit localhost discovery when a custom remote `api: "ollama"` provider is configured, while still treating `127/8` loopback hosts as local. Carries forward #43224. Thanks @issacthekaylon.
- Providers/models: honor provider-level `contextWindow`, `contextTokens`, and `maxTokens` as defaults when resolving discovered models, so local Ollama and other self-hosted providers can cap all models without repeating per-model entries. Fixes #44786; carries forward #44955. Thanks @voltwake and @maweibin.
- Providers/Ollama: move memory embeddings to Ollama's current `/api/embed` endpoint with batched `input` requests while preserving vector normalization and custom provider auth/header overrides. Fixes #39983. Thanks @sskkcc and @LiudengZhang.
- Providers/Ollama: route local web search through Ollama's signed `/api/experimental/web_search` daemon proxy, use hosted `/api/web_search` directly for `ollama.com`, and keep `OLLAMA_API_KEY` scoped to cloud fallback auth. Fixes #69132. Thanks @yoon1012 and @hyspacex.
- Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010.
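For the `baseURL` alias entry above, a minimal config sketch (the host is illustrative):

```json5
models: {
  providers: {
    ollama: {
      // OpenAI SDK-style spelling, now accepted as an alias for baseUrl.
      baseURL: "http://ollama.lan:11434",
    },
  },
},
```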

View File

@@ -1,4 +1,4 @@
6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json
15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json
c4b54de7557cd14b35a629585ad706a4e7de411cc725bcbce921f22bfaf14ada config-baseline.json
3fd4da36f28b508f8e6ac4fceb18262244d8ed70df15244192032ec71027bb4f config-baseline.core.json
07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

View File

@@ -16,7 +16,7 @@ Reference for **LLM/model providers** (not chat channels like WhatsApp/Telegram)
- Model refs use `provider/model` (example: `opencode/claude-opus-4-6`).
- `agents.defaults.models` acts as an allowlist when set.
- CLI helpers: `openclaw onboard`, `openclaw models list`, `openclaw models set <provider/model>`.
- `models.providers.*.models[].contextWindow` is native model metadata; `contextTokens` is the effective runtime cap.
- `models.providers.*.contextWindow` / `contextTokens` / `maxTokens` set provider-level defaults; `models.providers.*.models[].contextWindow` / `contextTokens` / `maxTokens` override them per model (sketch below).
- Fallback rules, cooldown probes, and session-override persistence: [Model failover](/concepts/model-failover).
</Accordion>
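A minimal sketch of that precedence, using the provider, model ids, and cap values that appear in this commit's tests:

```json5
models: {
  providers: {
    ollama: {
      baseUrl: "http://localhost:11434",
      // Provider-level defaults applied to every model below.
      contextWindow: 8192,
      contextTokens: 8000,
      maxTokens: 4096,
      models: [
        { id: "qwen3.5:9b" }, // inherits all three provider defaults
        { id: "llama3.3", contextWindow: 16384 }, // per-model value overrides the default
      ],
    },
  },
},
```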
<Accordion title="OpenAI provider/runtime split">

View File

@@ -429,6 +429,9 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc.).
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
- `models.providers.*.contextWindow`: default native context window for models under this provider when the model entry does not set `contextWindow`.
- `models.providers.*.contextTokens`: default effective runtime context cap for models under this provider when the model entry does not set `contextTokens`.
- `models.providers.*.maxTokens`: default output-token cap for models under this provider when the model entry does not set `maxTokens`.
- `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling.
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
@@ -447,8 +450,8 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
</Accordion>
<Accordion title="Model catalog entries">
- `models.providers.*.models`: explicit provider model catalog entries.
- `models.providers.*.models.*.contextWindow`: native model context window metadata.
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ.
- `models.providers.*.models.*.contextWindow`: native model context window metadata. This overrides provider-level `contextWindow` for that model.
- `models.providers.*.models.*.contextTokens`: optional runtime context cap that overrides provider-level `contextTokens`. Use it when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ (see the sketch below).
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request.
</Accordion>
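A catalog-entry sketch combining a per-model cap override with the compat hints above; the `myproxy` provider name, host, and values are hypothetical:

```json5
models: {
  providers: {
    myproxy: {
      api: "openai-completions",
      baseUrl: "https://llm.internal.example/v1",
      contextTokens: 32000, // provider-level default runtime cap
      models: [
        {
          id: "small-model",
          contextWindow: 131072, // native model metadata
          contextTokens: 16000, // overrides the provider-level default
          compat: { requiresStringContent: true }, // flatten text content arrays to strings
        },
      ],
    },
  },
},
```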

View File

@@ -462,7 +462,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
<Accordion title="Context windows">
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
You can set provider-level `contextWindow`, `contextTokens`, and `maxTokens` defaults for every model under that Ollama provider, then override them per model when needed. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
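A sketch of the `params` behavior described above (model id from this commit's tests; values illustrative):

```json5
models: {
  providers: {
    ollama: {
      models: [
        {
          id: "qwen3.5:9b",
          contextWindow: 32768,
          params: {
            num_ctx: 8192, // sent as options.num_ctx on native and OpenAI-compat requests
            temperature: 0.7,
            think: false, // disables API-level thinking for Qwen-style thinking models
          },
        },
      ],
    },
  },
},
```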
@@ -471,6 +471,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
models: {
providers: {
ollama: {
contextWindow: 32768,
models: [
{
id: "llama3.3",

View File

@@ -187,6 +187,23 @@ describe("applyConfiguredContextWindows", () => {
expect(cache.get("custom/model")).toBe(200_000);
});
it("uses provider-level context defaults for configured model entries", () => {
const cache = new Map<string, number>();
applyConfiguredContextWindows({
cache,
modelsConfig: {
providers: {
ollama: {
contextWindow: 8_192,
models: [{ id: "qwen3.5:9b" }],
},
},
},
});
expect(cache.get("qwen3.5:9b")).toBe(8_192);
});
});
describe("createSessionManagerRuntimeRegistry", () => {
@@ -210,6 +227,50 @@ describe("createSessionManagerRuntimeRegistry", () => {
});
describe("resolveContextTokensForModel", () => {
it("uses provider-level context defaults when no model-level cap is set", () => {
const result = resolveContextTokensForModel({
cfg: {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
models: [],
},
},
},
},
provider: "ollama",
model: "qwen3.5:9b",
fallbackContextTokens: 216_000,
allowAsyncLoad: false,
});
expect(result).toBe(8_192);
});
it("prefers model-level context caps over provider-level defaults", () => {
const result = resolveContextTokensForModel({
cfg: {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
models: [{ ...testModelContextWindow("qwen3.5:9b", 216_000), contextTokens: 16_000 }],
},
},
},
},
provider: "ollama",
model: "qwen3.5:9b",
fallbackContextTokens: 216_000,
allowAsyncLoad: false,
});
expect(result).toBe(16_000);
});
it("returns 1M context when anthropic context1m is enabled for opus/sonnet", () => {
const result = resolveContextTokensForModel({
cfg: {

View File

@@ -21,7 +21,11 @@ type ModelRegistryLike = {
getAll: () => ModelEntry[];
};
type ConfigModelEntry = { id?: string; contextWindow?: number; contextTokens?: number };
type ProviderConfigEntry = { models?: ConfigModelEntry[] };
type ProviderConfigEntry = {
contextWindow?: number;
contextTokens?: number;
models?: ConfigModelEntry[];
};
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };
type AgentModelEntry = { params?: Record<string, unknown> };
@@ -83,7 +87,11 @@ export function applyConfiguredContextWindows(params: {
? model.contextTokens
: typeof model?.contextWindow === "number"
? model.contextWindow
: undefined;
: typeof provider?.contextTokens === "number"
? provider.contextTokens
: typeof provider?.contextWindow === "number"
? provider.contextWindow
: undefined;
if (!modelId || !contextTokens || contextTokens <= 0) {
continue;
}
@@ -340,30 +348,41 @@ function resolveConfiguredProviderContextTokens(
// Mirror the lookup order in pi-embedded-runner/model.ts: exact key first,
// then normalized fallback. This prevents alias collisions from picking the
// wrong configured cap based on Object.entries iteration order.
function readProviderContextTokens(providerConfig: ProviderConfigEntry | undefined) {
return typeof providerConfig?.contextTokens === "number"
? providerConfig.contextTokens
: typeof providerConfig?.contextWindow === "number"
? providerConfig.contextWindow
: undefined;
}
function findContextTokens(matchProviderId: (id: string) => boolean): number | undefined {
for (const [providerId, providerConfig] of Object.entries(providers!)) {
if (!matchProviderId(providerId)) {
continue;
}
if (!Array.isArray(providerConfig?.models)) {
continue;
}
for (const m of providerConfig.models) {
const contextTokens =
typeof m?.contextTokens === "number"
? m.contextTokens
: typeof m?.contextWindow === "number"
? m.contextWindow
: undefined;
if (
typeof m?.id === "string" &&
m.id === model &&
typeof contextTokens === "number" &&
contextTokens > 0
) {
return contextTokens;
if (Array.isArray(providerConfig?.models)) {
for (const m of providerConfig.models) {
const contextTokens =
typeof m?.contextTokens === "number"
? m.contextTokens
: typeof m?.contextWindow === "number"
? m.contextWindow
: undefined;
if (
typeof m?.id === "string" &&
m.id === model &&
typeof contextTokens === "number" &&
contextTokens > 0
) {
return contextTokens;
}
}
}
const providerContextTokens = readProviderContextTokens(providerConfig);
if (typeof providerContextTokens === "number" && providerContextTokens > 0) {
return providerContextTokens;
}
}
return undefined;
}
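Restated as a standalone sketch (simplified types, not the actual module), the lookup above resolves a cap in this order: model `contextTokens`, model `contextWindow`, provider `contextTokens`, provider `contextWindow`:

```ts
type Caps = { contextWindow?: number; contextTokens?: number };

// First positive numeric candidate wins; model-level values beat provider defaults.
function effectiveContextTokens(model?: Caps, provider?: Caps): number | undefined {
  const candidates = [
    model?.contextTokens,
    model?.contextWindow,
    provider?.contextTokens,
    provider?.contextWindow,
  ];
  return candidates.find((v) => typeof v === "number" && v > 0);
}

effectiveContextTokens({ contextTokens: 16_000 }, { contextWindow: 8_192 }); // => 16_000
effectiveContextTokens(undefined, { contextWindow: 8_192 }); // => 8_192
```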

View File

@@ -20,6 +20,9 @@ export type InlineProviderConfig = {
baseUrl?: string;
api?: ModelDefinitionConfig["api"];
models?: ModelDefinitionConfig[];
contextWindow?: ModelProviderConfig["contextWindow"];
contextTokens?: ModelProviderConfig["contextTokens"];
maxTokens?: ModelProviderConfig["maxTokens"];
headers?: unknown;
authHeader?: boolean;
timeoutSeconds?: ModelProviderConfig["timeoutSeconds"];
@@ -154,6 +157,9 @@ export function buildInlineProviderModels(
return attachModelProviderRequestTransport(
{
...model,
contextWindow: model.contextWindow ?? entry?.contextWindow,
contextTokens: model.contextTokens ?? entry?.contextTokens,
maxTokens: model.maxTokens ?? entry?.maxTokens,
input: resolveProviderModelInput({
provider: trimmed,
modelId: model.id,

View File

@@ -443,6 +443,77 @@ describe("resolveModel", () => {
);
});
it("uses provider-level context defaults over discovered metadata", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3.5:9b",
templateModel: {
...makeModel("qwen3.5:9b"),
provider: "ollama",
contextWindow: 216_000,
contextTokens: 216_000,
maxTokens: 65_536,
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
contextTokens: 8_000,
models: [{ id: "qwen3.5:9b", name: "qwen3.5:9b" }],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect(result.model?.contextWindow).toBe(8_192);
expect((result.model as { contextTokens?: number } | undefined)?.contextTokens).toBe(8_000);
expect(result.model?.maxTokens).toBe(8_192);
});
it("keeps per-model context values above provider-level defaults", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3.5:9b",
templateModel: {
...makeModel("qwen3.5:9b"),
provider: "ollama",
contextWindow: 216_000,
maxTokens: 65_536,
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
maxTokens: 4_096,
models: [
{
id: "qwen3.5:9b",
name: "qwen3.5:9b",
contextWindow: 16_384,
maxTokens: 12_000,
},
],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect(result.model?.contextWindow).toBe(16_384);
expect(result.model?.maxTokens).toBe(12_000);
});
it("applies agent default model params without explicit provider config", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",

View File

@@ -483,6 +483,9 @@ function applyConfiguredProviderOverrides(params: {
!configuredModel &&
!providerConfig.baseUrl &&
!providerConfig.api &&
providerConfig.contextWindow === undefined &&
providerConfig.contextTokens === undefined &&
providerConfig.maxTokens === undefined &&
requestTimeoutMs === undefined &&
!providerHeaders &&
!providerRequest
@@ -518,6 +521,10 @@ function applyConfiguredProviderOverrides(params: {
cfg: params.cfg,
runtimeHooks: params.runtimeHooks,
});
const resolvedContextWindow =
metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow;
const resolvedMaxTokens =
metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens;
const requestConfig = resolveProviderRequestConfig({
provider: params.provider,
api:
@@ -541,9 +548,15 @@ function applyConfiguredProviderOverrides(params: {
reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning,
input: normalizedInput,
cost: metadataOverrideModel?.cost ?? discoveredModel.cost,
contextWindow: metadataOverrideModel?.contextWindow ?? discoveredModel.contextWindow,
contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens,
maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens,
contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow,
contextTokens:
metadataOverrideModel?.contextTokens ??
providerConfig.contextTokens ??
discoveredModel.contextTokens,
maxTokens:
typeof resolvedContextWindow === "number"
? Math.min(resolvedMaxTokens, resolvedContextWindow)
: resolvedMaxTokens,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
@@ -774,11 +787,16 @@ function resolveConfiguredFallbackModel(params: {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow:
configuredModel?.contextWindow ??
providerConfig?.contextWindow ??
providerConfig?.models?.[0]?.contextWindow ??
DEFAULT_CONTEXT_TOKENS,
contextTokens: configuredModel?.contextTokens ?? providerConfig?.models?.[0]?.contextTokens,
contextTokens:
configuredModel?.contextTokens ??
providerConfig?.contextTokens ??
providerConfig?.models?.[0]?.contextTokens,
maxTokens:
configuredModel?.maxTokens ??
providerConfig?.maxTokens ??
providerConfig?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
...(resolvedParams ? { params: resolvedParams } : {}),
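The `Math.min` clamp in the first hunk keeps the output budget within the resolved window; a minimal restatement under the same names, matching the resolveModel test earlier in this commit:

```ts
// maxTokens never exceeds the resolved context window when one is configured.
function clampMaxTokens(maxTokens: number, contextWindow?: number): number {
  return typeof contextWindow === "number" ? Math.min(maxTokens, contextWindow) : maxTokens;
}

// Provider contextWindow 8_192 with discovered maxTokens 65_536:
clampMaxTokens(65_536, 8_192); // => 8_192, as the test expects
```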

View File

@@ -1554,6 +1554,28 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
},
contextWindow: {
type: "number",
exclusiveMinimum: 0,
title: "Model Provider Context Window",
description:
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
},
contextTokens: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Model Provider Context Tokens",
description:
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
},
maxTokens: {
type: "number",
exclusiveMinimum: 0,
title: "Model Provider Max Tokens",
description:
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
},
timeoutSeconds: {
type: "integer",
exclusiveMinimum: 0,
@@ -26485,6 +26507,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
tags: ["models"],
},
"models.providers.*.contextWindow": {
label: "Model Provider Context Window",
help: "Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
tags: ["models"],
},
"models.providers.*.contextTokens": {
label: "Model Provider Context Tokens",
help: "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
tags: ["security", "auth", "models"],
},
"models.providers.*.maxTokens": {
label: "Model Provider Max Tokens",
help: "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
tags: ["security", "auth", "performance", "models"],
},
"models.providers.*.timeoutSeconds": {
label: "Model Provider Request Timeout",
help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",

View File

@@ -367,6 +367,9 @@ const TARGET_KEYS = [
"models.providers.*.baseUrl",
"models.providers.*.apiKey",
"models.providers.*.api",
"models.providers.*.contextWindow",
"models.providers.*.contextTokens",
"models.providers.*.maxTokens",
"models.providers.*.headers",
"models.providers.*.models",
"agents",

View File

@@ -826,6 +826,12 @@ export const FIELD_HELP: Record<string, string> = {
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
"models.providers.*.api":
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
"models.providers.*.contextWindow":
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
"models.providers.*.contextTokens":
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
"models.providers.*.maxTokens":
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
"models.providers.*.timeoutSeconds":
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
"models.providers.*.injectNumCtxForOpenAICompat":

View File

@@ -515,6 +515,9 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret
"models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.contextWindow": "Model Provider Context Window",
"models.providers.*.contextTokens": "Model Provider Context Tokens",
"models.providers.*.maxTokens": "Model Provider Max Tokens",
"models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers",

View File

@@ -119,6 +119,9 @@ export type ModelProviderConfig = {
apiKey?: SecretInput;
auth?: ModelProviderAuthMode;
api?: ModelApi;
contextWindow?: number;
contextTokens?: number;
maxTokens?: number;
timeoutSeconds?: number;
injectNumCtxForOpenAICompat?: boolean;
headers?: Record<string, SecretInput>;

View File

@@ -357,6 +357,9 @@ export const ModelProviderSchema = z
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
.optional(),
api: ModelApiSchema.optional(),
contextWindow: z.number().positive().optional(),
contextTokens: z.number().int().positive().optional(),
maxTokens: z.number().positive().optional(),
timeoutSeconds: z.number().int().positive().optional(),
injectNumCtxForOpenAICompat: z.boolean().optional(),
headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
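A quick standalone check of the three field shapes added above, sketched with plain zod rather than the project's full `ModelProviderSchema`:

```ts
import { z } from "zod";

// Mirrors the provider-level cap fields from the schema hunk above.
const ProviderCaps = z.object({
  contextWindow: z.number().positive().optional(),
  contextTokens: z.number().int().positive().optional(), // integer-only, unlike the other two
  maxTokens: z.number().positive().optional(),
});

ProviderCaps.parse({ contextWindow: 32768, contextTokens: 32000, maxTokens: 8192 }); // ok
// ProviderCaps.parse({ contextTokens: 31.5 }) would throw: expected int, received float
```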