fix(models): honor provider context defaults

Peter Steinberger
2026-04-27 06:32:15 +01:00
parent 5e9a96fafb
commit 422fa99197
16 changed files with 264 additions and 29 deletions

View File

@@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
- Providers/Ollama: skip ambient localhost discovery unless Ollama auth or meaningful config opts in, preventing unexpected probes to `127.0.0.1:11434` for users who are not using Ollama. Fixes #56939; supersedes #57116. Thanks @IanxDev and @tsukhani.
- Providers/Ollama: skip implicit localhost discovery when a custom remote `api: "ollama"` provider is configured, while still treating `127/8` loopback hosts as local. Carries forward #43224. Thanks @issacthekaylon.
- Providers/models: honor provider-level `contextWindow`, `contextTokens`, and `maxTokens` as defaults when resolving discovered models, so local Ollama and other self-hosted providers can cap all models without repeating per-model entries. Fixes #44786; carries forward #44955. Thanks @voltwake and @maweibin.
- Providers/Ollama: move memory embeddings to Ollama's current `/api/embed` endpoint with batched `input` requests while preserving vector normalization and custom provider auth/header overrides. Fixes #39983. Thanks @sskkcc and @LiudengZhang.
- Providers/Ollama: route local web search through Ollama's signed `/api/experimental/web_search` daemon proxy, use hosted `/api/web_search` directly for `ollama.com`, and keep `OLLAMA_API_KEY` scoped to cloud fallback auth. Fixes #69132. Thanks @yoon1012 and @hyspacex.
- Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010.
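For the `baseURL` alias entry above, a minimal config sketch (the host is illustrative):

```json5
models: {
  providers: {
    ollama: {
      // OpenAI SDK-style spelling, now accepted as an alias for baseUrl.
      baseURL: "http://ollama.lan:11434",
    },
  },
},
```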

View File

@@ -1,4 +1,4 @@
6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json
15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json
c4b54de7557cd14b35a629585ad706a4e7de411cc725bcbce921f22bfaf14ada config-baseline.json
3fd4da36f28b508f8e6ac4fceb18262244d8ed70df15244192032ec71027bb4f config-baseline.core.json
07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

View File

@@ -16,7 +16,7 @@ Reference for **LLM/model providers** (not chat channels like WhatsApp/Telegram)
- Model refs use `provider/model` (example: `opencode/claude-opus-4-6`).
- `agents.defaults.models` acts as an allowlist when set.
- CLI helpers: `openclaw onboard`, `openclaw models list`, `openclaw models set <provider/model>`.
- `models.providers.*.models[].contextWindow` is native model metadata; `contextTokens` is the effective runtime cap.
- `models.providers.*.contextWindow` / `contextTokens` / `maxTokens` set provider-level defaults; `models.providers.*.models[].contextWindow` / `contextTokens` / `maxTokens` override them per model (sketch below).
- Fallback rules, cooldown probes, and session-override persistence: [Model failover](/concepts/model-failover).
</Accordion>
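A minimal sketch of that precedence, using the provider, model ids, and cap values that appear in this commit's tests:

```json5
models: {
  providers: {
    ollama: {
      baseUrl: "http://localhost:11434",
      // Provider-level defaults applied to every model below.
      contextWindow: 8192,
      contextTokens: 8000,
      maxTokens: 4096,
      models: [
        { id: "qwen3.5:9b" }, // inherits all three provider defaults
        { id: "llama3.3", contextWindow: 16384 }, // per-model value overrides the default
      ],
    },
  },
},
```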
<Accordion title="OpenAI provider/runtime split">

View File

@@ -429,6 +429,9 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc.).
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
- `models.providers.*.contextWindow`: default native context window for models under this provider when the model entry does not set `contextWindow`.
- `models.providers.*.contextTokens`: default effective runtime context cap for models under this provider when the model entry does not set `contextTokens`.
- `models.providers.*.maxTokens`: default output-token cap for models under this provider when the model entry does not set `maxTokens`.
- `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling.
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
@@ -447,8 +450,8 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
</Accordion>
<Accordion title="Model catalog entries">
- `models.providers.*.models`: explicit provider model catalog entries.
- `models.providers.*.models.*.contextWindow`: native model context window metadata.
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ.
- `models.providers.*.models.*.contextWindow`: native model context window metadata. This overrides provider-level `contextWindow` for that model.
- `models.providers.*.models.*.contextTokens`: optional runtime context cap that overrides provider-level `contextTokens`. Use it when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ (see the sketch below).
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request.
</Accordion>
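A catalog-entry sketch combining a per-model cap override with the compat hints above; the `myproxy` provider name, host, and values are hypothetical:

```json5
models: {
  providers: {
    myproxy: {
      api: "openai-completions",
      baseUrl: "https://llm.internal.example/v1",
      contextTokens: 32000, // provider-level default runtime cap
      models: [
        {
          id: "small-model",
          contextWindow: 131072, // native model metadata
          contextTokens: 16000, // overrides the provider-level default
          compat: { requiresStringContent: true }, // flatten text content arrays to strings
        },
      ],
    },
  },
},
```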

View File

@@ -462,7 +462,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
<Accordion title="Context windows">
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
You can set provider-level `contextWindow`, `contextTokens`, and `maxTokens` defaults for every model under that Ollama provider, then override them per model when needed. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
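A sketch of the `params` behavior described above (model id from this commit's tests; values illustrative):

```json5
models: {
  providers: {
    ollama: {
      models: [
        {
          id: "qwen3.5:9b",
          contextWindow: 32768,
          params: {
            num_ctx: 8192, // sent as options.num_ctx on native and OpenAI-compat requests
            temperature: 0.7,
            think: false, // disables API-level thinking for Qwen-style thinking models
          },
        },
      ],
    },
  },
},
```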
@@ -471,6 +471,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
models: {
providers: {
ollama: {
contextWindow: 32768,
models: [
{
id: "llama3.3",

View File

@@ -187,6 +187,23 @@ describe("applyConfiguredContextWindows", () => {
expect(cache.get("custom/model")).toBe(200_000);
});
it("uses provider-level context defaults for configured model entries", () => {
const cache = new Map<string, number>();
applyConfiguredContextWindows({
cache,
modelsConfig: {
providers: {
ollama: {
contextWindow: 8_192,
models: [{ id: "qwen3.5:9b" }],
},
},
},
});
expect(cache.get("qwen3.5:9b")).toBe(8_192);
});
});
describe("createSessionManagerRuntimeRegistry", () => {
@@ -210,6 +227,50 @@ describe("createSessionManagerRuntimeRegistry", () => {
});
describe("resolveContextTokensForModel", () => {
it("uses provider-level context defaults when no model-level cap is set", () => {
const result = resolveContextTokensForModel({
cfg: {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
models: [],
},
},
},
},
provider: "ollama",
model: "qwen3.5:9b",
fallbackContextTokens: 216_000,
allowAsyncLoad: false,
});
expect(result).toBe(8_192);
});
it("prefers model-level context caps over provider-level defaults", () => {
const result = resolveContextTokensForModel({
cfg: {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
models: [{ ...testModelContextWindow("qwen3.5:9b", 216_000), contextTokens: 16_000 }],
},
},
},
},
provider: "ollama",
model: "qwen3.5:9b",
fallbackContextTokens: 216_000,
allowAsyncLoad: false,
});
expect(result).toBe(16_000);
});
it("returns 1M context when anthropic context1m is enabled for opus/sonnet", () => {
const result = resolveContextTokensForModel({
cfg: {

View File

@@ -21,7 +21,11 @@ type ModelRegistryLike = {
getAll: () => ModelEntry[];
};
type ConfigModelEntry = { id?: string; contextWindow?: number; contextTokens?: number };
type ProviderConfigEntry = { models?: ConfigModelEntry[] };
type ProviderConfigEntry = {
contextWindow?: number;
contextTokens?: number;
models?: ConfigModelEntry[];
};
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };
type AgentModelEntry = { params?: Record<string, unknown> };
@@ -83,7 +87,11 @@ export function applyConfiguredContextWindows(params: {
? model.contextTokens
: typeof model?.contextWindow === "number"
? model.contextWindow
: undefined;
: typeof provider?.contextTokens === "number"
? provider.contextTokens
: typeof provider?.contextWindow === "number"
? provider.contextWindow
: undefined;
if (!modelId || !contextTokens || contextTokens <= 0) {
continue;
}
@@ -340,30 +348,41 @@ function resolveConfiguredProviderContextTokens(
// Mirror the lookup order in pi-embedded-runner/model.ts: exact key first,
// then normalized fallback. This prevents alias collisions from picking the
// wrong configured cap based on Object.entries iteration order.
function readProviderContextTokens(providerConfig: ProviderConfigEntry | undefined) {
return typeof providerConfig?.contextTokens === "number"
? providerConfig.contextTokens
: typeof providerConfig?.contextWindow === "number"
? providerConfig.contextWindow
: undefined;
}
function findContextTokens(matchProviderId: (id: string) => boolean): number | undefined {
for (const [providerId, providerConfig] of Object.entries(providers!)) {
if (!matchProviderId(providerId)) {
continue;
}
if (!Array.isArray(providerConfig?.models)) {
continue;
}
for (const m of providerConfig.models) {
const contextTokens =
typeof m?.contextTokens === "number"
? m.contextTokens
: typeof m?.contextWindow === "number"
? m.contextWindow
: undefined;
if (
typeof m?.id === "string" &&
m.id === model &&
typeof contextTokens === "number" &&
contextTokens > 0
) {
return contextTokens;
if (Array.isArray(providerConfig?.models)) {
for (const m of providerConfig.models) {
const contextTokens =
typeof m?.contextTokens === "number"
? m.contextTokens
: typeof m?.contextWindow === "number"
? m.contextWindow
: undefined;
if (
typeof m?.id === "string" &&
m.id === model &&
typeof contextTokens === "number" &&
contextTokens > 0
) {
return contextTokens;
}
}
}
const providerContextTokens = readProviderContextTokens(providerConfig);
if (typeof providerContextTokens === "number" && providerContextTokens > 0) {
return providerContextTokens;
}
}
return undefined;
}
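Restated as a standalone sketch (simplified types, not the actual module), the lookup above resolves a cap in this order: model `contextTokens`, model `contextWindow`, provider `contextTokens`, provider `contextWindow`:

```ts
type Caps = { contextWindow?: number; contextTokens?: number };

// First positive numeric candidate wins; model-level values beat provider defaults.
function effectiveContextTokens(model?: Caps, provider?: Caps): number | undefined {
  const candidates = [
    model?.contextTokens,
    model?.contextWindow,
    provider?.contextTokens,
    provider?.contextWindow,
  ];
  return candidates.find((v) => typeof v === "number" && v > 0);
}

effectiveContextTokens({ contextTokens: 16_000 }, { contextWindow: 8_192 }); // => 16_000
effectiveContextTokens(undefined, { contextWindow: 8_192 }); // => 8_192
```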

View File

@@ -20,6 +20,9 @@ export type InlineProviderConfig = {
baseUrl?: string;
api?: ModelDefinitionConfig["api"];
models?: ModelDefinitionConfig[];
contextWindow?: ModelProviderConfig["contextWindow"];
contextTokens?: ModelProviderConfig["contextTokens"];
maxTokens?: ModelProviderConfig["maxTokens"];
headers?: unknown;
authHeader?: boolean;
timeoutSeconds?: ModelProviderConfig["timeoutSeconds"];
@@ -154,6 +157,9 @@ export function buildInlineProviderModels(
return attachModelProviderRequestTransport(
{
...model,
contextWindow: model.contextWindow ?? entry?.contextWindow,
contextTokens: model.contextTokens ?? entry?.contextTokens,
maxTokens: model.maxTokens ?? entry?.maxTokens,
input: resolveProviderModelInput({
provider: trimmed,
modelId: model.id,

View File

@@ -443,6 +443,77 @@ describe("resolveModel", () => {
);
});
it("uses provider-level context defaults over discovered metadata", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3.5:9b",
templateModel: {
...makeModel("qwen3.5:9b"),
provider: "ollama",
contextWindow: 216_000,
contextTokens: 216_000,
maxTokens: 65_536,
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
contextTokens: 8_000,
models: [{ id: "qwen3.5:9b", name: "qwen3.5:9b" }],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect(result.model?.contextWindow).toBe(8_192);
expect((result.model as { contextTokens?: number } | undefined)?.contextTokens).toBe(8_000);
expect(result.model?.maxTokens).toBe(8_192);
});
it("keeps per-model context values above provider-level defaults", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3.5:9b",
templateModel: {
...makeModel("qwen3.5:9b"),
provider: "ollama",
contextWindow: 216_000,
maxTokens: 65_536,
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
contextWindow: 8_192,
maxTokens: 4_096,
models: [
{
id: "qwen3.5:9b",
name: "qwen3.5:9b",
contextWindow: 16_384,
maxTokens: 12_000,
},
],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect(result.model?.contextWindow).toBe(16_384);
expect(result.model?.maxTokens).toBe(12_000);
});
it("applies agent default model params without explicit provider config", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",

View File

@@ -483,6 +483,9 @@ function applyConfiguredProviderOverrides(params: {
!configuredModel &&
!providerConfig.baseUrl &&
!providerConfig.api &&
providerConfig.contextWindow === undefined &&
providerConfig.contextTokens === undefined &&
providerConfig.maxTokens === undefined &&
requestTimeoutMs === undefined &&
!providerHeaders &&
!providerRequest
@@ -518,6 +521,10 @@ function applyConfiguredProviderOverrides(params: {
cfg: params.cfg,
runtimeHooks: params.runtimeHooks,
});
const resolvedContextWindow =
metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow;
const resolvedMaxTokens =
metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens;
const requestConfig = resolveProviderRequestConfig({
provider: params.provider,
api:
@@ -541,9 +548,15 @@ function applyConfiguredProviderOverrides(params: {
reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning,
input: normalizedInput,
cost: metadataOverrideModel?.cost ?? discoveredModel.cost,
contextWindow: metadataOverrideModel?.contextWindow ?? discoveredModel.contextWindow,
contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens,
maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens,
contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow,
contextTokens:
metadataOverrideModel?.contextTokens ??
providerConfig.contextTokens ??
discoveredModel.contextTokens,
maxTokens:
typeof resolvedContextWindow === "number"
? Math.min(resolvedMaxTokens, resolvedContextWindow)
: resolvedMaxTokens,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
@@ -774,11 +787,16 @@ function resolveConfiguredFallbackModel(params: {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow:
configuredModel?.contextWindow ??
providerConfig?.contextWindow ??
providerConfig?.models?.[0]?.contextWindow ??
DEFAULT_CONTEXT_TOKENS,
contextTokens: configuredModel?.contextTokens ?? providerConfig?.models?.[0]?.contextTokens,
contextTokens:
configuredModel?.contextTokens ??
providerConfig?.contextTokens ??
providerConfig?.models?.[0]?.contextTokens,
maxTokens:
configuredModel?.maxTokens ??
providerConfig?.maxTokens ??
providerConfig?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
...(resolvedParams ? { params: resolvedParams } : {}),
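The `Math.min` clamp in the first hunk keeps the output budget within the resolved window; a minimal restatement under the same names, matching the resolveModel test earlier in this commit:

```ts
// maxTokens never exceeds the resolved context window when one is configured.
function clampMaxTokens(maxTokens: number, contextWindow?: number): number {
  return typeof contextWindow === "number" ? Math.min(maxTokens, contextWindow) : maxTokens;
}

// Provider contextWindow 8_192 with discovered maxTokens 65_536:
clampMaxTokens(65_536, 8_192); // => 8_192, as the test expects
```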

View File

@@ -1554,6 +1554,28 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
},
contextWindow: {
type: "number",
exclusiveMinimum: 0,
title: "Model Provider Context Window",
description:
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
},
contextTokens: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Model Provider Context Tokens",
description:
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
},
maxTokens: {
type: "number",
exclusiveMinimum: 0,
title: "Model Provider Max Tokens",
description:
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
},
timeoutSeconds: {
type: "integer",
exclusiveMinimum: 0,
@@ -26485,6 +26507,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
tags: ["models"],
},
"models.providers.*.contextWindow": {
label: "Model Provider Context Window",
help: "Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
tags: ["models"],
},
"models.providers.*.contextTokens": {
label: "Model Provider Context Tokens",
help: "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
tags: ["security", "auth", "models"],
},
"models.providers.*.maxTokens": {
label: "Model Provider Max Tokens",
help: "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
tags: ["security", "auth", "performance", "models"],
},
"models.providers.*.timeoutSeconds": {
label: "Model Provider Request Timeout",
help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",

View File

@@ -367,6 +367,9 @@ const TARGET_KEYS = [
"models.providers.*.baseUrl",
"models.providers.*.apiKey",
"models.providers.*.api",
"models.providers.*.contextWindow",
"models.providers.*.contextTokens",
"models.providers.*.maxTokens",
"models.providers.*.headers",
"models.providers.*.models",
"agents",

View File

@@ -826,6 +826,12 @@ export const FIELD_HELP: Record<string, string> = {
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
"models.providers.*.api":
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
"models.providers.*.contextWindow":
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
"models.providers.*.contextTokens":
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
"models.providers.*.maxTokens":
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
"models.providers.*.timeoutSeconds":
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
"models.providers.*.injectNumCtxForOpenAICompat":

View File

@@ -515,6 +515,9 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret
"models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.contextWindow": "Model Provider Context Window",
"models.providers.*.contextTokens": "Model Provider Context Tokens",
"models.providers.*.maxTokens": "Model Provider Max Tokens",
"models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers",

View File

@@ -119,6 +119,9 @@ export type ModelProviderConfig = {
apiKey?: SecretInput;
auth?: ModelProviderAuthMode;
api?: ModelApi;
contextWindow?: number;
contextTokens?: number;
maxTokens?: number;
timeoutSeconds?: number;
injectNumCtxForOpenAICompat?: boolean;
headers?: Record<string, SecretInput>;

View File

@@ -357,6 +357,9 @@ export const ModelProviderSchema = z
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
.optional(),
api: ModelApiSchema.optional(),
contextWindow: z.number().positive().optional(),
contextTokens: z.number().int().positive().optional(),
maxTokens: z.number().positive().optional(),
timeoutSeconds: z.number().int().positive().optional(),
injectNumCtxForOpenAICompat: z.boolean().optional(),
headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
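A quick standalone check of the three field shapes added above, sketched with plain zod rather than the project's full `ModelProviderSchema`:

```ts
import { z } from "zod";

// Mirrors the provider-level cap fields from the schema hunk above.
const ProviderCaps = z.object({
  contextWindow: z.number().positive().optional(),
  contextTokens: z.number().int().positive().optional(), // integer-only, unlike the other two
  maxTokens: z.number().positive().optional(),
});

ProviderCaps.parse({ contextWindow: 32768, contextTokens: 32000, maxTokens: 8192 }); // ok
// ProviderCaps.parse({ contextTokens: 31.5 }) would throw: expected int, received float
```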