mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-07 07:58:36 +00:00
fix(models): honor provider context defaults
This commit is contained in:
@@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
|
||||
- Providers/Ollama: skip ambient localhost discovery unless Ollama auth or meaningful config opts in, preventing unexpected probes to `127.0.0.1:11434` for users who are not using Ollama. Fixes #56939; supersedes #57116. Thanks @IanxDev and @tsukhani.
|
||||
- Providers/Ollama: skip implicit localhost discovery when a custom remote `api: "ollama"` provider is configured, while still treating `127/8` loopback hosts as local. Carries forward #43224. Thanks @issacthekaylon.
|
||||
- Providers/models: honor provider-level `contextWindow`, `contextTokens`, and `maxTokens` as defaults when resolving discovered models, so local Ollama and other self-hosted providers can cap all models without repeating per-model entries. Fixes #44786; carries forward #44955. Thanks @voltwake and @maweibin.
|
||||
- Providers/Ollama: move memory embeddings to Ollama's current `/api/embed` endpoint with batched `input` requests while preserving vector normalization and custom provider auth/header overrides. Fixes #39983. Thanks @sskkcc and @LiudengZhang.
|
||||
- Providers/Ollama: route local web search through Ollama's signed `/api/experimental/web_search` daemon proxy, use hosted `/api/web_search` directly for `ollama.com`, and keep `OLLAMA_API_KEY` scoped to cloud fallback auth. Fixes #69132. Thanks @yoon1012 and @hyspacex.
|
||||
- Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json
|
||||
15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json
|
||||
c4b54de7557cd14b35a629585ad706a4e7de411cc725bcbce921f22bfaf14ada config-baseline.json
|
||||
3fd4da36f28b508f8e6ac4fceb18262244d8ed70df15244192032ec71027bb4f config-baseline.core.json
|
||||
07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json
|
||||
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json
|
||||
|
||||
@@ -16,7 +16,7 @@ Reference for **LLM/model providers** (not chat channels like WhatsApp/Telegram)
|
||||
- Model refs use `provider/model` (example: `opencode/claude-opus-4-6`).
|
||||
- `agents.defaults.models` acts as an allowlist when set.
|
||||
- CLI helpers: `openclaw onboard`, `openclaw models list`, `openclaw models set <provider/model>`.
|
||||
- `models.providers.*.models[].contextWindow` is native model metadata; `contextTokens` is the effective runtime cap.
|
||||
- `models.providers.*.contextWindow` / `contextTokens` / `maxTokens` set provider-level defaults; `models.providers.*.models[].contextWindow` / `contextTokens` / `maxTokens` override them per model.
|
||||
- Fallback rules, cooldown probes, and session-override persistence: [Model failover](/concepts/model-failover).
|
||||
</Accordion>
|
||||
<Accordion title="OpenAI provider/runtime split">
|
||||
|
||||
@@ -429,6 +429,9 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
|
||||
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc).
|
||||
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
|
||||
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
|
||||
- `models.providers.*.contextWindow`: default native context window for models under this provider when the model entry does not set `contextWindow`.
|
||||
- `models.providers.*.contextTokens`: default effective runtime context cap for models under this provider when the model entry does not set `contextTokens`.
|
||||
- `models.providers.*.maxTokens`: default output-token cap for models under this provider when the model entry does not set `maxTokens`.
|
||||
- `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling.
|
||||
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
|
||||
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
|
||||
@@ -447,8 +450,8 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
|
||||
</Accordion>
|
||||
<Accordion title="Model catalog entries">
|
||||
- `models.providers.*.models`: explicit provider model catalog entries.
|
||||
- `models.providers.*.models.*.contextWindow`: native model context window metadata.
|
||||
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ.
|
||||
- `models.providers.*.models.*.contextWindow`: native model context window metadata. This overrides provider-level `contextWindow` for that model.
|
||||
- `models.providers.*.models.*.contextTokens`: optional runtime context cap that overrides provider-level `contextTokens` for that model. Use it when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ.
|
||||
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
|
||||
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request.
|
||||
</Accordion>
|
||||
|
||||
@@ -462,7 +462,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
|
||||
<Accordion title="Context windows">
|
||||
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
|
||||
|
||||
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
|
||||
You can set provider-level `contextWindow`, `contextTokens`, and `maxTokens` defaults for every model under that Ollama provider, then override them per model when needed. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite `params.num_ctx` values are ignored, and OpenClaw falls back to `contextWindow`.
|
||||
|
||||
Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
|
||||
|
||||
@@ -471,6 +471,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
contextWindow: 32768,
|
||||
models: [
|
||||
{
|
||||
id: "llama3.3",
|
||||
|
||||
@@ -187,6 +187,23 @@ describe("applyConfiguredContextWindows", () => {
|
||||
|
||||
expect(cache.get("custom/model")).toBe(200_000);
|
||||
});
|
||||
|
||||
it("uses provider-level context defaults for configured model entries", () => {
|
||||
const cache = new Map<string, number>();
|
||||
applyConfiguredContextWindows({
|
||||
cache,
|
||||
modelsConfig: {
|
||||
providers: {
|
||||
ollama: {
|
||||
contextWindow: 8_192,
|
||||
models: [{ id: "qwen3.5:9b" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(cache.get("qwen3.5:9b")).toBe(8_192);
|
||||
});
|
||||
});
|
||||
|
||||
describe("createSessionManagerRuntimeRegistry", () => {
|
||||
@@ -210,6 +227,50 @@ describe("createSessionManagerRuntimeRegistry", () => {
|
||||
});
|
||||
|
||||
describe("resolveContextTokensForModel", () => {
|
||||
it("uses provider-level context defaults when no model-level cap is set", () => {
|
||||
const result = resolveContextTokensForModel({
|
||||
cfg: {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
baseUrl: "http://localhost:11434",
|
||||
contextWindow: 8_192,
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
provider: "ollama",
|
||||
model: "qwen3.5:9b",
|
||||
fallbackContextTokens: 216_000,
|
||||
allowAsyncLoad: false,
|
||||
});
|
||||
|
||||
expect(result).toBe(8_192);
|
||||
});
|
||||
|
||||
it("prefers model-level context caps over provider-level defaults", () => {
|
||||
const result = resolveContextTokensForModel({
|
||||
cfg: {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
baseUrl: "http://localhost:11434",
|
||||
contextWindow: 8_192,
|
||||
models: [{ ...testModelContextWindow("qwen3.5:9b", 216_000), contextTokens: 16_000 }],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
provider: "ollama",
|
||||
model: "qwen3.5:9b",
|
||||
fallbackContextTokens: 216_000,
|
||||
allowAsyncLoad: false,
|
||||
});
|
||||
|
||||
expect(result).toBe(16_000);
|
||||
});
|
||||
|
||||
it("returns 1M context when anthropic context1m is enabled for opus/sonnet", () => {
|
||||
const result = resolveContextTokensForModel({
|
||||
cfg: {
|
||||
|
||||
@@ -21,7 +21,11 @@ type ModelRegistryLike = {
|
||||
getAll: () => ModelEntry[];
|
||||
};
|
||||
type ConfigModelEntry = { id?: string; contextWindow?: number; contextTokens?: number };
|
||||
type ProviderConfigEntry = { models?: ConfigModelEntry[] };
|
||||
type ProviderConfigEntry = {
|
||||
contextWindow?: number;
|
||||
contextTokens?: number;
|
||||
models?: ConfigModelEntry[];
|
||||
};
|
||||
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };
|
||||
type AgentModelEntry = { params?: Record<string, unknown> };
|
||||
|
||||
@@ -83,7 +87,11 @@ export function applyConfiguredContextWindows(params: {
|
||||
? model.contextTokens
|
||||
: typeof model?.contextWindow === "number"
|
||||
? model.contextWindow
|
||||
: undefined;
|
||||
: typeof provider?.contextTokens === "number"
|
||||
? provider.contextTokens
|
||||
: typeof provider?.contextWindow === "number"
|
||||
? provider.contextWindow
|
||||
: undefined;
|
||||
if (!modelId || !contextTokens || contextTokens <= 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -340,30 +348,41 @@ function resolveConfiguredProviderContextTokens(
|
||||
// Mirror the lookup order in pi-embedded-runner/model.ts: exact key first,
|
||||
// then normalized fallback. This prevents alias collisions from picking the
|
||||
// wrong configured cap based on Object.entries iteration order.
|
||||
/**
 * Reads the provider-level context default from a provider config entry.
 *
 * Returns `contextTokens` when it is a number; otherwise `contextWindow` when
 * it is a number; otherwise `undefined`. No positivity check happens here, so
 * callers must validate the returned value themselves.
 */
function readProviderContextTokens(providerConfig: ProviderConfigEntry | undefined) {
|
||||
return typeof providerConfig?.contextTokens === "number"
|
||||
? providerConfig.contextTokens
|
||||
: typeof providerConfig?.contextWindow === "number"
|
||||
? providerConfig.contextWindow
|
||||
: undefined;
|
||||
}
|
||||
|
||||
function findContextTokens(matchProviderId: (id: string) => boolean): number | undefined {
|
||||
for (const [providerId, providerConfig] of Object.entries(providers!)) {
|
||||
if (!matchProviderId(providerId)) {
|
||||
continue;
|
||||
}
|
||||
if (!Array.isArray(providerConfig?.models)) {
|
||||
continue;
|
||||
}
|
||||
for (const m of providerConfig.models) {
|
||||
const contextTokens =
|
||||
typeof m?.contextTokens === "number"
|
||||
? m.contextTokens
|
||||
: typeof m?.contextWindow === "number"
|
||||
? m.contextWindow
|
||||
: undefined;
|
||||
if (
|
||||
typeof m?.id === "string" &&
|
||||
m.id === model &&
|
||||
typeof contextTokens === "number" &&
|
||||
contextTokens > 0
|
||||
) {
|
||||
return contextTokens;
|
||||
if (Array.isArray(providerConfig?.models)) {
|
||||
for (const m of providerConfig.models) {
|
||||
const contextTokens =
|
||||
typeof m?.contextTokens === "number"
|
||||
? m.contextTokens
|
||||
: typeof m?.contextWindow === "number"
|
||||
? m.contextWindow
|
||||
: undefined;
|
||||
if (
|
||||
typeof m?.id === "string" &&
|
||||
m.id === model &&
|
||||
typeof contextTokens === "number" &&
|
||||
contextTokens > 0
|
||||
) {
|
||||
return contextTokens;
|
||||
}
|
||||
}
|
||||
}
|
||||
const providerContextTokens = readProviderContextTokens(providerConfig);
|
||||
if (typeof providerContextTokens === "number" && providerContextTokens > 0) {
|
||||
return providerContextTokens;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -20,6 +20,9 @@ export type InlineProviderConfig = {
|
||||
baseUrl?: string;
|
||||
api?: ModelDefinitionConfig["api"];
|
||||
models?: ModelDefinitionConfig[];
|
||||
contextWindow?: ModelProviderConfig["contextWindow"];
|
||||
contextTokens?: ModelProviderConfig["contextTokens"];
|
||||
maxTokens?: ModelProviderConfig["maxTokens"];
|
||||
headers?: unknown;
|
||||
authHeader?: boolean;
|
||||
timeoutSeconds?: ModelProviderConfig["timeoutSeconds"];
|
||||
@@ -154,6 +157,9 @@ export function buildInlineProviderModels(
|
||||
return attachModelProviderRequestTransport(
|
||||
{
|
||||
...model,
|
||||
contextWindow: model.contextWindow ?? entry?.contextWindow,
|
||||
contextTokens: model.contextTokens ?? entry?.contextTokens,
|
||||
maxTokens: model.maxTokens ?? entry?.maxTokens,
|
||||
input: resolveProviderModelInput({
|
||||
provider: trimmed,
|
||||
modelId: model.id,
|
||||
|
||||
@@ -443,6 +443,77 @@ describe("resolveModel", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("uses provider-level context defaults over discovered metadata", () => {
|
||||
mockDiscoveredModel(discoverModels, {
|
||||
provider: "ollama",
|
||||
modelId: "qwen3.5:9b",
|
||||
templateModel: {
|
||||
...makeModel("qwen3.5:9b"),
|
||||
provider: "ollama",
|
||||
contextWindow: 216_000,
|
||||
contextTokens: 216_000,
|
||||
maxTokens: 65_536,
|
||||
},
|
||||
});
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
baseUrl: "http://localhost:11434",
|
||||
contextWindow: 8_192,
|
||||
contextTokens: 8_000,
|
||||
models: [{ id: "qwen3.5:9b", name: "qwen3.5:9b" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.contextWindow).toBe(8_192);
|
||||
expect((result.model as { contextTokens?: number } | undefined)?.contextTokens).toBe(8_000);
|
||||
expect(result.model?.maxTokens).toBe(8_192);
|
||||
});
|
||||
|
||||
it("keeps per-model context values above provider-level defaults", () => {
|
||||
mockDiscoveredModel(discoverModels, {
|
||||
provider: "ollama",
|
||||
modelId: "qwen3.5:9b",
|
||||
templateModel: {
|
||||
...makeModel("qwen3.5:9b"),
|
||||
provider: "ollama",
|
||||
contextWindow: 216_000,
|
||||
maxTokens: 65_536,
|
||||
},
|
||||
});
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
baseUrl: "http://localhost:11434",
|
||||
contextWindow: 8_192,
|
||||
maxTokens: 4_096,
|
||||
models: [
|
||||
{
|
||||
id: "qwen3.5:9b",
|
||||
name: "qwen3.5:9b",
|
||||
contextWindow: 16_384,
|
||||
maxTokens: 12_000,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg);
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.contextWindow).toBe(16_384);
|
||||
expect(result.model?.maxTokens).toBe(12_000);
|
||||
});
|
||||
|
||||
it("applies agent default model params without explicit provider config", () => {
|
||||
mockDiscoveredModel(discoverModels, {
|
||||
provider: "ollama",
|
||||
|
||||
@@ -483,6 +483,9 @@ function applyConfiguredProviderOverrides(params: {
|
||||
!configuredModel &&
|
||||
!providerConfig.baseUrl &&
|
||||
!providerConfig.api &&
|
||||
providerConfig.contextWindow === undefined &&
|
||||
providerConfig.contextTokens === undefined &&
|
||||
providerConfig.maxTokens === undefined &&
|
||||
requestTimeoutMs === undefined &&
|
||||
!providerHeaders &&
|
||||
!providerRequest
|
||||
@@ -518,6 +521,10 @@ function applyConfiguredProviderOverrides(params: {
|
||||
cfg: params.cfg,
|
||||
runtimeHooks: params.runtimeHooks,
|
||||
});
|
||||
const resolvedContextWindow =
|
||||
metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow;
|
||||
const resolvedMaxTokens =
|
||||
metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens;
|
||||
const requestConfig = resolveProviderRequestConfig({
|
||||
provider: params.provider,
|
||||
api:
|
||||
@@ -541,9 +548,15 @@ function applyConfiguredProviderOverrides(params: {
|
||||
reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning,
|
||||
input: normalizedInput,
|
||||
cost: metadataOverrideModel?.cost ?? discoveredModel.cost,
|
||||
contextWindow: metadataOverrideModel?.contextWindow ?? discoveredModel.contextWindow,
|
||||
contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens,
|
||||
maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens,
|
||||
contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow,
|
||||
contextTokens:
|
||||
metadataOverrideModel?.contextTokens ??
|
||||
providerConfig.contextTokens ??
|
||||
discoveredModel.contextTokens,
|
||||
maxTokens:
|
||||
typeof resolvedContextWindow === "number"
|
||||
? Math.min(resolvedMaxTokens, resolvedContextWindow)
|
||||
: resolvedMaxTokens,
|
||||
...(resolvedParams ? { params: resolvedParams } : {}),
|
||||
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
|
||||
headers: requestConfig.headers,
|
||||
@@ -774,11 +787,16 @@ function resolveConfiguredFallbackModel(params: {
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow:
|
||||
configuredModel?.contextWindow ??
|
||||
providerConfig?.contextWindow ??
|
||||
providerConfig?.models?.[0]?.contextWindow ??
|
||||
DEFAULT_CONTEXT_TOKENS,
|
||||
contextTokens: configuredModel?.contextTokens ?? providerConfig?.models?.[0]?.contextTokens,
|
||||
contextTokens:
|
||||
configuredModel?.contextTokens ??
|
||||
providerConfig?.contextTokens ??
|
||||
providerConfig?.models?.[0]?.contextTokens,
|
||||
maxTokens:
|
||||
configuredModel?.maxTokens ??
|
||||
providerConfig?.maxTokens ??
|
||||
providerConfig?.models?.[0]?.maxTokens ??
|
||||
DEFAULT_CONTEXT_TOKENS,
|
||||
...(resolvedParams ? { params: resolvedParams } : {}),
|
||||
|
||||
@@ -1554,6 +1554,28 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
|
||||
},
|
||||
contextWindow: {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
title: "Model Provider Context Window",
|
||||
description:
|
||||
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
|
||||
},
|
||||
contextTokens: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
title: "Model Provider Context Tokens",
|
||||
description:
|
||||
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
|
||||
},
|
||||
maxTokens: {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
title: "Model Provider Max Tokens",
|
||||
description:
|
||||
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
|
||||
},
|
||||
timeoutSeconds: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
@@ -26485,6 +26507,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
|
||||
tags: ["models"],
|
||||
},
|
||||
"models.providers.*.contextWindow": {
|
||||
label: "Model Provider Context Window",
|
||||
help: "Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
|
||||
tags: ["models"],
|
||||
},
|
||||
"models.providers.*.contextTokens": {
|
||||
label: "Model Provider Context Tokens",
|
||||
help: "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
|
||||
tags: ["security", "auth", "models"],
|
||||
},
|
||||
"models.providers.*.maxTokens": {
|
||||
label: "Model Provider Max Tokens",
|
||||
help: "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
|
||||
tags: ["security", "auth", "performance", "models"],
|
||||
},
|
||||
"models.providers.*.timeoutSeconds": {
|
||||
label: "Model Provider Request Timeout",
|
||||
help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
|
||||
|
||||
@@ -367,6 +367,9 @@ const TARGET_KEYS = [
|
||||
"models.providers.*.baseUrl",
|
||||
"models.providers.*.apiKey",
|
||||
"models.providers.*.api",
|
||||
"models.providers.*.contextWindow",
|
||||
"models.providers.*.contextTokens",
|
||||
"models.providers.*.maxTokens",
|
||||
"models.providers.*.headers",
|
||||
"models.providers.*.models",
|
||||
"agents",
|
||||
|
||||
@@ -826,6 +826,12 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
|
||||
"models.providers.*.api":
|
||||
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
|
||||
"models.providers.*.contextWindow":
|
||||
"Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.",
|
||||
"models.providers.*.contextTokens":
|
||||
"Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.",
|
||||
"models.providers.*.maxTokens":
|
||||
"Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
|
||||
"models.providers.*.timeoutSeconds":
|
||||
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
|
||||
"models.providers.*.injectNumCtxForOpenAICompat":
|
||||
|
||||
@@ -515,6 +515,9 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret
|
||||
"models.providers.*.auth": "Model Provider Auth Mode",
|
||||
"models.providers.*.api": "Model Provider API Adapter",
|
||||
"models.providers.*.contextWindow": "Model Provider Context Window",
|
||||
"models.providers.*.contextTokens": "Model Provider Context Tokens",
|
||||
"models.providers.*.maxTokens": "Model Provider Max Tokens",
|
||||
"models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
|
||||
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
|
||||
"models.providers.*.headers": "Model Provider Headers",
|
||||
|
||||
@@ -119,6 +119,9 @@ export type ModelProviderConfig = {
|
||||
apiKey?: SecretInput;
|
||||
auth?: ModelProviderAuthMode;
|
||||
api?: ModelApi;
|
||||
contextWindow?: number;
|
||||
contextTokens?: number;
|
||||
maxTokens?: number;
|
||||
timeoutSeconds?: number;
|
||||
injectNumCtxForOpenAICompat?: boolean;
|
||||
headers?: Record<string, SecretInput>;
|
||||
|
||||
@@ -357,6 +357,9 @@ export const ModelProviderSchema = z
|
||||
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
|
||||
.optional(),
|
||||
api: ModelApiSchema.optional(),
|
||||
contextWindow: z.number().positive().optional(),
|
||||
contextTokens: z.number().int().positive().optional(),
|
||||
maxTokens: z.number().positive().optional(),
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
injectNumCtxForOpenAICompat: z.boolean().optional(),
|
||||
headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
|
||||
|
||||
Reference in New Issue
Block a user