diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f6cf0a523a..d0bad2cd589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai - Auto-reply/Sessions: remove auth-key labels from `/new` and `/reset` confirmation messages so session reset notices never expose API key prefixes or env-key labels in chat output. (#24384, #24409) Thanks @Clawborn. - Slack/Group policy: move Slack account `groupPolicy` defaulting to provider-level schema defaults so multi-account configs inherit top-level `channels.slack.groupPolicy` instead of silently overriding inheritance with per-account `allowlist`. (#17579) Thanks @ZetiMente. - Providers/Anthropic: skip `context-1m-*` beta injection for OAuth/subscription tokens (`sk-ant-oat-*`) while preserving OAuth-required betas, avoiding Anthropic 401 auth failures when `params.context1m` is enabled. (#10647, #20354) Thanks @ClumsyWizardHands and @dcruver. +- Providers/Bedrock: disable prompt-cache retention for non-Anthropic Bedrock models so Nova/Mistral requests do not send unsupported cache metadata. (#20866) Thanks @pierreeurope. - Providers/OpenRouter: remove conflicting top-level `reasoning_effort` when injecting nested `reasoning.effort`, preventing OpenRouter 400 payload-validation failures for reasoning models. (#24120) thanks @tenequm. - Providers/Groq: avoid classifying Groq TPM limit errors as context overflow so throttling paths no longer trigger overflow recovery logic. (#16176) Thanks @dddabtc. - Gateway/WS: close repeated post-handshake `unauthorized role:*` request floods per connection and sample duplicate rejection logs, preventing a single misbehaving client from degrading gateway responsiveness. (#20168) Thanks @acy103, @vibecodooor, and @vincentkoc. diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 433bd816d6b..68d7327c33e 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -151,6 +151,42 @@ describe("applyExtraParamsToAgent", () => { }); }); + it("disables prompt caching for non-Anthropic Bedrock models", () => { + const { calls, agent } = createOptionsCaptureAgent(); + + applyExtraParamsToAgent(agent, undefined, "amazon-bedrock", "amazon.nova-micro-v1"); + + const model = { + api: "openai-completions", + provider: "amazon-bedrock", + id: "amazon.nova-micro-v1", + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + + void agent.streamFn?.(model, context, {}); + + expect(calls).toHaveLength(1); + expect(calls[0]?.cacheRetention).toBe("none"); + }); + + it("keeps Anthropic Bedrock models eligible for provider-side caching", () => { + const { calls, agent } = createOptionsCaptureAgent(); + + applyExtraParamsToAgent(agent, undefined, "amazon-bedrock", "us.anthropic.claude-sonnet-4-5"); + + const model = { + api: "openai-completions", + provider: "amazon-bedrock", + id: "us.anthropic.claude-sonnet-4-5", + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + + void agent.streamFn?.(model, context, {}); + + expect(calls).toHaveLength(1); + expect(calls[0]?.cacheRetention).toBeUndefined(); + }); + it("adds Anthropic 1M beta header when context1m is enabled for Opus/Sonnet", () => { const { calls, agent } = createOptionsCaptureAgent(); const cfg = buildAnthropicModelConfig("anthropic/claude-opus-4-6", { context1m: true }); diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 3f69b5d5534..285ae6a5b23 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -137,6 +137,20 @@ function createStreamFnWithExtraParams( return wrappedStreamFn; } +function isAnthropicBedrockModel(modelId: string): boolean { + const normalized = modelId.toLowerCase(); + return normalized.includes("anthropic.claude") || normalized.includes("anthropic/claude"); +} + +function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => + underlying(model, context, { + ...options, + cacheRetention: "none", + }); +} + function isDirectOpenAIBaseUrl(baseUrl: unknown): boolean { if (typeof baseUrl !== "string" || !baseUrl.trim()) { return true; @@ -501,6 +515,11 @@ export function applyExtraParamsToAgent( agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn); } + if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) { + log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`); + agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn); + } + // Enable Z.AI tool_stream for real-time tool call streaming. // Enabled by default for Z.AI provider, can be disabled via params.tool_stream: false if (provider === "zai" || provider === "z-ai") {