fix: preserve thinking blocks for Claude Opus 4.5+/Sonnet 4.5+ to fix cache

Claude Opus 4.5+ and Sonnet 4.5+ preserve thinking blocks in model context
by default. Dropping them from prior turns (as was correct for Sonnet 3.7)
breaks Anthropic's prefix-based prompt cache matching, causing cache misses
after every thinking turn.

This change conditions dropThinkingBlocks on the model version:
- Preserve (no drop) for: opus-4.x, sonnet-4.5/4.6, haiku-4.x, and future major versions (claude-5 and later)
- Drop for: every other Claude model, including claude-3-7-sonnet and earlier

Fixes #61793

See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
This commit is contained in:
Qinyao He
2026-04-06 02:42:22 -07:00
committed by Peter Steinberger
parent 2751874cbb
commit 7a3514664d
4 changed files with 154 additions and 7 deletions

View File

@@ -262,6 +262,32 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
it("preserves thinking blocks for newer Claude models in unowned Anthropic transport fallback", () => {
  // Each case goes through the same custom proxy over the anthropic-messages
  // API; only the model id varies. `drops` is the expected value of
  // `dropThinkingBlocks` on the resolved policy.
  const scenarios: Array<{ modelId: string; drops: boolean }> = [
    // Opus 4.6: thinking blocks must be kept.
    { modelId: "claude-opus-4-6", drops: false },
    // Sonnet 4.5: thinking blocks must be kept.
    { modelId: "claude-sonnet-4-5-20250929", drops: false },
    // Legacy Sonnet 3.7: thinking blocks are still dropped.
    { modelId: "claude-3-7-sonnet-20250219", drops: true },
  ];

  for (const { modelId, drops } of scenarios) {
    const resolved = resolveTranscriptPolicy({
      provider: "custom-anthropic-proxy",
      modelId,
      modelApi: "anthropic-messages",
    });
    expect(resolved.dropThinkingBlocks).toBe(drops);
  }
});
it("preserves transport defaults when a runtime plugin has not adopted replay hooks", () => {
const policy = resolveTranscriptPolicy({
provider: "vllm",

View File

@@ -46,6 +46,35 @@ function isAnthropicApi(modelApi?: string | null): boolean {
return modelApi === "anthropic-messages" || modelApi === "bedrock-converse-stream";
}
/**
 * Decides whether prior-turn thinking blocks must be kept for a Claude model.
 * For models that keep thinking blocks in context natively, dropping them
 * from earlier turns breaks prompt cache prefix matching.
 *
 * Matching is done on the lower-cased model id, by substring:
 * - any id containing "opus-4" or "haiku-4",
 * - Sonnet 4.5 and 4.6 in either "sonnet-4-5" or "sonnet-4.5" spelling,
 * - ids of the form "claude-<N>" where N is 5-9 or has two or more digits.
 *
 * Everything else returns false, including non-Claude ids, the Claude 3.x
 * family, and plain Sonnet 4 ids such as "claude-sonnet-4-20250514".
 *
 * See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
 *
 * @param modelId - Provider model identifier; case is ignored.
 * @returns true when thinking blocks should be preserved (not dropped).
 */
function shouldPreserveThinkingBlocksForModel(modelId: string): boolean {
  // Normalize case so "Claude-Opus-4-6" and "claude-opus-4-6" get the same
  // answer, matching how the provider replay helpers treat model ids.
  const id = modelId.toLowerCase();
  if (!id.includes("claude")) {
    return false;
  }

  const preservingMarkers = [
    "opus-4",
    "sonnet-4-5",
    "sonnet-4-6",
    "sonnet-4.5",
    "sonnet-4.6",
    "haiku-4",
  ];
  if (preservingMarkers.some((marker) => id.includes(marker))) {
    return true;
  }

  // Future-proofing: claude-5-x, claude-6-x, ... and claude-10+.
  return /claude-[5-9]/.test(id) || /claude-\d{2,}/.test(id);
}
/**
* Provides a narrow replay-policy fallback for providers that do not have an
* owning runtime plugin.
@@ -93,7 +122,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
},
}
: {}),
...(isAnthropic && modelId.includes("claude") ? { dropThinkingBlocks: true } : {}),
...(isAnthropic && modelId.includes("claude") ? { dropThinkingBlocks: !shouldPreserveThinkingBlocksForModel(modelId) } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}),

View File

@@ -33,19 +33,51 @@ describe("provider replay helpers", () => {
});
it("derives claude-only anthropic replay policy from the model id", () => {
  // Sonnet 4.6 preserves thinking blocks: the strict Anthropic settings are
  // present, and the policy must not carry a dropThinkingBlocks key. The
  // earlier `dropThinkingBlocks: true` expectation contradicted this and is
  // removed.
  const sonnet46Policy = buildAnthropicReplayPolicyForModel("claude-sonnet-4-6");
  expect(sonnet46Policy).toMatchObject({
    sanitizeToolCallIds: true,
    toolCallIdMode: "strict",
    validateAnthropicTurns: true,
  });
  expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");

  // Legacy models still drop thinking blocks
  expect(buildAnthropicReplayPolicyForModel("claude-3-7-sonnet-20250219")).toMatchObject({
    dropThinkingBlocks: true,
  });

  // Non-Claude ids never get the flag.
  expect(buildAnthropicReplayPolicyForModel("amazon.nova-pro-v1")).not.toHaveProperty(
    "dropThinkingBlocks",
  );
});
it("preserves thinking blocks for Claude Opus 4.5+ and Sonnet 4.5+ models", () => {
  // Model ids whose policy must not contain a dropThinkingBlocks key.
  const preservingModelIds = [
    "claude-opus-4-5-20251101",
    "claude-opus-4-6",
    "claude-sonnet-4-5-20250929",
    "claude-sonnet-4-6",
    "claude-haiku-4-5-20251001",
  ];
  // Legacy model ids whose policy must still set dropThinkingBlocks: true.
  const droppingModelIds = ["claude-3-7-sonnet-20250219", "claude-3-5-sonnet-20240620"];

  preservingModelIds.forEach((id) => {
    expect(buildAnthropicReplayPolicyForModel(id)).not.toHaveProperty("dropThinkingBlocks");
  });

  droppingModelIds.forEach((id) => {
    expect(buildAnthropicReplayPolicyForModel(id)).toMatchObject({ dropThinkingBlocks: true });
  });
});
it("builds native Anthropic replay policy with selective tool-call id preservation", () => {
expect(buildNativeAnthropicReplayPolicyForModel("claude-sonnet-4-6")).toMatchObject({
// Sonnet 4.6 preserves thinking blocks
const policy46 = buildNativeAnthropicReplayPolicyForModel("claude-sonnet-4-6");
expect(policy46).toMatchObject({
sanitizeMode: "full",
sanitizeToolCallIds: true,
toolCallIdMode: "strict",
@@ -54,17 +86,37 @@ describe("provider replay helpers", () => {
repairToolUseResultPairing: true,
validateAnthropicTurns: true,
allowSyntheticToolResults: true,
});
expect(policy46).not.toHaveProperty("dropThinkingBlocks");
// Legacy model drops thinking blocks
expect(buildNativeAnthropicReplayPolicyForModel("claude-3-7-sonnet-20250219")).toMatchObject({
dropThinkingBlocks: true,
});
});
it("builds hybrid anthropic or openai replay policy", () => {
// Sonnet 4.6 preserves thinking blocks even when flag is set
const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy(
{
provider: "minimax",
modelApi: "anthropic-messages",
modelId: "claude-sonnet-4-6",
} as never,
{ anthropicModelDropThinkingBlocks: true },
);
expect(sonnet46Policy).toMatchObject({
validateAnthropicTurns: true,
});
expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");
// Legacy model still drops
expect(
buildHybridAnthropicOrOpenAIReplayPolicy(
{
provider: "minimax",
modelApi: "anthropic-messages",
modelId: "claude-sonnet-4-6",
modelId: "claude-3-7-sonnet-20250219",
} as never,
{ anthropicModelDropThinkingBlocks: true },
),

View File

@@ -63,15 +63,53 @@ export function buildStrictAnthropicReplayPolicy(
};
}
/**
 * Tells whether thinking blocks from earlier turns should be left in place
 * for the given model. Removing them for models that keep thinking blocks in
 * context natively breaks prompt cache prefix matching.
 *
 * The id is lower-cased (a missing id is treated as "") and then checked by
 * substring:
 * - must contain "claude", otherwise the answer is false;
 * - preserved when it contains "opus-4", "haiku-4", or Sonnet 4.5 / 4.6 in
 *   dash or dot spelling ("sonnet-4-5", "sonnet-4.6", ...);
 * - preserved when it looks like a later major version: "claude-5" through
 *   "claude-9", or "claude-" followed by two or more digits.
 *
 * Anything else (claude-3-7-sonnet, claude-3-5-sonnet, plain Sonnet 4 ids,
 * ...) is not preserved.
 *
 * See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
 *
 * @param modelId - Optional provider model identifier.
 * @returns true when thinking blocks should be preserved.
 */
function shouldPreserveThinkingBlocks(modelId?: string): boolean {
  const normalized = (modelId ?? "").toLowerCase();

  if (normalized.includes("claude")) {
    // Families known to keep thinking blocks in context.
    const knownFamily = [
      "opus-4",
      "sonnet-4-5",
      "sonnet-4-6",
      "sonnet-4.5",
      "sonnet-4.6",
      "haiku-4",
    ].some((fragment) => normalized.includes(fragment));

    // Future-proofing: claude-5-x, claude-6-x etc. should also preserve.
    const laterMajor = /claude-[5-9]/.test(normalized) || /claude-\d{2,}/.test(normalized);

    return knownFamily || laterMajor;
  }

  return false;
}
/**
 * Builds the strict Anthropic replay policy for a model id.
 *
 * `dropThinkingBlocks` is requested only for Claude models for which
 * `shouldPreserveThinkingBlocks` returns false. For non-Claude ids, and for
 * Claude models that preserve thinking blocks, the flag passed is false.
 *
 * The options object previously listed `dropThinkingBlocks` twice; the first
 * entry was dead (the later key wins) and a duplicate-property error in
 * TypeScript, so only the model-aware entry is kept.
 *
 * @param modelId - Optional provider model identifier; matched case-insensitively.
 * @returns The replay policy produced by `buildStrictAnthropicReplayPolicy`.
 */
export function buildAnthropicReplayPolicyForModel(modelId?: string): ProviderReplayPolicy {
  const isClaude = (modelId?.toLowerCase() ?? "").includes("claude");
  return buildStrictAnthropicReplayPolicy({
    dropThinkingBlocks: isClaude && !shouldPreserveThinkingBlocks(modelId),
  });
}
export function buildNativeAnthropicReplayPolicyForModel(modelId?: string): ProviderReplayPolicy {
const isClaude = (modelId?.toLowerCase() ?? "").includes("claude");
return buildStrictAnthropicReplayPolicy({
dropThinkingBlocks: (modelId?.toLowerCase() ?? "").includes("claude"),
dropThinkingBlocks: isClaude && !shouldPreserveThinkingBlocks(modelId),
sanitizeToolCallIds: true,
preserveNativeAnthropicToolUseIds: true,
});
@@ -82,10 +120,12 @@ export function buildHybridAnthropicOrOpenAIReplayPolicy(
options: { anthropicModelDropThinkingBlocks?: boolean } = {},
): ProviderReplayPolicy | undefined {
if (ctx.modelApi === "anthropic-messages" || ctx.modelApi === "bedrock-converse-stream") {
const isClaude = (ctx.modelId?.toLowerCase() ?? "").includes("claude");
return buildStrictAnthropicReplayPolicy({
dropThinkingBlocks:
options.anthropicModelDropThinkingBlocks &&
(ctx.modelId?.toLowerCase() ?? "").includes("claude"),
isClaude &&
!shouldPreserveThinkingBlocks(ctx.modelId),
});
}