mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-21 05:32:53 +00:00
fix: preserve thinking blocks for Claude Opus 4.5+/Sonnet 4.5+ to fix cache
Claude Opus 4.5+ and Sonnet 4.5+ preserve thinking blocks in model context by default. Dropping them from prior turns (as was correct for Sonnet 3.7) breaks Anthropic's prefix-based prompt cache matching, causing cache misses after every thinking turn.

This change conditions dropThinkingBlocks on the model version:
- Preserve (no drop) for: opus-4.x, sonnet-4.5+, haiku-4.x, and future models
- Drop for: claude-3-7-sonnet and earlier

Fixes #61793
See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
This commit is contained in:
committed by
Peter Steinberger
parent
2751874cbb
commit
7a3514664d
@@ -262,6 +262,32 @@ describe("resolveTranscriptPolicy", () => {
|
||||
expect(policy.validateAnthropicTurns).toBe(true);
|
||||
});
|
||||
|
||||
it("preserves thinking blocks for newer Claude models in unowned Anthropic transport fallback", () => {
  // Every case is routed through a custom proxy that speaks the Anthropic
  // messages API, so only the model id decides the expected outcome.
  const cases = [
    // Opus 4.6 via custom proxy: should NOT drop thinking blocks
    { modelId: "claude-opus-4-6", expectedDrop: false },
    // Sonnet 4.5 via custom proxy: should NOT drop
    { modelId: "claude-sonnet-4-5-20250929", expectedDrop: false },
    // Legacy Sonnet 3.7 via custom proxy: SHOULD drop
    { modelId: "claude-3-7-sonnet-20250219", expectedDrop: true },
  ];

  for (const { modelId, expectedDrop } of cases) {
    const policy = resolveTranscriptPolicy({
      provider: "custom-anthropic-proxy",
      modelId,
      modelApi: "anthropic-messages",
    });
    expect(policy.dropThinkingBlocks).toBe(expectedDrop);
  }
});
|
||||
|
||||
it("preserves transport defaults when a runtime plugin has not adopted replay hooks", () => {
|
||||
const policy = resolveTranscriptPolicy({
|
||||
provider: "vllm",
|
||||
|
||||
@@ -46,6 +46,35 @@ function isAnthropicApi(modelApi?: string | null): boolean {
|
||||
return modelApi === "anthropic-messages" || modelApi === "bedrock-converse-stream";
|
||||
}
|
||||
|
||||
/**
 * Returns true for Claude models that preserve thinking blocks in context
 * natively (Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models, dropping
 * thinking blocks from prior turns breaks prompt cache prefix matching.
 *
 * Matching is a case-insensitive substring check, so prefixed or dated ids
 * that contain a recognised family marker are classified the same way as the
 * bare id.
 *
 * NOTE(review): the `opus-4` and `haiku-4` markers match every 4.x id of
 * those families, which is broader than the "4.5+" wording above — confirm
 * that is the intended cut-off.
 *
 * See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
 *
 * @param modelId - Model identifier to classify.
 * @returns `true` when thinking blocks should be kept for this model,
 *   `false` for non-Claude ids and for Claude ids that match no marker.
 */
function shouldPreserveThinkingBlocksForModel(modelId: string): boolean {
  // Normalise case first so mixed-case ids are classified like lowercase ones.
  const id = modelId.toLowerCase();
  if (!id.includes("claude")) return false;

  if (
    id.includes("opus-4") ||
    id.includes("sonnet-4-5") ||
    id.includes("sonnet-4-6") ||
    id.includes("sonnet-4.5") ||
    id.includes("sonnet-4.6") ||
    id.includes("haiku-4")
  ) {
    return true;
  }

  // Future-proofing: claude-5-x, claude-6-x etc.
  // NOTE(review): only ids whose major version directly follows `claude-` are
  // matched here; a family-first id such as `claude-opus-5` is not.
  if (/claude-[5-9]/.test(id) || /claude-\d{2,}/.test(id)) {
    return true;
  }

  return false;
}
|
||||
|
||||
/**
|
||||
* Provides a narrow replay-policy fallback for providers that do not have an
|
||||
* owning runtime plugin.
|
||||
@@ -93,7 +122,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
...(isAnthropic && modelId.includes("claude") ? { dropThinkingBlocks: true } : {}),
|
||||
...(isAnthropic && modelId.includes("claude") ? { dropThinkingBlocks: !shouldPreserveThinkingBlocksForModel(modelId) } : {}),
|
||||
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
|
||||
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
|
||||
...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}),
|
||||
|
||||
@@ -33,19 +33,51 @@ describe("provider replay helpers", () => {
|
||||
});
|
||||
|
||||
it("derives claude-only anthropic replay policy from the model id", () => {
  // Sonnet 4.6 preserves thinking blocks (no drop): the strict-policy fields
  // are present, but no dropThinkingBlocks property is emitted.
  const sonnet46Policy = buildAnthropicReplayPolicyForModel("claude-sonnet-4-6");
  expect(sonnet46Policy).toMatchObject({
    sanitizeToolCallIds: true,
    toolCallIdMode: "strict",
    validateAnthropicTurns: true,
  });
  expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");

  // Legacy models still drop thinking blocks
  expect(buildAnthropicReplayPolicyForModel("claude-3-7-sonnet-20250219")).toMatchObject({
    dropThinkingBlocks: true,
  });

  // Non-Claude model ids never request thinking-block dropping
  expect(buildAnthropicReplayPolicyForModel("amazon.nova-pro-v1")).not.toHaveProperty(
    "dropThinkingBlocks",
  );
});
|
||||
|
||||
it("preserves thinking blocks for Claude Opus 4.5+ and Sonnet 4.5+ models", () => {
  // These models should NOT drop thinking blocks
  const preservingModelIds = [
    "claude-opus-4-5-20251101",
    "claude-opus-4-6",
    "claude-sonnet-4-5-20250929",
    "claude-sonnet-4-6",
    "claude-haiku-4-5-20251001",
  ];
  for (const modelId of preservingModelIds) {
    const policy = buildAnthropicReplayPolicyForModel(modelId);
    expect(policy).not.toHaveProperty("dropThinkingBlocks");
  }

  // These legacy models SHOULD drop thinking blocks
  const legacyModelIds = ["claude-3-7-sonnet-20250219", "claude-3-5-sonnet-20240620"];
  for (const modelId of legacyModelIds) {
    const policy = buildAnthropicReplayPolicyForModel(modelId);
    expect(policy).toMatchObject({ dropThinkingBlocks: true });
  }
});
|
||||
|
||||
it("builds native Anthropic replay policy with selective tool-call id preservation", () => {
|
||||
expect(buildNativeAnthropicReplayPolicyForModel("claude-sonnet-4-6")).toMatchObject({
|
||||
// Sonnet 4.6 preserves thinking blocks
|
||||
const policy46 = buildNativeAnthropicReplayPolicyForModel("claude-sonnet-4-6");
|
||||
expect(policy46).toMatchObject({
|
||||
sanitizeMode: "full",
|
||||
sanitizeToolCallIds: true,
|
||||
toolCallIdMode: "strict",
|
||||
@@ -54,17 +86,37 @@ describe("provider replay helpers", () => {
|
||||
repairToolUseResultPairing: true,
|
||||
validateAnthropicTurns: true,
|
||||
allowSyntheticToolResults: true,
|
||||
});
|
||||
expect(policy46).not.toHaveProperty("dropThinkingBlocks");
|
||||
|
||||
// Legacy model drops thinking blocks
|
||||
expect(buildNativeAnthropicReplayPolicyForModel("claude-3-7-sonnet-20250219")).toMatchObject({
|
||||
dropThinkingBlocks: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("builds hybrid anthropic or openai replay policy", () => {
|
||||
// Sonnet 4.6 preserves thinking blocks even when flag is set
|
||||
const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy(
|
||||
{
|
||||
provider: "minimax",
|
||||
modelApi: "anthropic-messages",
|
||||
modelId: "claude-sonnet-4-6",
|
||||
} as never,
|
||||
{ anthropicModelDropThinkingBlocks: true },
|
||||
);
|
||||
expect(sonnet46Policy).toMatchObject({
|
||||
validateAnthropicTurns: true,
|
||||
});
|
||||
expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks");
|
||||
|
||||
// Legacy model still drops
|
||||
expect(
|
||||
buildHybridAnthropicOrOpenAIReplayPolicy(
|
||||
{
|
||||
provider: "minimax",
|
||||
modelApi: "anthropic-messages",
|
||||
modelId: "claude-sonnet-4-6",
|
||||
modelId: "claude-3-7-sonnet-20250219",
|
||||
} as never,
|
||||
{ anthropicModelDropThinkingBlocks: true },
|
||||
),
|
||||
|
||||
@@ -63,15 +63,53 @@ export function buildStrictAnthropicReplayPolicy(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Returns true for Claude models that preserve thinking blocks in context
 * natively (Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models, dropping
 * thinking blocks from prior turns breaks prompt cache prefix matching.
 *
 * The id is lowercased before matching, and a missing id is treated as the
 * empty string (so it is never classified as preserving).
 *
 * NOTE(review): the `opus-4` and `haiku-4` markers match every 4.x id of
 * those families, which is broader than the "4.5+" wording above — confirm
 * that is the intended cut-off.
 *
 * See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
 *
 * @param modelId - Model identifier to classify; may be omitted.
 * @returns `true` when thinking blocks should be kept for this model,
 *   `false` for missing ids, non-Claude ids, and Claude ids that match no
 *   marker.
 */
function shouldPreserveThinkingBlocks(modelId?: string): boolean {
  const id = (modelId ?? "").toLowerCase();
  if (!id.includes("claude")) return false;

  // Models that preserve thinking blocks natively:
  // - claude-opus-4-5, claude-opus-4-6 (and any future opus-4.x+)
  // - claude-sonnet-4-5, claude-sonnet-4-6
  // - claude-haiku-4-5 (and any future haiku-4.x+)
  // Models that require dropping thinking blocks:
  // - claude-3-7-sonnet, claude-3-5-sonnet, and earlier
  const preservingMarkers = [
    "opus-4",
    "sonnet-4-5",
    "sonnet-4-6",
    "sonnet-4.5",
    "sonnet-4.6",
    "haiku-4",
  ];
  if (preservingMarkers.some((marker) => id.includes(marker))) {
    return true;
  }

  // Future-proofing: claude-5-x, claude-6-x etc. should also preserve.
  // NOTE(review): only ids whose major version directly follows `claude-` are
  // matched here; a family-first id such as `claude-opus-5` is not.
  return /claude-[5-9]/.test(id) || /claude-\d{2,}/.test(id);
}
|
||||
|
||||
/**
 * Builds the strict Anthropic replay policy for the given model id.
 *
 * Thinking-block dropping is requested only when the id contains "claude"
 * (case-insensitive) and `shouldPreserveThinkingBlocks` does not classify the
 * model as one that keeps thinking blocks in context.
 *
 * @param modelId - Model identifier; a missing id is treated as non-Claude.
 * @returns The policy produced by `buildStrictAnthropicReplayPolicy`.
 */
export function buildAnthropicReplayPolicyForModel(modelId?: string): ProviderReplayPolicy {
  const isClaude = (modelId?.toLowerCase() ?? "").includes("claude");
  return buildStrictAnthropicReplayPolicy({
    dropThinkingBlocks: isClaude && !shouldPreserveThinkingBlocks(modelId),
  });
}
|
||||
|
||||
export function buildNativeAnthropicReplayPolicyForModel(modelId?: string): ProviderReplayPolicy {
|
||||
const isClaude = (modelId?.toLowerCase() ?? "").includes("claude");
|
||||
return buildStrictAnthropicReplayPolicy({
|
||||
dropThinkingBlocks: (modelId?.toLowerCase() ?? "").includes("claude"),
|
||||
dropThinkingBlocks: isClaude && !shouldPreserveThinkingBlocks(modelId),
|
||||
sanitizeToolCallIds: true,
|
||||
preserveNativeAnthropicToolUseIds: true,
|
||||
});
|
||||
@@ -82,10 +120,12 @@ export function buildHybridAnthropicOrOpenAIReplayPolicy(
|
||||
options: { anthropicModelDropThinkingBlocks?: boolean } = {},
|
||||
): ProviderReplayPolicy | undefined {
|
||||
if (ctx.modelApi === "anthropic-messages" || ctx.modelApi === "bedrock-converse-stream") {
|
||||
const isClaude = (ctx.modelId?.toLowerCase() ?? "").includes("claude");
|
||||
return buildStrictAnthropicReplayPolicy({
|
||||
dropThinkingBlocks:
|
||||
options.anthropicModelDropThinkingBlocks &&
|
||||
(ctx.modelId?.toLowerCase() ?? "").includes("claude"),
|
||||
isClaude &&
|
||||
!shouldPreserveThinkingBlocks(ctx.modelId),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user