From 134f41496dd3d3bcbd1601b223856830c8f3a88e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 1 Mar 2026 10:05:29 +0800 Subject: [PATCH 1/7] fix(antigravity): update model configurations and add new models for Antigravity --- internal/registry/model_definitions_static_data.go | 9 ++++----- internal/runtime/executor/antigravity_executor.go | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go index 7cfe15db..f70d3984 100644 --- a/internal/registry/model_definitions_static_data.go +++ b/internal/registry/model_definitions_static_data.go @@ -947,18 +947,17 @@ type AntigravityModelConfig struct { // Keys use upstream model names returned by the Antigravity models endpoint. func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { return map[string]*AntigravityModelConfig{ - // "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}}, "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, - "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, "gemini-3.1-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3.1-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, "gemini-3.1-flash-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}}, "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}}, - "claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}}, - "claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}}, + "claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "gpt-oss-120b-medium": {}, - "tab_flash_lite_preview": {}, } } diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 00959a22..919d96fa 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1152,7 +1152,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c continue } switch modelID { - case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro": + case "chat_20706", "chat_23310", "tab_flash_lite_preview", "tab_jump_flash_lite_preview", "gemini-2.5-flash-thinking", "gemini-2.5-pro": continue } modelCfg := modelConfig[modelID] From b148820c358480220e2a5ca8958accec8599071d Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 1 Mar 2026 10:30:19 +0800 Subject: [PATCH 2/7] fix(translator): handle Claude thinking type "auto" like adaptive --- .../antigravity/claude/antigravity_claude_request.go | 10 ++-------- .../translator/codex/claude/codex_claude_request.go | 4 ++-- .../gemini-cli/claude/gemini-cli_claude_request.go | 4 ++-- .../translator/gemini/claude/gemini_claude_request.go | 4 ++-- .../translator/openai/claude/openai_claude_request.go | 4 ++-- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index a3f9fa48..c4e07b6a 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -440,14 +440,8 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) } - case "auto": - // Amp sends thinking.type="auto" — use max budget from model config - // Antigravity API for Claude models requires a concrete positive budget, - // not -1. Use a high default that ApplyThinking will cap to model max. - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 64000) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) - case "adaptive": - // Keep adaptive as a high level sentinel; ApplyThinking resolves it + case "adaptive", "auto": + // Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it // to model-specific max capability. out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high") out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 64e41fb5..739b39e9 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -230,8 +230,8 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) reasoningEffort = effort } } - case "adaptive": - // Claude adaptive means "enable with max capacity"; keep it as highest level + case "adaptive", "auto": + // Claude adaptive/auto means "enable with max capacity"; keep it as highest level // and let ApplyThinking normalize per target model capability. reasoningEffort = string(thinking.LevelXHigh) case "disabled": diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index ee661381..653bbeb2 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -180,8 +180,8 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) } - case "adaptive": - // Keep adaptive as a high level sentinel; ApplyThinking resolves it + case "adaptive", "auto": + // Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it // to model-specific max capability. out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high") out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index e882f769..b5756d20 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -161,8 +161,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true) } - case "adaptive": - // Keep adaptive as a high level sentinel; ApplyThinking resolves it + case "adaptive", "auto": + // Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it // to model-specific max capability. out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "high") out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true) diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index acb79a13..e3efb83c 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -75,8 +75,8 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "reasoning_effort", effort) } } - case "adaptive": - // Claude adaptive means "enable with max capacity"; keep it as highest level + case "adaptive", "auto": + // Claude adaptive/auto means "enable with max capacity"; keep it as highest level // and let ApplyThinking normalize per target model capability. out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh)) case "disabled": From 444a47ae63375aaf5b29a322e13f2d4f21623c8e Mon Sep 17 00:00:00 2001 From: edlsh Date: Sat, 28 Feb 2026 22:32:33 -0500 Subject: [PATCH 3/7] Fix Claude cache-control guardrails and gzip error decoding --- internal/runtime/executor/claude_executor.go | 303 +++++++++++++++++- .../runtime/executor/claude_executor_test.go | 171 ++++++++++ 2 files changed, 465 insertions(+), 9 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index fcb3a9c9..8826b061 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -135,6 +135,15 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r body = ensureCacheControl(body) } + // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). + // Cloaking and ensureCacheControl may push the total over 4 when the client + // (e.g. Amp CLI) already sends multiple cache_control blocks. + body = enforceCacheControlLimit(body, 4) + + // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. + // A 1h-TTL block must not appear after a 5m-TTL block in evaluation order (tools→system→messages). + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -176,11 +185,18 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) + errBody := httpResp.Body + if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { + if decoded, decErr := decodeResponseBody(httpResp.Body, ce); decErr == nil { + errBody = decoded + } + } + b, _ := io.ReadAll(errBody) appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} - if errClose := httpResp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return resp, err @@ -276,6 +292,12 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A body = ensureCacheControl(body) } + // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). + body = enforceCacheControlLimit(body, 4) + + // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -317,10 +339,17 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) + errBody := httpResp.Body + if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { + if decoded, decErr := decodeResponseBody(httpResp.Body, ce); decErr == nil { + errBody = decoded + } + } + b, _ := io.ReadAll(errBody) appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - if errClose := httpResp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } err = statusErr{code: httpResp.StatusCode, msg: string(b)} @@ -425,6 +454,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut body = checkSystemInstructions(body) } + // Keep count_tokens requests compatible with Anthropic cache-control constraints too. + body = enforceCacheControlLimit(body, 4) + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header (for count_tokens too) var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -464,9 +497,16 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut } recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if resp.StatusCode < 200 || resp.StatusCode >= 300 { - b, _ := io.ReadAll(resp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) + errBody := io.ReadCloser(resp.Body) + if ce := resp.Header.Get("Content-Encoding"); ce != "" { + if decoded, decErr := decodeResponseBody(resp.Body, ce); decErr == nil { + errBody = decoded + } + } + b, _ := io.ReadAll(errBody) appendAPIResponseChunk(ctx, e.cfg, b) - if errClose := resp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)} @@ -1083,7 +1123,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { billingText := generateBillingHeader(payload) billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText) - agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK.","cache_control":{"type":"ephemeral","ttl":"1h"}}` + // No cache_control on the agent block. It is a cloaking artifact with zero cache + // value (the last system block is what actually triggers caching of all system content). + // Including any cache_control here creates an intra-system TTL ordering violation + // when the client's system blocks use ttl='1h' (prompt-caching-scope-2026-01-05 beta + // forbids 1h blocks after 5m blocks, and a no-TTL block defaults to 5m). + agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK."}` if strictMode { // Strict mode: billing header + agent identifier only @@ -1103,11 +1148,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { if system.IsArray() { system.ForEach(func(_, part gjson.Result) bool { if part.Get("type").String() == "text" { - // Add cache_control with ttl to user system messages if not present + // Add cache_control to user system messages if not present. + // Do NOT add ttl — let it inherit the default (5m) to avoid + // TTL ordering violations with the prompt-caching-scope-2026-01-05 beta. partJSON := part.Raw if !part.Get("cache_control").Exists() { partJSON, _ = sjson.Set(partJSON, "cache_control.type", "ephemeral") - partJSON, _ = sjson.Set(partJSON, "cache_control.ttl", "1h") } result += "," + partJSON } @@ -1254,6 +1300,245 @@ func countCacheControls(payload []byte) int { return count } +// normalizeCacheControlTTL ensures cache_control TTL values don't violate the +// prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not +// appear after a 5m-TTL block anywhere in the evaluation order. +// +// Anthropic evaluates blocks in order: tools → system (index 0..N) → messages. +// Within each section, blocks are evaluated in array order. A 5m (default) block +// followed by a 1h block at ANY later position is an error — including within +// the same section (e.g. system[1]=5m then system[3]=1h). +// +// Strategy: walk all cache_control blocks in evaluation order. Once a 5m block +// is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m). +func normalizeCacheControlTTL(payload []byte) []byte { + seen5m := false // once true, all subsequent 1h blocks must be downgraded + + // Phase 1: tools (evaluated first) + tools := gjson.GetBytes(payload, "tools") + if tools.IsArray() { + idx := 0 + tools.ForEach(func(_, tool gjson.Result) bool { + cc := tool.Get("cache_control") + if cc.Exists() { + ttl := cc.Get("ttl").String() + if ttl != "1h" { + seen5m = true + } else if seen5m { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control.ttl", idx)) + } + } + idx++ + return true + }) + } + + // Phase 2: system blocks (evaluated second, in array order) + system := gjson.GetBytes(payload, "system") + if system.IsArray() { + idx := 0 + system.ForEach(func(_, item gjson.Result) bool { + cc := item.Get("cache_control") + if cc.Exists() { + ttl := cc.Get("ttl").String() + if ttl != "1h" { + seen5m = true + } else if seen5m { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control.ttl", idx)) + } + } + idx++ + return true + }) + } + + // Phase 3: message content blocks (evaluated last, in array order) + messages := gjson.GetBytes(payload, "messages") + if messages.IsArray() { + msgIdx := 0 + messages.ForEach(func(_, msg gjson.Result) bool { + content := msg.Get("content") + if content.IsArray() { + contentIdx := 0 + content.ForEach(func(_, item gjson.Result) bool { + cc := item.Get("cache_control") + if cc.Exists() { + ttl := cc.Get("ttl").String() + if ttl != "1h" { + seen5m = true + } else if seen5m { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("messages.%d.content.%d.cache_control.ttl", msgIdx, contentIdx)) + } + } + contentIdx++ + return true + }) + } + msgIdx++ + return true + }) + } + + return payload +} + +// enforceCacheControlLimit removes excess cache_control blocks from a payload +// so the total does not exceed the Anthropic API limit (currently 4). +// +// Anthropic evaluates cache breakpoints in order: tools → system → messages. +// The most valuable breakpoints are: +// 1. Last tool — caches ALL tool definitions +// 2. Last system block — caches ALL system content +// 3. Recent messages — cache conversation context +// +// Removal priority (strip lowest-value first): +// Phase 1: system blocks earliest-first, preserving the last one. +// Phase 2: tool blocks earliest-first, preserving the last one. +// Phase 3: message content blocks earliest-first. +// Phase 4: remaining system blocks (last system). +// Phase 5: remaining tool blocks (last tool). +func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte { + total := countCacheControls(payload) + if total <= maxBlocks { + return payload + } + + excess := total - maxBlocks + + // Phase 1: strip cache_control from system blocks earliest-first, but SKIP the last one. + // The last system cache_control is high-value because it caches all system content. + system := gjson.GetBytes(payload, "system") + if system.IsArray() { + lastSysCCIdx := -1 + sysIdx := 0 + system.ForEach(func(_, item gjson.Result) bool { + if item.Get("cache_control").Exists() { + lastSysCCIdx = sysIdx + } + sysIdx++ + return true + }) + + idx := 0 + system.ForEach(func(_, item gjson.Result) bool { + if excess <= 0 { + return false + } + if item.Get("cache_control").Exists() && idx != lastSysCCIdx { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control", idx)) + excess-- + } + idx++ + return true + }) + } + if excess <= 0 { + return payload + } + + // Phase 2: strip cache_control from tools earliest-first, but SKIP the last one. + // Only the last tool cache_control is needed to cache all tool definitions. + tools := gjson.GetBytes(payload, "tools") + if tools.IsArray() { + lastToolCCIdx := -1 + toolIdx := 0 + tools.ForEach(func(_, tool gjson.Result) bool { + if tool.Get("cache_control").Exists() { + lastToolCCIdx = toolIdx + } + toolIdx++ + return true + }) + + idx := 0 + tools.ForEach(func(_, tool gjson.Result) bool { + if excess <= 0 { + return false + } + if tool.Get("cache_control").Exists() && idx != lastToolCCIdx { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control", idx)) + excess-- + } + idx++ + return true + }) + } + if excess <= 0 { + return payload + } + + // Phase 3: strip cache_control from message content blocks, earliest first. + // Older conversation turns are least likely to help immediate reuse. + messages := gjson.GetBytes(payload, "messages") + if messages.IsArray() { + msgIdx := 0 + messages.ForEach(func(_, msg gjson.Result) bool { + if excess <= 0 { + return false + } + content := msg.Get("content") + if content.IsArray() { + contentIdx := 0 + content.ForEach(func(_, item gjson.Result) bool { + if excess <= 0 { + return false + } + if item.Get("cache_control").Exists() { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("messages.%d.content.%d.cache_control", msgIdx, contentIdx)) + excess-- + } + contentIdx++ + return true + }) + } + msgIdx++ + return true + }) + } + if excess <= 0 { + return payload + } + + // Phase 4: strip any remaining system cache_control blocks. + system = gjson.GetBytes(payload, "system") + if system.IsArray() { + idx := 0 + system.ForEach(func(_, item gjson.Result) bool { + if excess <= 0 { + return false + } + if item.Get("cache_control").Exists() { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control", idx)) + excess-- + } + idx++ + return true + }) + } + if excess <= 0 { + return payload + } + + // Phase 5: strip any remaining tool cache_control blocks (including the last tool). + tools = gjson.GetBytes(payload, "tools") + if tools.IsArray() { + idx := 0 + tools.ForEach(func(_, tool gjson.Result) bool { + if excess <= 0 { + return false + } + if tool.Get("cache_control").Exists() { + payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control", idx)) + excess-- + } + idx++ + return true + }) + } + + return payload +} + // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching. // Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache." // This enables caching of conversation history, which is especially beneficial for long multi-turn conversations. diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index dd29ed8a..d90076b6 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -348,3 +348,174 @@ func TestApplyClaudeToolPrefix_SkipsBuiltinToolReference(t *testing.T) { t.Fatalf("built-in tool_reference should not be prefixed, got %q", got) } } + +func TestNormalizeCacheControlTTL_DowngradesLaterOneHourBlocks(t *testing.T) { + payload := []byte(`{ + "tools": [{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}], + "system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}], + "messages": [{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}] + }`) + + out := normalizeCacheControlTTL(payload) + + if got := gjson.GetBytes(out, "tools.0.cache_control.ttl").String(); got != "1h" { + t.Fatalf("tools.0.cache_control.ttl = %q, want %q", got, "1h") + } + if gjson.GetBytes(out, "messages.0.content.0.cache_control.ttl").Exists() { + t.Fatalf("messages.0.content.0.cache_control.ttl should be removed after a default-5m block") + } +} + +func TestEnforceCacheControlLimit_StripsNonLastToolBeforeMessages(t *testing.T) { + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}} + ], + "system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}], + "messages": [ + {"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral"}}]}, + {"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral"}}]} + ] + }`) + + out := enforceCacheControlLimit(payload, 4) + + if got := countCacheControls(out); got != 4 { + t.Fatalf("cache_control count = %d, want 4", got) + } + if gjson.GetBytes(out, "tools.0.cache_control").Exists() { + t.Fatalf("tools.0.cache_control should be removed first (non-last tool)") + } + if !gjson.GetBytes(out, "tools.1.cache_control").Exists() { + t.Fatalf("tools.1.cache_control (last tool) should be preserved") + } + if !gjson.GetBytes(out, "messages.0.content.0.cache_control").Exists() || !gjson.GetBytes(out, "messages.1.content.0.cache_control").Exists() { + t.Fatalf("message cache_control blocks should be preserved when non-last tool removal is enough") + } +} + +func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T) { + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}}, + {"name":"t3","cache_control":{"type":"ephemeral"}}, + {"name":"t4","cache_control":{"type":"ephemeral"}}, + {"name":"t5","cache_control":{"type":"ephemeral"}} + ] + }`) + + out := enforceCacheControlLimit(payload, 4) + + if got := countCacheControls(out); got != 4 { + t.Fatalf("cache_control count = %d, want 4", got) + } + if gjson.GetBytes(out, "tools.0.cache_control").Exists() { + t.Fatalf("tools.0.cache_control should be removed to satisfy max=4") + } + if !gjson.GetBytes(out, "tools.4.cache_control").Exists() { + t.Fatalf("last tool cache_control should be preserved when possible") + } +} + +func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) { + var seenBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + seenBody = bytes.Clone(body) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"input_tokens":42}`)) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}} + ], + "system": [ + {"type":"text","text":"s1","cache_control":{"type":"ephemeral","ttl":"1h"}}, + {"type":"text","text":"s2","cache_control":{"type":"ephemeral","ttl":"1h"}} + ], + "messages": [ + {"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}, + {"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral","ttl":"1h"}}]} + ] + }`) + + _, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-haiku-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + if err != nil { + t.Fatalf("CountTokens error: %v", err) + } + + if len(seenBody) == 0 { + t.Fatal("expected count_tokens request body to be captured") + } + if got := countCacheControls(seenBody); got > 4 { + t.Fatalf("count_tokens body has %d cache_control blocks, want <= 4", got) + } + if hasTTLOrderingViolation(seenBody) { + t.Fatalf("count_tokens body still has ttl ordering violations: %s", string(seenBody)) + } +} + +func hasTTLOrderingViolation(payload []byte) bool { + seen5m := false + violates := false + + checkCC := func(cc gjson.Result) { + if !cc.Exists() || violates { + return + } + ttl := cc.Get("ttl").String() + if ttl != "1h" { + seen5m = true + return + } + if seen5m { + violates = true + } + } + + tools := gjson.GetBytes(payload, "tools") + if tools.IsArray() { + tools.ForEach(func(_, tool gjson.Result) bool { + checkCC(tool.Get("cache_control")) + return !violates + }) + } + + system := gjson.GetBytes(payload, "system") + if system.IsArray() { + system.ForEach(func(_, item gjson.Result) bool { + checkCC(item.Get("cache_control")) + return !violates + }) + } + + messages := gjson.GetBytes(payload, "messages") + if messages.IsArray() { + messages.ForEach(func(_, msg gjson.Result) bool { + content := msg.Get("content") + if content.IsArray() { + content.ForEach(func(_, item gjson.Result) bool { + checkCC(item.Get("cache_control")) + return !violates + }) + } + return !violates + }) + } + + return violates +} From 0ad3e8457f9d3121b0fa24b95c96b4d6d3030ca3 Mon Sep 17 00:00:00 2001 From: edlsh Date: Sat, 28 Feb 2026 22:34:14 -0500 Subject: [PATCH 4/7] Clarify cloaking system block cache-control comments --- internal/runtime/executor/claude_executor.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 8826b061..ddbe9297 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -1113,11 +1113,10 @@ func generateBillingHeader(payload []byte) string { return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch) } -// checkSystemInstructionsWithMode injects Claude Code system prompt to match -// the real Claude Code request format: +// checkSystemInstructionsWithMode injects Claude Code-style system blocks: // system[0]: billing header (no cache_control) -// system[1]: "You are a Claude agent, built on Anthropic's Claude Agent SDK." (with cache_control) -// system[2..]: user's system messages (with cache_control on last) +// system[1]: agent identifier (no cache_control) +// system[2..]: user system messages (cache_control added when missing) func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { system := gjson.GetBytes(payload, "system") From 6ac9b31e4eeb743b89b9fbccee1c4fe2e2c5b43a Mon Sep 17 00:00:00 2001 From: edlsh Date: Sat, 28 Feb 2026 22:43:46 -0500 Subject: [PATCH 5/7] Handle compressed error decode failures safely --- internal/runtime/executor/claude_executor.go | 59 +++++++++++++---- .../runtime/executor/claude_executor_test.go | 64 +++++++++++++++++++ 2 files changed, 110 insertions(+), 13 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index ddbe9297..483a4830 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -185,14 +185,25 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). errBody := httpResp.Body if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { - if decoded, decErr := decodeResponseBody(httpResp.Body, ce); decErr == nil { - errBody = decoded + var decErr error + errBody, decErr = decodeResponseBody(httpResp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return resp, statusErr{code: httpResp.StatusCode, msg: msg} } } - b, _ := io.ReadAll(errBody) + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} @@ -339,14 +350,25 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). errBody := httpResp.Body if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { - if decoded, decErr := decodeResponseBody(httpResp.Body, ce); decErr == nil { - errBody = decoded + var decErr error + errBody, decErr = decodeResponseBody(httpResp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return nil, statusErr{code: httpResp.StatusCode, msg: msg} } } - b, _ := io.ReadAll(errBody) + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := errBody.Close(); errClose != nil { @@ -497,14 +519,25 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut } recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if resp.StatusCode < 200 || resp.StatusCode >= 300 { - // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API) - errBody := io.ReadCloser(resp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). + errBody := resp.Body if ce := resp.Header.Get("Content-Encoding"); ce != "" { - if decoded, decErr := decodeResponseBody(resp.Body, ce); decErr == nil { - errBody = decoded + var decErr error + errBody, decErr = decodeResponseBody(resp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg} } } - b, _ := io.ReadAll(errBody) + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index d90076b6..f9553f9a 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "net/http/httptest" + "strings" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -519,3 +520,66 @@ func hasTTLOrderingViolation(payload []byte) bool { return violates } + +func TestClaudeExecutor_Execute_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func TestClaudeExecutor_ExecuteStream_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func TestClaudeExecutor_CountTokens_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func testClaudeExecutorInvalidCompressedErrorBody( + t *testing.T, + invoke func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error, +) { + t.Helper() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Encoding", "gzip") + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte("not-a-valid-gzip-stream")) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) + + err := invoke(executor, auth, payload) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "failed to decode error response body") { + t.Fatalf("expected decode failure message, got: %v", err) + } + if statusProvider, ok := err.(interface{ StatusCode() int }); !ok || statusProvider.StatusCode() != http.StatusBadRequest { + t.Fatalf("expected status code 400, got: %v", err) + } +} From 76aa917882acb78eb98d08b32ce35354ba2f162d Mon Sep 17 00:00:00 2001 From: edlsh Date: Sat, 28 Feb 2026 22:47:04 -0500 Subject: [PATCH 6/7] Optimize cache-control JSON mutations in Claude executor --- internal/runtime/executor/claude_executor.go | 446 +++++++++++-------- 1 file changed, 258 insertions(+), 188 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 483a4830..0845d168 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -9,6 +9,7 @@ import ( "crypto/rand" "crypto/sha256" "encoding/hex" + "encoding/json" "fmt" "io" "net/http" @@ -1147,9 +1148,10 @@ func generateBillingHeader(payload []byte) string { } // checkSystemInstructionsWithMode injects Claude Code-style system blocks: -// system[0]: billing header (no cache_control) -// system[1]: agent identifier (no cache_control) -// system[2..]: user system messages (cache_control added when missing) +// +// system[0]: billing header (no cache_control) +// system[1]: agent identifier (no cache_control) +// system[2..]: user system messages (cache_control added when missing) func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { system := gjson.GetBytes(payload, "system") @@ -1332,6 +1334,180 @@ func countCacheControls(payload []byte) int { return count } +func parsePayloadObject(payload []byte) (map[string]any, bool) { + if len(payload) == 0 { + return nil, false + } + var root map[string]any + if err := json.Unmarshal(payload, &root); err != nil { + return nil, false + } + return root, true +} + +func marshalPayloadObject(original []byte, root map[string]any) []byte { + if root == nil { + return original + } + out, err := json.Marshal(root) + if err != nil { + return original + } + return out +} + +func asObject(v any) (map[string]any, bool) { + obj, ok := v.(map[string]any) + return obj, ok +} + +func asArray(v any) ([]any, bool) { + arr, ok := v.([]any) + return arr, ok +} + +func countCacheControlsMap(root map[string]any) int { + count := 0 + + if system, ok := asArray(root["system"]); ok { + for _, item := range system { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + + if tools, ok := asArray(root["tools"]); ok { + for _, item := range tools { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + + if messages, ok := asArray(root["messages"]); ok { + for _, msg := range messages { + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + } + + return count +} + +func normalizeTTLForBlock(obj map[string]any, seen5m *bool) { + ccRaw, exists := obj["cache_control"] + if !exists { + return + } + cc, ok := asObject(ccRaw) + if !ok { + *seen5m = true + return + } + ttlRaw, ttlExists := cc["ttl"] + ttl, ttlIsString := ttlRaw.(string) + if !ttlExists || !ttlIsString || ttl != "1h" { + *seen5m = true + return + } + if *seen5m { + delete(cc, "ttl") + } +} + +func findLastCacheControlIndex(arr []any) int { + last := -1 + for idx, item := range arr { + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + last = idx + } + } + return last +} + +func stripCacheControlExceptIndex(arr []any, preserveIdx int, excess *int) { + for idx, item := range arr { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists && idx != preserveIdx { + delete(obj, "cache_control") + *excess-- + } + } +} + +func stripAllCacheControl(arr []any, excess *int) { + for _, item := range arr { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + delete(obj, "cache_control") + *excess-- + } + } +} + +func stripMessageCacheControl(messages []any, excess *int) { + for _, msg := range messages { + if *excess <= 0 { + return + } + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + delete(obj, "cache_control") + *excess-- + } + } + } +} + // normalizeCacheControlTTL ensures cache_control TTL values don't violate the // prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not // appear after a 5m-TTL block anywhere in the evaluation order. @@ -1344,74 +1520,48 @@ func countCacheControls(payload []byte) int { // Strategy: walk all cache_control blocks in evaluation order. Once a 5m block // is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m). func normalizeCacheControlTTL(payload []byte) []byte { - seen5m := false // once true, all subsequent 1h blocks must be downgraded + root, ok := parsePayloadObject(payload) + if !ok { + return payload + } - // Phase 1: tools (evaluated first) - tools := gjson.GetBytes(payload, "tools") - if tools.IsArray() { - idx := 0 - tools.ForEach(func(_, tool gjson.Result) bool { - cc := tool.Get("cache_control") - if cc.Exists() { - ttl := cc.Get("ttl").String() - if ttl != "1h" { - seen5m = true - } else if seen5m { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control.ttl", idx)) + seen5m := false + + if tools, ok := asArray(root["tools"]); ok { + for _, tool := range tools { + if obj, ok := asObject(tool); ok { + normalizeTTLForBlock(obj, &seen5m) + } + } + } + + if system, ok := asArray(root["system"]); ok { + for _, item := range system { + if obj, ok := asObject(item); ok { + normalizeTTLForBlock(obj, &seen5m) + } + } + } + + if messages, ok := asArray(root["messages"]); ok { + for _, msg := range messages { + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if obj, ok := asObject(item); ok { + normalizeTTLForBlock(obj, &seen5m) } } - idx++ - return true - }) + } } - // Phase 2: system blocks (evaluated second, in array order) - system := gjson.GetBytes(payload, "system") - if system.IsArray() { - idx := 0 - system.ForEach(func(_, item gjson.Result) bool { - cc := item.Get("cache_control") - if cc.Exists() { - ttl := cc.Get("ttl").String() - if ttl != "1h" { - seen5m = true - } else if seen5m { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control.ttl", idx)) - } - } - idx++ - return true - }) - } - - // Phase 3: message content blocks (evaluated last, in array order) - messages := gjson.GetBytes(payload, "messages") - if messages.IsArray() { - msgIdx := 0 - messages.ForEach(func(_, msg gjson.Result) bool { - content := msg.Get("content") - if content.IsArray() { - contentIdx := 0 - content.ForEach(func(_, item gjson.Result) bool { - cc := item.Get("cache_control") - if cc.Exists() { - ttl := cc.Get("ttl").String() - if ttl != "1h" { - seen5m = true - } else if seen5m { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("messages.%d.content.%d.cache_control.ttl", msgIdx, contentIdx)) - } - } - contentIdx++ - return true - }) - } - msgIdx++ - return true - }) - } - - return payload + return marshalPayloadObject(payload, root) } // enforceCacheControlLimit removes excess cache_control blocks from a payload @@ -1419,156 +1569,76 @@ func normalizeCacheControlTTL(payload []byte) []byte { // // Anthropic evaluates cache breakpoints in order: tools → system → messages. // The most valuable breakpoints are: -// 1. Last tool — caches ALL tool definitions -// 2. Last system block — caches ALL system content -// 3. Recent messages — cache conversation context +// 1. Last tool — caches ALL tool definitions +// 2. Last system block — caches ALL system content +// 3. Recent messages — cache conversation context // // Removal priority (strip lowest-value first): -// Phase 1: system blocks earliest-first, preserving the last one. -// Phase 2: tool blocks earliest-first, preserving the last one. -// Phase 3: message content blocks earliest-first. -// Phase 4: remaining system blocks (last system). -// Phase 5: remaining tool blocks (last tool). +// +// Phase 1: system blocks earliest-first, preserving the last one. +// Phase 2: tool blocks earliest-first, preserving the last one. +// Phase 3: message content blocks earliest-first. +// Phase 4: remaining system blocks (last system). +// Phase 5: remaining tool blocks (last tool). func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte { - total := countCacheControls(payload) + root, ok := parsePayloadObject(payload) + if !ok { + return payload + } + + total := countCacheControlsMap(root) if total <= maxBlocks { return payload } excess := total - maxBlocks - // Phase 1: strip cache_control from system blocks earliest-first, but SKIP the last one. - // The last system cache_control is high-value because it caches all system content. - system := gjson.GetBytes(payload, "system") - if system.IsArray() { - lastSysCCIdx := -1 - sysIdx := 0 - system.ForEach(func(_, item gjson.Result) bool { - if item.Get("cache_control").Exists() { - lastSysCCIdx = sysIdx - } - sysIdx++ - return true - }) + var system []any + if arr, ok := asArray(root["system"]); ok { + system = arr + } + var tools []any + if arr, ok := asArray(root["tools"]); ok { + tools = arr + } + var messages []any + if arr, ok := asArray(root["messages"]); ok { + messages = arr + } - idx := 0 - system.ForEach(func(_, item gjson.Result) bool { - if excess <= 0 { - return false - } - if item.Get("cache_control").Exists() && idx != lastSysCCIdx { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control", idx)) - excess-- - } - idx++ - return true - }) + if len(system) > 0 { + stripCacheControlExceptIndex(system, findLastCacheControlIndex(system), &excess) } if excess <= 0 { - return payload + return marshalPayloadObject(payload, root) } - // Phase 2: strip cache_control from tools earliest-first, but SKIP the last one. - // Only the last tool cache_control is needed to cache all tool definitions. - tools := gjson.GetBytes(payload, "tools") - if tools.IsArray() { - lastToolCCIdx := -1 - toolIdx := 0 - tools.ForEach(func(_, tool gjson.Result) bool { - if tool.Get("cache_control").Exists() { - lastToolCCIdx = toolIdx - } - toolIdx++ - return true - }) - - idx := 0 - tools.ForEach(func(_, tool gjson.Result) bool { - if excess <= 0 { - return false - } - if tool.Get("cache_control").Exists() && idx != lastToolCCIdx { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control", idx)) - excess-- - } - idx++ - return true - }) + if len(tools) > 0 { + stripCacheControlExceptIndex(tools, findLastCacheControlIndex(tools), &excess) } if excess <= 0 { - return payload + return marshalPayloadObject(payload, root) } - // Phase 3: strip cache_control from message content blocks, earliest first. - // Older conversation turns are least likely to help immediate reuse. - messages := gjson.GetBytes(payload, "messages") - if messages.IsArray() { - msgIdx := 0 - messages.ForEach(func(_, msg gjson.Result) bool { - if excess <= 0 { - return false - } - content := msg.Get("content") - if content.IsArray() { - contentIdx := 0 - content.ForEach(func(_, item gjson.Result) bool { - if excess <= 0 { - return false - } - if item.Get("cache_control").Exists() { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("messages.%d.content.%d.cache_control", msgIdx, contentIdx)) - excess-- - } - contentIdx++ - return true - }) - } - msgIdx++ - return true - }) + if len(messages) > 0 { + stripMessageCacheControl(messages, &excess) } if excess <= 0 { - return payload + return marshalPayloadObject(payload, root) } - // Phase 4: strip any remaining system cache_control blocks. - system = gjson.GetBytes(payload, "system") - if system.IsArray() { - idx := 0 - system.ForEach(func(_, item gjson.Result) bool { - if excess <= 0 { - return false - } - if item.Get("cache_control").Exists() { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("system.%d.cache_control", idx)) - excess-- - } - idx++ - return true - }) + if len(system) > 0 { + stripAllCacheControl(system, &excess) } if excess <= 0 { - return payload + return marshalPayloadObject(payload, root) } - // Phase 5: strip any remaining tool cache_control blocks (including the last tool). - tools = gjson.GetBytes(payload, "tools") - if tools.IsArray() { - idx := 0 - tools.ForEach(func(_, tool gjson.Result) bool { - if excess <= 0 { - return false - } - if tool.Get("cache_control").Exists() { - payload, _ = sjson.DeleteBytes(payload, fmt.Sprintf("tools.%d.cache_control", idx)) - excess-- - } - idx++ - return true - }) + if len(tools) > 0 { + stripAllCacheControl(tools, &excess) } - return payload + return marshalPayloadObject(payload, root) } // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching. From a8a5d03c33609f05703114ec7a27e8a455761de2 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 1 Mar 2026 12:42:59 +0800 Subject: [PATCH 7/7] chore: ignore .idea directory in git and docker builds --- .dockerignore | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index ef021aea..843c7e04 100644 --- a/.dockerignore +++ b/.dockerignore @@ -31,6 +31,7 @@ bin/* .agent/* .agents/* .opencode/* +.idea/* .bmad/* _bmad/* _bmad-output/* diff --git a/.gitignore b/.gitignore index 183138f9..90ff3a94 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ GEMINI.md .agents/* .agents/* .opencode/* +.idea/* .bmad/* _bmad/* _bmad-output/*