From 7ff3936efe988034200918cd3ededb0189f8e5bf Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 31 Jan 2026 01:42:58 +0800 Subject: [PATCH] fix(caching): ensure prompt-caching beta is always appended and add multi-turn cache control tests --- .../runtime/executor/caching_verify_test.go | 48 +++++++++++++++++++ internal/runtime/executor/claude_executor.go | 6 ++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/internal/runtime/executor/caching_verify_test.go b/internal/runtime/executor/caching_verify_test.go index 599c1aec..6088d304 100644 --- a/internal/runtime/executor/caching_verify_test.go +++ b/internal/runtime/executor/caching_verify_test.go @@ -165,6 +165,54 @@ func TestEnsureCacheControl(t *testing.T) { t.Errorf("system should have cache_control even with empty tools array") } }) + + // Test case 8: Messages caching for multi-turn (second-to-last user) + t.Run("Messages Caching Second-To-Last User", func(t *testing.T) { + input := []byte(`{ + "model": "claude-3-5-sonnet", + "messages": [ + {"role": "user", "content": "First user"}, + {"role": "assistant", "content": "Assistant reply"}, + {"role": "user", "content": "Second user"}, + {"role": "assistant", "content": "Assistant reply 2"}, + {"role": "user", "content": "Third user"} + ] + }`) + output := ensureCacheControl(input) + + cacheType := gjson.GetBytes(output, "messages.2.content.0.cache_control.type") + if cacheType.String() != "ephemeral" { + t.Errorf("cache_control not found on second-to-last user turn. Output: %s", string(output)) + } + + lastUserCache := gjson.GetBytes(output, "messages.4.content.0.cache_control") + if lastUserCache.Exists() { + t.Errorf("last user turn should NOT have cache_control") + } + }) + + // Test case 9: Existing message cache_control should skip injection + t.Run("Messages Skip When Cache Control Exists", func(t *testing.T) { + input := []byte(`{ + "model": "claude-3-5-sonnet", + "messages": [ + {"role": "user", "content": [{"type": "text", "text": "First user"}]}, + {"role": "assistant", "content": [{"type": "text", "text": "Assistant reply", "cache_control": {"type": "ephemeral"}}]}, + {"role": "user", "content": [{"type": "text", "text": "Second user"}]} + ] + }`) + output := ensureCacheControl(input) + + userCache := gjson.GetBytes(output, "messages.0.content.0.cache_control") + if userCache.Exists() { + t.Errorf("cache_control should NOT be injected when a message already has cache_control") + } + + existingCache := gjson.GetBytes(output, "messages.1.content.0.cache_control.type") + if existingCache.String() != "ephemeral" { + t.Errorf("existing cache_control should be preserved. Output: %s", string(output)) + } + }) } // TestCacheControlOrder verifies the correct order: tools -> system -> messages diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 3edf5080..83c231bd 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -642,13 +642,17 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, ginHeaders = ginCtx.Request.Header } - baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,prompt-caching-2024-07-31" + promptCachingBeta := "prompt-caching-2024-07-31" + baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14," + promptCachingBeta if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" { baseBetas = val if !strings.Contains(val, "oauth") { baseBetas += ",oauth-2025-04-20" } } + if !strings.Contains(baseBetas, promptCachingBeta) { + baseBetas += "," + promptCachingBeta + } // Merge extra betas from request body if len(extraBetas) > 0 {