diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index fcb3a9c9..0845d168 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -9,6 +9,7 @@ import ( "crypto/rand" "crypto/sha256" "encoding/hex" + "encoding/json" "fmt" "io" "net/http" @@ -135,6 +136,15 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r body = ensureCacheControl(body) } + // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). + // Cloaking and ensureCacheControl may push the total over 4 when the client + // (e.g. Amp CLI) already sends multiple cache_control blocks. + body = enforceCacheControlLimit(body, 4) + + // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. + // A 1h-TTL block must not appear after a 5m-TTL block in evaluation order (tools→system→messages). + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -176,11 +186,29 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). + errBody := httpResp.Body + if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { + var decErr error + errBody, decErr = decodeResponseBody(httpResp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return resp, statusErr{code: httpResp.StatusCode, msg: msg} + } + } + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} - if errClose := httpResp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return resp, err @@ -276,6 +304,12 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A body = ensureCacheControl(body) } + // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). + body = enforceCacheControlLimit(body, 4) + + // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -317,10 +351,28 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). + errBody := httpResp.Body + if ce := httpResp.Header.Get("Content-Encoding"); ce != "" { + var decErr error + errBody, decErr = decodeResponseBody(httpResp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return nil, statusErr{code: httpResp.StatusCode, msg: msg} + } + } + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - if errClose := httpResp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } err = statusErr{code: httpResp.StatusCode, msg: string(b)} @@ -425,6 +477,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut body = checkSystemInstructions(body) } + // Keep count_tokens requests compatible with Anthropic cache-control constraints too. + body = enforceCacheControlLimit(body, 4) + body = normalizeCacheControlTTL(body) + // Extract betas from body and convert to header (for count_tokens too) var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -464,9 +520,27 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut } recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if resp.StatusCode < 200 || resp.StatusCode >= 300 { - b, _ := io.ReadAll(resp.Body) + // Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API). + errBody := resp.Body + if ce := resp.Header.Get("Content-Encoding"); ce != "" { + var decErr error + errBody, decErr = decodeResponseBody(resp.Body, ce) + if decErr != nil { + recordAPIResponseError(ctx, e.cfg, decErr) + msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr) + logWithRequestID(ctx).Warn(msg) + return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg} + } + } + b, readErr := io.ReadAll(errBody) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + msg := fmt.Sprintf("failed to read error response body: %v", readErr) + logWithRequestID(ctx).Warn(msg) + b = []byte(msg) + } appendAPIResponseChunk(ctx, e.cfg, b) - if errClose := resp.Body.Close(); errClose != nil { + if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)} @@ -1073,17 +1147,22 @@ func generateBillingHeader(payload []byte) string { return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch) } -// checkSystemInstructionsWithMode injects Claude Code system prompt to match -// the real Claude Code request format: -// system[0]: billing header (no cache_control) -// system[1]: "You are a Claude agent, built on Anthropic's Claude Agent SDK." (with cache_control) -// system[2..]: user's system messages (with cache_control on last) +// checkSystemInstructionsWithMode injects Claude Code-style system blocks: +// +// system[0]: billing header (no cache_control) +// system[1]: agent identifier (no cache_control) +// system[2..]: user system messages (cache_control added when missing) func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { system := gjson.GetBytes(payload, "system") billingText := generateBillingHeader(payload) billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText) - agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK.","cache_control":{"type":"ephemeral","ttl":"1h"}}` + // No cache_control on the agent block. It is a cloaking artifact with zero cache + // value (the last system block is what actually triggers caching of all system content). + // Including any cache_control here creates an intra-system TTL ordering violation + // when the client's system blocks use ttl='1h' (prompt-caching-scope-2026-01-05 beta + // forbids 1h blocks after 5m blocks, and a no-TTL block defaults to 5m). + agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK."}` if strictMode { // Strict mode: billing header + agent identifier only @@ -1103,11 +1182,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { if system.IsArray() { system.ForEach(func(_, part gjson.Result) bool { if part.Get("type").String() == "text" { - // Add cache_control with ttl to user system messages if not present + // Add cache_control to user system messages if not present. + // Do NOT add ttl — let it inherit the default (5m) to avoid + // TTL ordering violations with the prompt-caching-scope-2026-01-05 beta. partJSON := part.Raw if !part.Get("cache_control").Exists() { partJSON, _ = sjson.Set(partJSON, "cache_control.type", "ephemeral") - partJSON, _ = sjson.Set(partJSON, "cache_control.ttl", "1h") } result += "," + partJSON } @@ -1254,6 +1334,313 @@ func countCacheControls(payload []byte) int { return count } +func parsePayloadObject(payload []byte) (map[string]any, bool) { + if len(payload) == 0 { + return nil, false + } + var root map[string]any + if err := json.Unmarshal(payload, &root); err != nil { + return nil, false + } + return root, true +} + +func marshalPayloadObject(original []byte, root map[string]any) []byte { + if root == nil { + return original + } + out, err := json.Marshal(root) + if err != nil { + return original + } + return out +} + +func asObject(v any) (map[string]any, bool) { + obj, ok := v.(map[string]any) + return obj, ok +} + +func asArray(v any) ([]any, bool) { + arr, ok := v.([]any) + return arr, ok +} + +func countCacheControlsMap(root map[string]any) int { + count := 0 + + if system, ok := asArray(root["system"]); ok { + for _, item := range system { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + + if tools, ok := asArray(root["tools"]); ok { + for _, item := range tools { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + + if messages, ok := asArray(root["messages"]); ok { + for _, msg := range messages { + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if obj, ok := asObject(item); ok { + if _, exists := obj["cache_control"]; exists { + count++ + } + } + } + } + } + + return count +} + +func normalizeTTLForBlock(obj map[string]any, seen5m *bool) { + ccRaw, exists := obj["cache_control"] + if !exists { + return + } + cc, ok := asObject(ccRaw) + if !ok { + *seen5m = true + return + } + ttlRaw, ttlExists := cc["ttl"] + ttl, ttlIsString := ttlRaw.(string) + if !ttlExists || !ttlIsString || ttl != "1h" { + *seen5m = true + return + } + if *seen5m { + delete(cc, "ttl") + } +} + +func findLastCacheControlIndex(arr []any) int { + last := -1 + for idx, item := range arr { + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + last = idx + } + } + return last +} + +func stripCacheControlExceptIndex(arr []any, preserveIdx int, excess *int) { + for idx, item := range arr { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists && idx != preserveIdx { + delete(obj, "cache_control") + *excess-- + } + } +} + +func stripAllCacheControl(arr []any, excess *int) { + for _, item := range arr { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + delete(obj, "cache_control") + *excess-- + } + } +} + +func stripMessageCacheControl(messages []any, excess *int) { + for _, msg := range messages { + if *excess <= 0 { + return + } + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if *excess <= 0 { + return + } + obj, ok := asObject(item) + if !ok { + continue + } + if _, exists := obj["cache_control"]; exists { + delete(obj, "cache_control") + *excess-- + } + } + } +} + +// normalizeCacheControlTTL ensures cache_control TTL values don't violate the +// prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not +// appear after a 5m-TTL block anywhere in the evaluation order. +// +// Anthropic evaluates blocks in order: tools → system (index 0..N) → messages. +// Within each section, blocks are evaluated in array order. A 5m (default) block +// followed by a 1h block at ANY later position is an error — including within +// the same section (e.g. system[1]=5m then system[3]=1h). +// +// Strategy: walk all cache_control blocks in evaluation order. Once a 5m block +// is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m). +func normalizeCacheControlTTL(payload []byte) []byte { + root, ok := parsePayloadObject(payload) + if !ok { + return payload + } + + seen5m := false + + if tools, ok := asArray(root["tools"]); ok { + for _, tool := range tools { + if obj, ok := asObject(tool); ok { + normalizeTTLForBlock(obj, &seen5m) + } + } + } + + if system, ok := asArray(root["system"]); ok { + for _, item := range system { + if obj, ok := asObject(item); ok { + normalizeTTLForBlock(obj, &seen5m) + } + } + } + + if messages, ok := asArray(root["messages"]); ok { + for _, msg := range messages { + msgObj, ok := asObject(msg) + if !ok { + continue + } + content, ok := asArray(msgObj["content"]) + if !ok { + continue + } + for _, item := range content { + if obj, ok := asObject(item); ok { + normalizeTTLForBlock(obj, &seen5m) + } + } + } + } + + return marshalPayloadObject(payload, root) +} + +// enforceCacheControlLimit removes excess cache_control blocks from a payload +// so the total does not exceed the Anthropic API limit (currently 4). +// +// Anthropic evaluates cache breakpoints in order: tools → system → messages. +// The most valuable breakpoints are: +// 1. Last tool — caches ALL tool definitions +// 2. Last system block — caches ALL system content +// 3. Recent messages — cache conversation context +// +// Removal priority (strip lowest-value first): +// +// Phase 1: system blocks earliest-first, preserving the last one. +// Phase 2: tool blocks earliest-first, preserving the last one. +// Phase 3: message content blocks earliest-first. +// Phase 4: remaining system blocks (last system). +// Phase 5: remaining tool blocks (last tool). +func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte { + root, ok := parsePayloadObject(payload) + if !ok { + return payload + } + + total := countCacheControlsMap(root) + if total <= maxBlocks { + return payload + } + + excess := total - maxBlocks + + var system []any + if arr, ok := asArray(root["system"]); ok { + system = arr + } + var tools []any + if arr, ok := asArray(root["tools"]); ok { + tools = arr + } + var messages []any + if arr, ok := asArray(root["messages"]); ok { + messages = arr + } + + if len(system) > 0 { + stripCacheControlExceptIndex(system, findLastCacheControlIndex(system), &excess) + } + if excess <= 0 { + return marshalPayloadObject(payload, root) + } + + if len(tools) > 0 { + stripCacheControlExceptIndex(tools, findLastCacheControlIndex(tools), &excess) + } + if excess <= 0 { + return marshalPayloadObject(payload, root) + } + + if len(messages) > 0 { + stripMessageCacheControl(messages, &excess) + } + if excess <= 0 { + return marshalPayloadObject(payload, root) + } + + if len(system) > 0 { + stripAllCacheControl(system, &excess) + } + if excess <= 0 { + return marshalPayloadObject(payload, root) + } + + if len(tools) > 0 { + stripAllCacheControl(tools, &excess) + } + + return marshalPayloadObject(payload, root) +} + // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching. // Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache." // This enables caching of conversation history, which is especially beneficial for long multi-turn conversations. diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index dd29ed8a..f9553f9a 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "net/http/httptest" + "strings" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -348,3 +349,237 @@ func TestApplyClaudeToolPrefix_SkipsBuiltinToolReference(t *testing.T) { t.Fatalf("built-in tool_reference should not be prefixed, got %q", got) } } + +func TestNormalizeCacheControlTTL_DowngradesLaterOneHourBlocks(t *testing.T) { + payload := []byte(`{ + "tools": [{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}], + "system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}], + "messages": [{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}] + }`) + + out := normalizeCacheControlTTL(payload) + + if got := gjson.GetBytes(out, "tools.0.cache_control.ttl").String(); got != "1h" { + t.Fatalf("tools.0.cache_control.ttl = %q, want %q", got, "1h") + } + if gjson.GetBytes(out, "messages.0.content.0.cache_control.ttl").Exists() { + t.Fatalf("messages.0.content.0.cache_control.ttl should be removed after a default-5m block") + } +} + +func TestEnforceCacheControlLimit_StripsNonLastToolBeforeMessages(t *testing.T) { + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}} + ], + "system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}], + "messages": [ + {"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral"}}]}, + {"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral"}}]} + ] + }`) + + out := enforceCacheControlLimit(payload, 4) + + if got := countCacheControls(out); got != 4 { + t.Fatalf("cache_control count = %d, want 4", got) + } + if gjson.GetBytes(out, "tools.0.cache_control").Exists() { + t.Fatalf("tools.0.cache_control should be removed first (non-last tool)") + } + if !gjson.GetBytes(out, "tools.1.cache_control").Exists() { + t.Fatalf("tools.1.cache_control (last tool) should be preserved") + } + if !gjson.GetBytes(out, "messages.0.content.0.cache_control").Exists() || !gjson.GetBytes(out, "messages.1.content.0.cache_control").Exists() { + t.Fatalf("message cache_control blocks should be preserved when non-last tool removal is enough") + } +} + +func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T) { + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}}, + {"name":"t3","cache_control":{"type":"ephemeral"}}, + {"name":"t4","cache_control":{"type":"ephemeral"}}, + {"name":"t5","cache_control":{"type":"ephemeral"}} + ] + }`) + + out := enforceCacheControlLimit(payload, 4) + + if got := countCacheControls(out); got != 4 { + t.Fatalf("cache_control count = %d, want 4", got) + } + if gjson.GetBytes(out, "tools.0.cache_control").Exists() { + t.Fatalf("tools.0.cache_control should be removed to satisfy max=4") + } + if !gjson.GetBytes(out, "tools.4.cache_control").Exists() { + t.Fatalf("last tool cache_control should be preserved when possible") + } +} + +func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) { + var seenBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + seenBody = bytes.Clone(body) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"input_tokens":42}`)) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + + payload := []byte(`{ + "tools": [ + {"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}, + {"name":"t2","cache_control":{"type":"ephemeral"}} + ], + "system": [ + {"type":"text","text":"s1","cache_control":{"type":"ephemeral","ttl":"1h"}}, + {"type":"text","text":"s2","cache_control":{"type":"ephemeral","ttl":"1h"}} + ], + "messages": [ + {"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}, + {"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral","ttl":"1h"}}]} + ] + }`) + + _, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-haiku-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + if err != nil { + t.Fatalf("CountTokens error: %v", err) + } + + if len(seenBody) == 0 { + t.Fatal("expected count_tokens request body to be captured") + } + if got := countCacheControls(seenBody); got > 4 { + t.Fatalf("count_tokens body has %d cache_control blocks, want <= 4", got) + } + if hasTTLOrderingViolation(seenBody) { + t.Fatalf("count_tokens body still has ttl ordering violations: %s", string(seenBody)) + } +} + +func hasTTLOrderingViolation(payload []byte) bool { + seen5m := false + violates := false + + checkCC := func(cc gjson.Result) { + if !cc.Exists() || violates { + return + } + ttl := cc.Get("ttl").String() + if ttl != "1h" { + seen5m = true + return + } + if seen5m { + violates = true + } + } + + tools := gjson.GetBytes(payload, "tools") + if tools.IsArray() { + tools.ForEach(func(_, tool gjson.Result) bool { + checkCC(tool.Get("cache_control")) + return !violates + }) + } + + system := gjson.GetBytes(payload, "system") + if system.IsArray() { + system.ForEach(func(_, item gjson.Result) bool { + checkCC(item.Get("cache_control")) + return !violates + }) + } + + messages := gjson.GetBytes(payload, "messages") + if messages.IsArray() { + messages.ForEach(func(_, msg gjson.Result) bool { + content := msg.Get("content") + if content.IsArray() { + content.ForEach(func(_, item gjson.Result) bool { + checkCC(item.Get("cache_control")) + return !violates + }) + } + return !violates + }) + } + + return violates +} + +func TestClaudeExecutor_Execute_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func TestClaudeExecutor_ExecuteStream_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func TestClaudeExecutor_CountTokens_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) { + testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error { + _, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + return err + }) +} + +func testClaudeExecutorInvalidCompressedErrorBody( + t *testing.T, + invoke func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error, +) { + t.Helper() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Encoding", "gzip") + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte("not-a-valid-gzip-stream")) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) + + err := invoke(executor, auth, payload) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "failed to decode error response body") { + t.Fatalf("expected decode failure message, got: %v", err) + } + if statusProvider, ok := err.(interface{ StatusCode() int }); !ok || statusProvider.StatusCode() != http.StatusBadRequest { + t.Fatalf("expected status code 400, got: %v", err) + } +}