From 07d6689d87545f34666f1ba491a2ed9d968cd7ba Mon Sep 17 00:00:00 2001 From: Blue-B Date: Sat, 7 Mar 2026 21:31:10 +0900 Subject: [PATCH] fix(claude): add interleaved-thinking beta header, AMP gzip error decoding, normalizeClaudeBudget max_tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Always include interleaved-thinking-2025-05-14 beta header so that thinking blocks are returned correctly for all Claude models. 2. Remove status-code guard in AMP reverse proxy ModifyResponse so that error responses (4xx/5xx) with hidden gzip encoding are decoded properly — prevents garbled error messages reaching the client. 3. In normalizeClaudeBudget, when the adjusted budget falls below the model minimum, set max_tokens = budgetTokens+1 instead of leaving the request unchanged (which causes a 400 from the API). --- internal/api/modules/amp/proxy.go | 5 ----- internal/runtime/executor/claude_executor.go | 3 +++ internal/thinking/provider/claude/apply.go | 4 +++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/api/modules/amp/proxy.go b/internal/api/modules/amp/proxy.go index ecc9da77..c8010854 100644 --- a/internal/api/modules/amp/proxy.go +++ b/internal/api/modules/amp/proxy.go @@ -108,11 +108,6 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi // Modify incoming responses to handle gzip without Content-Encoding // This addresses the same issue as inline handler gzip handling, but at the proxy level proxy.ModifyResponse = func(resp *http.Response) error { - // Only process successful responses - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return nil - } - // Skip if already marked as gzip (Content-Encoding set) if resp.Header.Get("Content-Encoding") != "" { return nil diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 7d0ddcf2..8cdbbf4f 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -832,6 +832,9 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, baseBetas += ",oauth-2025-04-20" } } + if !strings.Contains(baseBetas, "interleaved-thinking") { + baseBetas += ",interleaved-thinking-2025-05-14" + } hasClaude1MHeader := false if ginHeaders != nil { diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index 275be469..af031907 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -194,7 +194,9 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo } if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { // If enforcing the max_tokens constraint would push the budget below the model minimum, - // leave the request unchanged. + // increase max_tokens to accommodate the original budget instead of leaving the + // request unchanged (which would cause a 400 error from the API). + body, _ = sjson.SetBytes(body, "max_tokens", budgetTokens+1) return body }