package executor import ( "bufio" "bytes" "compress/flate" "compress/gzip" "context" "crypto/sha256" "encoding/hex" "fmt" "io" "net/http" "net/textproto" "strings" "time" "github.com/andybalholm/brotli" "github.com/google/uuid" "github.com/klauspost/compress/zstd" claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" log "github.com/sirupsen/logrus" "github.com/tidwall/gjson" "github.com/tidwall/sjson" "github.com/gin-gonic/gin" ) // ClaudeExecutor is a stateless executor for Anthropic Claude over the messages API. // If api_key is unavailable on auth, it falls back to legacy via ClientAdapter. type ClaudeExecutor struct { cfg *config.Config } // claudeToolPrefix is empty to match real Claude Code behavior (no tool name prefix). // Previously "proxy_" was used but this is a detectable fingerprint difference. const claudeToolPrefix = "" // Anthropic-compatible upstreams may reject or even crash when Claude models // omit max_tokens. Prefer registered model metadata before using a fallback. const defaultModelMaxTokens = 1024 func NewClaudeExecutor(cfg *config.Config) *ClaudeExecutor { return &ClaudeExecutor{cfg: cfg} } func (e *ClaudeExecutor) Identifier() string { return "claude" } // PrepareRequest injects Claude credentials into the outgoing HTTP request. func (e *ClaudeExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error { if req == nil { return nil } apiKey, _ := claudeCreds(auth) if strings.TrimSpace(apiKey) == "" { return nil } useAPIKey := auth != nil && auth.Attributes != nil && strings.TrimSpace(auth.Attributes["api_key"]) != "" isAnthropicBase := req.URL != nil && strings.EqualFold(req.URL.Scheme, "https") && strings.EqualFold(req.URL.Host, "api.anthropic.com") if isAnthropicBase && useAPIKey { req.Header.Del("Authorization") req.Header.Set("x-api-key", apiKey) } else { req.Header.Del("x-api-key") req.Header.Set("Authorization", "Bearer "+apiKey) } var attrs map[string]string if auth != nil { attrs = auth.Attributes } util.ApplyCustomHeadersFromAttrs(req, attrs) return nil } // HttpRequest injects Claude credentials into the request and executes it. func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, req *http.Request) (*http.Response, error) { if req == nil { return nil, fmt.Errorf("claude executor: request is nil") } if ctx == nil { ctx = req.Context() } httpReq := req.WithContext(ctx) if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0) return httpClient.Do(httpReq) } func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { if opts.Alt == "responses/compact" { return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to originalPayloadSource := req.Payload if len(opts.OriginalRequest) > 0 { originalPayloadSource = opts.OriginalRequest } originalPayload := originalPayloadSource originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream) body, _ = sjson.SetBytes(body, "model", baseModel) body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier()) if err != nil { return resp, err } // Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation) // based on client type and configuration. body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey) requestedModel := helps.PayloadRequestedModel(opts, req.Model) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) body = normalizeClaudeTemperatureForThinking(body) // Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support) if countCacheControls(body) == 0 { body = ensureCacheControl(body) } // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). // Cloaking and ensureCacheControl may push the total over 4 when the client // (e.g. Amp CLI) already sends multiple cache_control blocks. body = enforceCacheControlLimit(body, 4) // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. // A 1h-TTL block must not appear after a 5m-TTL block in evaluation order (tools→system→messages). body = normalizeCacheControlTTL(body) // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) bodyForTranslation := body bodyForUpstream := body oauthToken := isClaudeOAuthToken(apiKey) if oauthToken && !auth.ToolPrefixDisabled() { bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix) } // Enable cch signing by default for OAuth tokens (not just experimental flag). // Claude Code always computes cch; missing or invalid cch is a detectable fingerprint. if oauthToken || experimentalCCHSigningEnabled(e.cfg, auth) { bodyForUpstream = signAnthropicMessagesBody(bodyForUpstream) } url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyForUpstream)) if err != nil { return resp, err } applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas, e.cfg) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID authLabel = auth.Label authType, authValue = auth.AccountInfo() } helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), Body: bodyForUpstream, Provider: e.Identifier(), AuthID: authID, AuthLabel: authLabel, AuthType: authType, AuthValue: authValue, }) httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if decErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) helps.LogWithRequestID(ctx).Warn(msg) return resp, statusErr{code: httpResp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } helps.AppendAPIResponseChunk(ctx, e.cfg, b) helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return resp, err } decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return resp, err } defer func() { if errClose := decodedBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } }() data, err := io.ReadAll(decodedBody) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } helps.AppendAPIResponseChunk(ctx, e.cfg, data) if stream { lines := bytes.Split(data, []byte("\n")) for _, line := range lines { if detail, ok := helps.ParseClaudeStreamUsage(line); ok { reporter.Publish(ctx, detail) } } } else { reporter.Publish(ctx, helps.ParseClaudeUsage(data)) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { data = stripClaudeToolPrefixFromResponse(data, claudeToolPrefix) } var param any out := sdktranslator.TranslateNonStream( ctx, to, from, req.Model, opts.OriginalRequest, bodyForTranslation, data, ¶m, ) resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()} return resp, nil } func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) { if opts.Alt == "responses/compact" { return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") originalPayloadSource := req.Payload if len(opts.OriginalRequest) > 0 { originalPayloadSource = opts.OriginalRequest } originalPayload := originalPayloadSource originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true) body, _ = sjson.SetBytes(body, "model", baseModel) body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier()) if err != nil { return nil, err } // Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation) // based on client type and configuration. body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey) requestedModel := helps.PayloadRequestedModel(opts, req.Model) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) body = normalizeClaudeTemperatureForThinking(body) // Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support) if countCacheControls(body) == 0 { body = ensureCacheControl(body) } // Enforce Anthropic's cache_control block limit (max 4 breakpoints per request). body = enforceCacheControlLimit(body, 4) // Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05. body = normalizeCacheControlTTL(body) // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) bodyForTranslation := body bodyForUpstream := body oauthToken := isClaudeOAuthToken(apiKey) if oauthToken && !auth.ToolPrefixDisabled() { bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix) } // Enable cch signing by default for OAuth tokens (not just experimental flag). if oauthToken || experimentalCCHSigningEnabled(e.cfg, auth) { bodyForUpstream = signAnthropicMessagesBody(bodyForUpstream) } url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyForUpstream)) if err != nil { return nil, err } applyClaudeHeaders(httpReq, auth, apiKey, true, extraBetas, e.cfg) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID authLabel = auth.Label authType, authValue = auth.AccountInfo() } helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), Body: bodyForUpstream, Provider: e.Identifier(), AuthID: authID, AuthLabel: authLabel, AuthType: authType, AuthValue: authValue, }) httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if decErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) helps.LogWithRequestID(ctx).Warn(msg) return nil, statusErr{code: httpResp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } helps.AppendAPIResponseChunk(ctx, e.cfg, b) helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } err = statusErr{code: httpResp.StatusCode, msg: string(b)} return nil, err } decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return nil, err } out := make(chan cliproxyexecutor.StreamChunk) go func() { defer close(out) defer func() { if errClose := decodedBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } }() // If from == to (Claude → Claude), directly forward the SSE stream without translation if from == to { scanner := bufio.NewScanner(decodedBody) scanner.Buffer(nil, 52_428_800) // 50MB for scanner.Scan() { line := scanner.Bytes() helps.AppendAPIResponseChunk(ctx, e.cfg, line) if detail, ok := helps.ParseClaudeStreamUsage(line); ok { reporter.Publish(ctx, detail) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix) } // Forward the line as-is to preserve SSE format cloned := make([]byte, len(line)+1) copy(cloned, line) cloned[len(line)] = '\n' out <- cliproxyexecutor.StreamChunk{Payload: cloned} } if errScan := scanner.Err(); errScan != nil { helps.RecordAPIResponseError(ctx, e.cfg, errScan) reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } return } // For other formats, use translation scanner := bufio.NewScanner(decodedBody) scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() helps.AppendAPIResponseChunk(ctx, e.cfg, line) if detail, ok := helps.ParseClaudeStreamUsage(line); ok { reporter.Publish(ctx, detail) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix) } chunks := sdktranslator.TranslateStream( ctx, to, from, req.Model, opts.OriginalRequest, bodyForTranslation, bytes.Clone(line), ¶m, ) for i := range chunks { out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]} } } if errScan := scanner.Err(); errScan != nil { helps.RecordAPIResponseError(ctx, e.cfg, errScan) reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil } func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream) body, _ = sjson.SetBytes(body, "model", baseModel) if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } // Keep count_tokens requests compatible with Anthropic cache-control constraints too. body = enforceCacheControlLimit(body, 4) body = normalizeCacheControlTTL(body) // Extract betas from body and convert to header (for count_tokens too) var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { body = applyClaudeToolPrefix(body, claudeToolPrefix) } url := fmt.Sprintf("%s/v1/messages/count_tokens?beta=true", baseURL) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return cliproxyexecutor.Response{}, err } applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas, e.cfg) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID authLabel = auth.Label authType, authValue = auth.AccountInfo() } helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), Body: body, Provider: e.Identifier(), AuthID: authID, AuthLabel: authLabel, AuthType: authType, AuthValue: authValue, }) httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0) resp, err := httpClient.Do(httpReq) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if resp.StatusCode < 200 || resp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. errBody, decErr := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding")) if decErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) helps.LogWithRequestID(ctx).Warn(msg) return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } helps.AppendAPIResponseChunk(ctx, e.cfg, b) if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)} } decodedBody, err := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding")) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := resp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } return cliproxyexecutor.Response{}, err } defer func() { if errClose := decodedBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } }() data, err := io.ReadAll(decodedBody) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } helps.AppendAPIResponseChunk(ctx, e.cfg, data) count := gjson.GetBytes(data, "input_tokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: out, Headers: resp.Header.Clone()}, nil } func (e *ClaudeExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) { log.Debugf("claude executor: refresh called") if auth == nil { return nil, fmt.Errorf("claude executor: auth is nil") } var refreshToken string if auth.Metadata != nil { if v, ok := auth.Metadata["refresh_token"].(string); ok && v != "" { refreshToken = v } } if refreshToken == "" { return auth, nil } svc := claudeauth.NewClaudeAuth(e.cfg) td, err := svc.RefreshTokens(ctx, refreshToken) if err != nil { return nil, err } if auth.Metadata == nil { auth.Metadata = make(map[string]any) } auth.Metadata["access_token"] = td.AccessToken if td.RefreshToken != "" { auth.Metadata["refresh_token"] = td.RefreshToken } auth.Metadata["email"] = td.Email auth.Metadata["expired"] = td.Expire auth.Metadata["type"] = "claude" now := time.Now().Format(time.RFC3339) auth.Metadata["last_refresh"] = now return auth, nil } // extractAndRemoveBetas extracts the "betas" array from the body and removes it. // Returns the extracted betas as a string slice and the modified body. func extractAndRemoveBetas(body []byte) ([]string, []byte) { betasResult := gjson.GetBytes(body, "betas") if !betasResult.Exists() { return nil, body } var betas []string if betasResult.IsArray() { for _, item := range betasResult.Array() { if s := strings.TrimSpace(item.String()); s != "" { betas = append(betas, s) } } } else if s := strings.TrimSpace(betasResult.String()); s != "" { betas = append(betas, s) } body, _ = sjson.DeleteBytes(body, "betas") return betas, body } // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking. // Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool. // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations func disableThinkingIfToolChoiceForced(body []byte) []byte { toolChoiceType := gjson.GetBytes(body, "tool_choice.type").String() // "auto" is allowed with thinking, but "any" or "tool" (specific tool) are not if toolChoiceType == "any" || toolChoiceType == "tool" { // Remove thinking configuration entirely to avoid API error body, _ = sjson.DeleteBytes(body, "thinking") // Adaptive thinking may also set output_config.effort; remove it to avoid // leaking thinking controls when tool_choice forces tool use. body, _ = sjson.DeleteBytes(body, "output_config.effort") if oc := gjson.GetBytes(body, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { body, _ = sjson.DeleteBytes(body, "output_config") } } return body } // normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when // thinking is enabled. Anthropic rejects temperatures other than 1 when // thinking.type is enabled/adaptive/auto. func normalizeClaudeTemperatureForThinking(body []byte) []byte { if !gjson.GetBytes(body, "temperature").Exists() { return body } thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String())) switch thinkingType { case "enabled", "adaptive", "auto": if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 { return body } body, _ = sjson.SetBytes(body, "temperature", 1) } return body } type compositeReadCloser struct { io.Reader closers []func() error } func (c *compositeReadCloser) Close() error { var firstErr error for i := range c.closers { if c.closers[i] == nil { continue } if err := c.closers[i](); err != nil && firstErr == nil { firstErr = err } } return firstErr } // peekableBody wraps a bufio.Reader around the original ReadCloser so that // magic bytes can be inspected without consuming them from the stream. type peekableBody struct { *bufio.Reader closer io.Closer } func (p *peekableBody) Close() error { return p.closer.Close() } func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadCloser, error) { if body == nil { return nil, fmt.Errorf("response body is nil") } if contentEncoding == "" { // No Content-Encoding header. Attempt best-effort magic-byte detection to // handle misbehaving upstreams that compress without setting the header. // Only gzip (1f 8b) and zstd (28 b5 2f fd) have reliable magic sequences; // br and deflate have none and are left as-is. // The bufio wrapper preserves unread bytes so callers always see the full // stream regardless of whether decompression was applied. pb := &peekableBody{Reader: bufio.NewReader(body), closer: body} magic, peekErr := pb.Peek(4) if peekErr == nil || (peekErr == io.EOF && len(magic) >= 2) { switch { case len(magic) >= 2 && magic[0] == 0x1f && magic[1] == 0x8b: gzipReader, gzErr := gzip.NewReader(pb) if gzErr != nil { _ = pb.Close() return nil, fmt.Errorf("magic-byte gzip: failed to create reader: %w", gzErr) } return &compositeReadCloser{ Reader: gzipReader, closers: []func() error{ gzipReader.Close, pb.Close, }, }, nil case len(magic) >= 4 && magic[0] == 0x28 && magic[1] == 0xb5 && magic[2] == 0x2f && magic[3] == 0xfd: decoder, zdErr := zstd.NewReader(pb) if zdErr != nil { _ = pb.Close() return nil, fmt.Errorf("magic-byte zstd: failed to create reader: %w", zdErr) } return &compositeReadCloser{ Reader: decoder, closers: []func() error{ func() error { decoder.Close(); return nil }, pb.Close, }, }, nil } } return pb, nil } encodings := strings.Split(contentEncoding, ",") for _, raw := range encodings { encoding := strings.TrimSpace(strings.ToLower(raw)) switch encoding { case "", "identity": continue case "gzip": gzipReader, err := gzip.NewReader(body) if err != nil { _ = body.Close() return nil, fmt.Errorf("failed to create gzip reader: %w", err) } return &compositeReadCloser{ Reader: gzipReader, closers: []func() error{ gzipReader.Close, func() error { return body.Close() }, }, }, nil case "deflate": deflateReader := flate.NewReader(body) return &compositeReadCloser{ Reader: deflateReader, closers: []func() error{ deflateReader.Close, func() error { return body.Close() }, }, }, nil case "br": return &compositeReadCloser{ Reader: brotli.NewReader(body), closers: []func() error{ func() error { return body.Close() }, }, }, nil case "zstd": decoder, err := zstd.NewReader(body) if err != nil { _ = body.Close() return nil, fmt.Errorf("failed to create zstd reader: %w", err) } return &compositeReadCloser{ Reader: decoder, closers: []func() error{ func() error { decoder.Close(); return nil }, func() error { return body.Close() }, }, }, nil default: continue } } return body, nil } func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool, extraBetas []string, cfg *config.Config) { hdrDefault := func(cfgVal, fallback string) string { if cfgVal != "" { return cfgVal } return fallback } var hd config.ClaudeHeaderDefaults if cfg != nil { hd = cfg.ClaudeHeaderDefaults } useAPIKey := auth != nil && auth.Attributes != nil && strings.TrimSpace(auth.Attributes["api_key"]) != "" isAnthropicBase := r.URL != nil && strings.EqualFold(r.URL.Scheme, "https") && strings.EqualFold(r.URL.Host, "api.anthropic.com") if isAnthropicBase && useAPIKey { r.Header.Del("Authorization") r.Header.Set("x-api-key", apiKey) } else { r.Header.Set("Authorization", "Bearer "+apiKey) } r.Header.Set("Content-Type", "application/json") var ginHeaders http.Header if ginCtx, ok := r.Context().Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil { ginHeaders = ginCtx.Request.Header } stabilizeDeviceProfile := helps.ClaudeDeviceProfileStabilizationEnabled(cfg) var deviceProfile helps.ClaudeDeviceProfile if stabilizeDeviceProfile { deviceProfile = helps.ResolveClaudeDeviceProfile(auth, apiKey, ginHeaders, cfg) } baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15,fast-mode-2026-02-01,redact-thinking-2026-02-12,token-efficient-tools-2026-03-28" if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" { baseBetas = val if !strings.Contains(val, "oauth") { baseBetas += ",oauth-2025-04-20" } } if !strings.Contains(baseBetas, "interleaved-thinking") { baseBetas += ",interleaved-thinking-2025-05-14" } hasClaude1MHeader := false if ginHeaders != nil { if _, ok := ginHeaders[textproto.CanonicalMIMEHeaderKey("X-CPA-CLAUDE-1M")]; ok { hasClaude1MHeader = true } } // Merge extra betas from request body and request flags. if len(extraBetas) > 0 || hasClaude1MHeader { existingSet := make(map[string]bool) for _, b := range strings.Split(baseBetas, ",") { betaName := strings.TrimSpace(b) if betaName != "" { existingSet[betaName] = true } } for _, beta := range extraBetas { beta = strings.TrimSpace(beta) if beta != "" && !existingSet[beta] { baseBetas += "," + beta existingSet[beta] = true } } if hasClaude1MHeader && !existingSet["context-1m-2025-08-07"] { baseBetas += ",context-1m-2025-08-07" } } r.Header.Set("Anthropic-Beta", baseBetas) misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Version", "2023-06-01") // Only set browser access header for API key mode; real Claude Code CLI does not send it. if useAPIKey { misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true") } misc.EnsureHeader(r.Header, ginHeaders, "X-App", "cli") // Values below match Claude Code 2.1.63 / @anthropic-ai/sdk 0.74.0 (updated 2026-02-28). misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Retry-Count", "0") misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Runtime", "node") misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Lang", "js") misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", hdrDefault(hd.Timeout, "600")) // Session ID: stable per auth/apiKey, matches Claude Code's X-Claude-Code-Session-Id header. misc.EnsureHeader(r.Header, ginHeaders, "X-Claude-Code-Session-Id", helps.CachedSessionID(apiKey)) // Per-request UUID, matches Claude Code's x-client-request-id for first-party API. if isAnthropicBase { misc.EnsureHeader(r.Header, ginHeaders, "x-client-request-id", uuid.New().String()) } r.Header.Set("Connection", "keep-alive") if stream { r.Header.Set("Accept", "text/event-stream") // SSE streams must not be compressed: the downstream scanner reads // line-delimited text and cannot parse compressed bytes. Using // "identity" tells the upstream to send an uncompressed stream. r.Header.Set("Accept-Encoding", "identity") } else { r.Header.Set("Accept", "application/json") r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd") } // Legacy mode keeps OS/Arch runtime-derived; stabilized mode pins OS/Arch // to the configured baseline while still allowing newer official // User-Agent/package/runtime tuples to upgrade the software fingerprint. if stabilizeDeviceProfile { helps.ApplyClaudeDeviceProfileHeaders(r, deviceProfile) } else { helps.ApplyClaudeLegacyDeviceHeaders(r, ginHeaders, cfg) } var attrs map[string]string if auth != nil { attrs = auth.Attributes } util.ApplyCustomHeadersFromAttrs(r, attrs) // Re-enforce Accept-Encoding: identity after ApplyCustomHeadersFromAttrs, which // may override it with a user-configured value. Compressed SSE breaks the line // scanner regardless of user preference, so this is non-negotiable for streams. if stream { r.Header.Set("Accept-Encoding", "identity") } } func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { if a == nil { return "", "" } if a.Attributes != nil { apiKey = a.Attributes["api_key"] baseURL = a.Attributes["base_url"] } if apiKey == "" && a.Metadata != nil { if v, ok := a.Metadata["access_token"].(string); ok { apiKey = v } } return } func checkSystemInstructions(payload []byte) []byte { return checkSystemInstructionsWithSigningMode(payload, false, false, false, "2.1.63", "", "") } func isClaudeOAuthToken(apiKey string) bool { return strings.Contains(apiKey, "sk-ant-oat") } func applyClaudeToolPrefix(body []byte, prefix string) []byte { if prefix == "" { return body } // Collect built-in tool names from the authoritative fallback seed list and // augment it with any typed built-ins present in the current request body. builtinTools := helps.AugmentClaudeBuiltinToolRegistry(body, nil) if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() { tools.ForEach(func(index, tool gjson.Result) bool { // Skip built-in tools (web_search, code_execution, etc.) which have // a "type" field and require their name to remain unchanged. if tool.Get("type").Exists() && tool.Get("type").String() != "" { if n := tool.Get("name").String(); n != "" { builtinTools[n] = true } return true } name := tool.Get("name").String() if name == "" || strings.HasPrefix(name, prefix) { return true } path := fmt.Sprintf("tools.%d.name", index.Int()) body, _ = sjson.SetBytes(body, path, prefix+name) return true }) } if gjson.GetBytes(body, "tool_choice.type").String() == "tool" { name := gjson.GetBytes(body, "tool_choice.name").String() if name != "" && !strings.HasPrefix(name, prefix) && !builtinTools[name] { body, _ = sjson.SetBytes(body, "tool_choice.name", prefix+name) } } if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() { messages.ForEach(func(msgIndex, msg gjson.Result) bool { content := msg.Get("content") if !content.Exists() || !content.IsArray() { return true } content.ForEach(func(contentIndex, part gjson.Result) bool { partType := part.Get("type").String() switch partType { case "tool_use": name := part.Get("name").String() if name == "" || strings.HasPrefix(name, prefix) || builtinTools[name] { return true } path := fmt.Sprintf("messages.%d.content.%d.name", msgIndex.Int(), contentIndex.Int()) body, _ = sjson.SetBytes(body, path, prefix+name) case "tool_reference": toolName := part.Get("tool_name").String() if toolName == "" || strings.HasPrefix(toolName, prefix) || builtinTools[toolName] { return true } path := fmt.Sprintf("messages.%d.content.%d.tool_name", msgIndex.Int(), contentIndex.Int()) body, _ = sjson.SetBytes(body, path, prefix+toolName) case "tool_result": // Handle nested tool_reference blocks inside tool_result.content[] nestedContent := part.Get("content") if nestedContent.Exists() && nestedContent.IsArray() { nestedContent.ForEach(func(nestedIndex, nestedPart gjson.Result) bool { if nestedPart.Get("type").String() == "tool_reference" { nestedToolName := nestedPart.Get("tool_name").String() if nestedToolName != "" && !strings.HasPrefix(nestedToolName, prefix) && !builtinTools[nestedToolName] { nestedPath := fmt.Sprintf("messages.%d.content.%d.content.%d.tool_name", msgIndex.Int(), contentIndex.Int(), nestedIndex.Int()) body, _ = sjson.SetBytes(body, nestedPath, prefix+nestedToolName) } } return true }) } } return true }) return true }) } return body } func stripClaudeToolPrefixFromResponse(body []byte, prefix string) []byte { if prefix == "" { return body } content := gjson.GetBytes(body, "content") if !content.Exists() || !content.IsArray() { return body } content.ForEach(func(index, part gjson.Result) bool { partType := part.Get("type").String() switch partType { case "tool_use": name := part.Get("name").String() if !strings.HasPrefix(name, prefix) { return true } path := fmt.Sprintf("content.%d.name", index.Int()) body, _ = sjson.SetBytes(body, path, strings.TrimPrefix(name, prefix)) case "tool_reference": toolName := part.Get("tool_name").String() if !strings.HasPrefix(toolName, prefix) { return true } path := fmt.Sprintf("content.%d.tool_name", index.Int()) body, _ = sjson.SetBytes(body, path, strings.TrimPrefix(toolName, prefix)) case "tool_result": // Handle nested tool_reference blocks inside tool_result.content[] nestedContent := part.Get("content") if nestedContent.Exists() && nestedContent.IsArray() { nestedContent.ForEach(func(nestedIndex, nestedPart gjson.Result) bool { if nestedPart.Get("type").String() == "tool_reference" { nestedToolName := nestedPart.Get("tool_name").String() if strings.HasPrefix(nestedToolName, prefix) { nestedPath := fmt.Sprintf("content.%d.content.%d.tool_name", index.Int(), nestedIndex.Int()) body, _ = sjson.SetBytes(body, nestedPath, strings.TrimPrefix(nestedToolName, prefix)) } } return true }) } } return true }) return body } func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte { if prefix == "" { return line } payload := helps.JSONPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return line } contentBlock := gjson.GetBytes(payload, "content_block") if !contentBlock.Exists() { return line } blockType := contentBlock.Get("type").String() var updated []byte var err error switch blockType { case "tool_use": name := contentBlock.Get("name").String() if !strings.HasPrefix(name, prefix) { return line } updated, err = sjson.SetBytes(payload, "content_block.name", strings.TrimPrefix(name, prefix)) if err != nil { return line } case "tool_reference": toolName := contentBlock.Get("tool_name").String() if !strings.HasPrefix(toolName, prefix) { return line } updated, err = sjson.SetBytes(payload, "content_block.tool_name", strings.TrimPrefix(toolName, prefix)) if err != nil { return line } default: return line } trimmed := bytes.TrimSpace(line) if bytes.HasPrefix(trimmed, []byte("data:")) { return append([]byte("data: "), updated...) } return updated } // getClientUserAgent extracts the client User-Agent from the gin context. func getClientUserAgent(ctx context.Context) string { if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil { return ginCtx.GetHeader("User-Agent") } return "" } // parseEntrypointFromUA extracts the entrypoint from a Claude Code User-Agent. // Format: "claude-cli/x.y.z (external, cli)" → "cli" // Format: "claude-cli/x.y.z (external, vscode)" → "vscode" // Returns "cli" if parsing fails or UA is not Claude Code. func parseEntrypointFromUA(userAgent string) string { // Find content inside parentheses start := strings.Index(userAgent, "(") end := strings.LastIndex(userAgent, ")") if start < 0 || end <= start { return "cli" } inner := userAgent[start+1 : end] // Split by comma, take the second part (entrypoint is at index 1, after USER_TYPE) // Format: "(USER_TYPE, ENTRYPOINT[, extra...])" parts := strings.Split(inner, ",") if len(parts) >= 2 { ep := strings.TrimSpace(parts[1]) if ep != "" { return ep } } return "cli" } // getWorkloadFromContext extracts workload identifier from the gin request headers. func getWorkloadFromContext(ctx context.Context) string { if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil { return strings.TrimSpace(ginCtx.GetHeader("X-CPA-Claude-Workload")) } return "" } // getCloakConfigFromAuth extracts cloak configuration from auth attributes. // Returns (cloakMode, strictMode, sensitiveWords, cacheUserID). func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string, bool) { if auth == nil || auth.Attributes == nil { return "auto", false, nil, false } cloakMode := auth.Attributes["cloak_mode"] if cloakMode == "" { cloakMode = "auto" } strictMode := strings.ToLower(auth.Attributes["cloak_strict_mode"]) == "true" var sensitiveWords []string if wordsStr := auth.Attributes["cloak_sensitive_words"]; wordsStr != "" { sensitiveWords = strings.Split(wordsStr, ",") for i := range sensitiveWords { sensitiveWords[i] = strings.TrimSpace(sensitiveWords[i]) } } cacheUserID := strings.EqualFold(strings.TrimSpace(auth.Attributes["cloak_cache_user_id"]), "true") return cloakMode, strictMode, sensitiveWords, cacheUserID } // injectFakeUserID generates and injects a fake user ID into the request metadata. // When useCache is false, a new user ID is generated for every call. func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte { generateID := func() string { if useCache { return helps.CachedUserID(apiKey) } return helps.GenerateFakeUserID() } metadata := gjson.GetBytes(payload, "metadata") if !metadata.Exists() { payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateID()) return payload } existingUserID := gjson.GetBytes(payload, "metadata.user_id").String() if existingUserID == "" || !helps.IsValidUserID(existingUserID) { payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateID()) } return payload } // fingerprintSalt is the salt used by Claude Code to compute the 3-char build fingerprint. const fingerprintSalt = "59cf53e54c78" // computeFingerprint computes the 3-char build fingerprint that Claude Code embeds in cc_version. // Algorithm: SHA256(salt + messageText[4] + messageText[7] + messageText[20] + version)[:3] func computeFingerprint(messageText, version string) string { indices := [3]int{4, 7, 20} runes := []rune(messageText) var sb strings.Builder for _, idx := range indices { if idx < len(runes) { sb.WriteRune(runes[idx]) } else { sb.WriteRune('0') } } input := fingerprintSalt + sb.String() + version h := sha256.Sum256([]byte(input)) return hex.EncodeToString(h[:])[:3] } // generateBillingHeader creates the x-anthropic-billing-header text block that // real Claude Code prepends to every system prompt array. // Format: x-anthropic-billing-header: cc_version=.; cc_entrypoint=; cch=; [cc_workload=;] func generateBillingHeader(payload []byte, experimentalCCHSigning bool, version, messageText, entrypoint, workload string) string { if entrypoint == "" { entrypoint = "cli" } buildHash := computeFingerprint(messageText, version) workloadPart := "" if workload != "" { workloadPart = fmt.Sprintf(" cc_workload=%s;", workload) } if experimentalCCHSigning { return fmt.Sprintf("x-anthropic-billing-header: cc_version=%s.%s; cc_entrypoint=%s; cch=00000;%s", version, buildHash, entrypoint, workloadPart) } // Generate a deterministic cch hash from the payload content (system + messages + tools). h := sha256.Sum256(payload) cch := hex.EncodeToString(h[:])[:5] return fmt.Sprintf("x-anthropic-billing-header: cc_version=%s.%s; cc_entrypoint=%s; cch=%s;%s", version, buildHash, entrypoint, cch, workloadPart) } func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { return checkSystemInstructionsWithSigningMode(payload, strictMode, false, false, "2.1.63", "", "") } // checkSystemInstructionsWithSigningMode injects Claude Code-style system blocks: // // system[0]: billing header (no cache_control) // system[1]: agent identifier (cache_control ephemeral, scope=org) // system[2]: core intro prompt (cache_control ephemeral, scope=global) // system[3]: system instructions (no cache_control) // system[4]: doing tasks (no cache_control) // system[5]: user system messages moved to first user message func checkSystemInstructionsWithSigningMode(payload []byte, strictMode bool, experimentalCCHSigning bool, oauthMode bool, version, entrypoint, workload string) []byte { system := gjson.GetBytes(payload, "system") // Extract original message text for fingerprint computation (before billing injection). // Use the first system text block's content as the fingerprint source. messageText := "" if system.IsArray() { system.ForEach(func(_, part gjson.Result) bool { if part.Get("type").String() == "text" { messageText = part.Get("text").String() return false } return true }) } else if system.Type == gjson.String { messageText = system.String() } // Skip if already injected firstText := gjson.GetBytes(payload, "system.0.text").String() if strings.HasPrefix(firstText, "x-anthropic-billing-header:") { return payload } billingText := generateBillingHeader(payload, experimentalCCHSigning, version, messageText, entrypoint, workload) billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText) // Build system blocks matching real Claude Code structure. // Important: Claude Code's internal cacheScope='org' does NOT serialize to // scope='org' in the API request. Only scope='global' is sent explicitly. // The system prompt prefix block is sent without cache_control. agentBlock := buildTextBlock("You are Claude Code, Anthropic's official CLI for Claude.", nil) staticPrompt := strings.Join([]string{ claudeCodeIntro, claudeCodeSystem, claudeCodeDoingTasks, claudeCodeToneAndStyle, claudeCodeOutputEfficiency, }, "\n\n") staticBlock := buildTextBlock(staticPrompt, map[string]string{"scope": "global"}) systemResult := "[" + billingBlock + "," + agentBlock + "," + staticBlock + "]" payload, _ = sjson.SetRawBytes(payload, "system", []byte(systemResult)) // Collect user system instructions and prepend to first user message if !strictMode { var userSystemParts []string if system.IsArray() { system.ForEach(func(_, part gjson.Result) bool { if part.Get("type").String() == "text" { txt := strings.TrimSpace(part.Get("text").String()) if txt != "" { userSystemParts = append(userSystemParts, txt) } } return true }) } else if system.Type == gjson.String && strings.TrimSpace(system.String()) != "" { userSystemParts = append(userSystemParts, strings.TrimSpace(system.String())) } if len(userSystemParts) > 0 { combined := strings.Join(userSystemParts, "\n\n") if oauthMode { combined = sanitizeForwardedSystemPrompt(combined) } if strings.TrimSpace(combined) != "" { payload = prependToFirstUserMessage(payload, combined) } } } return payload } // sanitizeForwardedSystemPrompt removes third-party branding and high-signal // product-specific prompt sections before forwarding context into the first user // message for Claude OAuth cloaking. The goal is to preserve neutral task/tool // guidance while stripping fingerprints like OpenCode branding, product docs, // and workflow sections that are unique to the third-party client. func sanitizeForwardedSystemPrompt(text string) string { if strings.TrimSpace(text) == "" { return "" } lines := strings.Split(text, "\n") var kept []string skipUntilNextHeading := false shouldDropLine := func(line string) bool { trimmed := strings.TrimSpace(line) if trimmed == "" { return false } lower := strings.ToLower(trimmed) dropSubstrings := []string{ "you are opencode", "best coding agent on the planet", "opencode.ai/docs", "github.com/anomalyco/opencode", "anomalyco/opencode", "ctrl+p to list available actions", "to give feedback, users should report the issue at", "you are powered by the model named", "the exact model id is", "here is some useful information about the environment", "skills provide specialized instructions and workflows", "use the skill tool to load a skill", "no skills are currently available", "instructions from:", } for _, sub := range dropSubstrings { if strings.Contains(lower, sub) { return true } } switch lower { case "", "", "", "", "", "": return true } return false } shouldDropHeading := func(line string) bool { switch strings.ToLower(strings.TrimSpace(line)) { case "# professional objectivity", "# task management", "# tool usage policy", "# code references": return true default: return false } } for _, line := range lines { trimmed := strings.TrimSpace(line) if skipUntilNextHeading { if strings.HasPrefix(trimmed, "# ") { skipUntilNextHeading = false } else { continue } } if shouldDropHeading(line) { skipUntilNextHeading = true continue } if shouldDropLine(line) { continue } line = strings.ReplaceAll(line, "OpenCode", "the coding assistant") line = strings.ReplaceAll(line, "opencode", "coding assistant") kept = append(kept, line) } result := strings.Join(kept, "\n") // Collapse excessive blank lines after removing sections. for strings.Contains(result, "\n\n\n") { result = strings.ReplaceAll(result, "\n\n\n", "\n\n") } return strings.TrimSpace(result) } // buildTextBlock constructs a JSON text block object with proper escaping. // Uses sjson.SetBytes to handle multi-line text, quotes, and control characters. // cacheControl is optional; pass nil to omit cache_control. func buildTextBlock(text string, cacheControl map[string]string) string { block := []byte(`{"type":"text"}`) block, _ = sjson.SetBytes(block, "text", text) if cacheControl != nil && len(cacheControl) > 0 { // Build cache_control JSON manually to avoid sjson map marshaling issues. // sjson.SetBytes with map[string]string may not produce expected structure. cc := `{"type":"ephemeral"` if s, ok := cacheControl["scope"]; ok { cc += fmt.Sprintf(`,"scope":"%s"`, s) } if t, ok := cacheControl["ttl"]; ok { cc += fmt.Sprintf(`,"ttl":"%s"`, t) } cc += "}" block, _ = sjson.SetRawBytes(block, "cache_control", []byte(cc)) } return string(block) } // prependToFirstUserMessage prepends text content to the first user message. // This avoids putting non-Claude-Code system instructions in system[] which // triggers Anthropic's extra usage billing for OAuth-proxied requests. func prependToFirstUserMessage(payload []byte, text string) []byte { messages := gjson.GetBytes(payload, "messages") if !messages.Exists() || !messages.IsArray() { return payload } // Find the first user message index firstUserIdx := -1 messages.ForEach(func(idx, msg gjson.Result) bool { if msg.Get("role").String() == "user" { firstUserIdx = int(idx.Int()) return false } return true }) if firstUserIdx < 0 { return payload } prefixBlock := fmt.Sprintf(` As you answer the user's questions, you can use the following context from the system: %s IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task. `, text) contentPath := fmt.Sprintf("messages.%d.content", firstUserIdx) content := gjson.GetBytes(payload, contentPath) if content.IsArray() { newBlock := fmt.Sprintf(`{"type":"text","text":%q}`, prefixBlock) existing := content.Raw newArray := "[" + newBlock + "," + existing[1:] payload, _ = sjson.SetRawBytes(payload, contentPath, []byte(newArray)) } else if content.Type == gjson.String { newText := prefixBlock + content.String() payload, _ = sjson.SetBytes(payload, contentPath, newText) } return payload } // applyCloaking applies cloaking transformations to the payload based on config and client. // Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation. func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string, apiKey string) []byte { clientUserAgent := getClientUserAgent(ctx) // Enable cch signing for OAuth tokens by default (not just experimental flag). oauthToken := isClaudeOAuthToken(apiKey) useCCHSigning := oauthToken || experimentalCCHSigningEnabled(cfg, auth) // Get cloak config from ClaudeKey configuration cloakCfg := resolveClaudeKeyCloakConfig(cfg, auth) attrMode, attrStrict, attrWords, attrCache := getCloakConfigFromAuth(auth) // Determine cloak settings cloakMode := attrMode strictMode := attrStrict sensitiveWords := attrWords cacheUserID := attrCache if cloakCfg != nil { if mode := strings.TrimSpace(cloakCfg.Mode); mode != "" { cloakMode = mode } if cloakCfg.StrictMode { strictMode = true } if len(cloakCfg.SensitiveWords) > 0 { sensitiveWords = cloakCfg.SensitiveWords } if cloakCfg.CacheUserID != nil { cacheUserID = *cloakCfg.CacheUserID } } // Determine if cloaking should be applied if !helps.ShouldCloak(cloakMode, clientUserAgent) { return payload } // Skip system instructions for claude-3-5-haiku models if !strings.HasPrefix(model, "claude-3-5-haiku") { billingVersion := helps.DefaultClaudeVersion(cfg) entrypoint := parseEntrypointFromUA(clientUserAgent) workload := getWorkloadFromContext(ctx) payload = checkSystemInstructionsWithSigningMode(payload, strictMode, useCCHSigning, oauthToken, billingVersion, entrypoint, workload) } // Inject fake user ID payload = injectFakeUserID(payload, apiKey, cacheUserID) // Apply sensitive word obfuscation if len(sensitiveWords) > 0 { matcher := helps.BuildSensitiveWordMatcher(sensitiveWords) payload = helps.ObfuscateSensitiveWords(payload, matcher) } return payload } // ensureCacheControl injects cache_control breakpoints into the payload for optimal prompt caching. // According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages. // This function adds cache_control to: // 1. The LAST tool in the tools array (caches all tool definitions) // 2. The LAST system prompt element // 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn) // // Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT breakpoints. // This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price). // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching func ensureCacheControl(payload []byte) []byte { // 1. Inject cache_control into the LAST tool (caches all tool definitions) // Tools are cached first in the hierarchy, so this is the most important breakpoint. payload = injectToolsCacheControl(payload) // 2. Inject cache_control into the LAST system prompt element // System is the second level in the cache hierarchy. payload = injectSystemCacheControl(payload) // 3. Inject cache_control into messages for multi-turn conversation caching // This caches the conversation history up to the second-to-last user turn. payload = injectMessagesCacheControl(payload) return payload } func countCacheControls(payload []byte) int { count := 0 // Check system system := gjson.GetBytes(payload, "system") if system.IsArray() { system.ForEach(func(_, item gjson.Result) bool { if item.Get("cache_control").Exists() { count++ } return true }) } // Check tools tools := gjson.GetBytes(payload, "tools") if tools.IsArray() { tools.ForEach(func(_, item gjson.Result) bool { if item.Get("cache_control").Exists() { count++ } return true }) } // Check messages messages := gjson.GetBytes(payload, "messages") if messages.IsArray() { messages.ForEach(func(_, msg gjson.Result) bool { content := msg.Get("content") if content.IsArray() { content.ForEach(func(_, item gjson.Result) bool { if item.Get("cache_control").Exists() { count++ } return true }) } return true }) } return count } // normalizeCacheControlTTL ensures cache_control TTL values don't violate the // prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not // appear after a 5m-TTL block anywhere in the evaluation order. // // Anthropic evaluates blocks in order: tools → system (index 0..N) → messages. // Within each section, blocks are evaluated in array order. A 5m (default) block // followed by a 1h block at ANY later position is an error — including within // the same section (e.g. system[1]=5m then system[3]=1h). // // Strategy: walk all cache_control blocks in evaluation order. Once a 5m block // is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m). func normalizeCacheControlTTL(payload []byte) []byte { if len(payload) == 0 || !gjson.ValidBytes(payload) { return payload } original := payload seen5m := false modified := false processBlock := func(path string, obj gjson.Result) { cc := obj.Get("cache_control") if !cc.Exists() { return } if !cc.IsObject() { seen5m = true return } ttl := cc.Get("ttl") if ttl.Type != gjson.String || ttl.String() != "1h" { seen5m = true return } if !seen5m { return } ttlPath := path + ".cache_control.ttl" updated, errDel := sjson.DeleteBytes(payload, ttlPath) if errDel != nil { return } payload = updated modified = true } tools := gjson.GetBytes(payload, "tools") if tools.IsArray() { tools.ForEach(func(idx, item gjson.Result) bool { processBlock(fmt.Sprintf("tools.%d", int(idx.Int())), item) return true }) } system := gjson.GetBytes(payload, "system") if system.IsArray() { system.ForEach(func(idx, item gjson.Result) bool { processBlock(fmt.Sprintf("system.%d", int(idx.Int())), item) return true }) } messages := gjson.GetBytes(payload, "messages") if messages.IsArray() { messages.ForEach(func(msgIdx, msg gjson.Result) bool { content := msg.Get("content") if !content.IsArray() { return true } content.ForEach(func(itemIdx, item gjson.Result) bool { processBlock(fmt.Sprintf("messages.%d.content.%d", int(msgIdx.Int()), int(itemIdx.Int())), item) return true }) return true }) } if !modified { return original } return payload } // enforceCacheControlLimit removes excess cache_control blocks from a payload // so the total does not exceed the Anthropic API limit (currently 4). // // Anthropic evaluates cache breakpoints in order: tools → system → messages. // The most valuable breakpoints are: // 1. Last tool — caches ALL tool definitions // 2. Last system block — caches ALL system content // 3. Recent messages — cache conversation context // // Removal priority (strip lowest-value first): // // Phase 1: system blocks earliest-first, preserving the last one. // Phase 2: tool blocks earliest-first, preserving the last one. // Phase 3: message content blocks earliest-first. // Phase 4: remaining system blocks (last system). // Phase 5: remaining tool blocks (last tool). func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte { if len(payload) == 0 || !gjson.ValidBytes(payload) { return payload } total := countCacheControls(payload) if total <= maxBlocks { return payload } excess := total - maxBlocks system := gjson.GetBytes(payload, "system") if system.IsArray() { lastIdx := -1 system.ForEach(func(idx, item gjson.Result) bool { if item.Get("cache_control").Exists() { lastIdx = int(idx.Int()) } return true }) if lastIdx >= 0 { system.ForEach(func(idx, item gjson.Result) bool { if excess <= 0 { return false } i := int(idx.Int()) if i == lastIdx { return true } if !item.Get("cache_control").Exists() { return true } path := fmt.Sprintf("system.%d.cache_control", i) updated, errDel := sjson.DeleteBytes(payload, path) if errDel != nil { return true } payload = updated excess-- return true }) } } if excess <= 0 { return payload } tools := gjson.GetBytes(payload, "tools") if tools.IsArray() { lastIdx := -1 tools.ForEach(func(idx, item gjson.Result) bool { if item.Get("cache_control").Exists() { lastIdx = int(idx.Int()) } return true }) if lastIdx >= 0 { tools.ForEach(func(idx, item gjson.Result) bool { if excess <= 0 { return false } i := int(idx.Int()) if i == lastIdx { return true } if !item.Get("cache_control").Exists() { return true } path := fmt.Sprintf("tools.%d.cache_control", i) updated, errDel := sjson.DeleteBytes(payload, path) if errDel != nil { return true } payload = updated excess-- return true }) } } if excess <= 0 { return payload } messages := gjson.GetBytes(payload, "messages") if messages.IsArray() { messages.ForEach(func(msgIdx, msg gjson.Result) bool { if excess <= 0 { return false } content := msg.Get("content") if !content.IsArray() { return true } content.ForEach(func(itemIdx, item gjson.Result) bool { if excess <= 0 { return false } if !item.Get("cache_control").Exists() { return true } path := fmt.Sprintf("messages.%d.content.%d.cache_control", int(msgIdx.Int()), int(itemIdx.Int())) updated, errDel := sjson.DeleteBytes(payload, path) if errDel != nil { return true } payload = updated excess-- return true }) return true }) } if excess <= 0 { return payload } system = gjson.GetBytes(payload, "system") if system.IsArray() { system.ForEach(func(idx, item gjson.Result) bool { if excess <= 0 { return false } if !item.Get("cache_control").Exists() { return true } path := fmt.Sprintf("system.%d.cache_control", int(idx.Int())) updated, errDel := sjson.DeleteBytes(payload, path) if errDel != nil { return true } payload = updated excess-- return true }) } if excess <= 0 { return payload } tools = gjson.GetBytes(payload, "tools") if tools.IsArray() { tools.ForEach(func(idx, item gjson.Result) bool { if excess <= 0 { return false } if !item.Get("cache_control").Exists() { return true } path := fmt.Sprintf("tools.%d.cache_control", int(idx.Int())) updated, errDel := sjson.DeleteBytes(payload, path) if errDel != nil { return true } payload = updated excess-- return true }) } return payload } // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching. // Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache." // This enables caching of conversation history, which is especially beneficial for long multi-turn conversations. // Only adds cache_control if: // - There are at least 2 user turns in the conversation // - No message content already has cache_control func injectMessagesCacheControl(payload []byte) []byte { messages := gjson.GetBytes(payload, "messages") if !messages.Exists() || !messages.IsArray() { return payload } // Check if ANY message content already has cache_control hasCacheControlInMessages := false messages.ForEach(func(_, msg gjson.Result) bool { content := msg.Get("content") if content.IsArray() { content.ForEach(func(_, item gjson.Result) bool { if item.Get("cache_control").Exists() { hasCacheControlInMessages = true return false } return true }) } return !hasCacheControlInMessages }) if hasCacheControlInMessages { return payload } // Find all user message indices var userMsgIndices []int messages.ForEach(func(index gjson.Result, msg gjson.Result) bool { if msg.Get("role").String() == "user" { userMsgIndices = append(userMsgIndices, int(index.Int())) } return true }) // Need at least 2 user turns to cache the second-to-last if len(userMsgIndices) < 2 { return payload } // Get the second-to-last user message index secondToLastUserIdx := userMsgIndices[len(userMsgIndices)-2] // Get the content of this message contentPath := fmt.Sprintf("messages.%d.content", secondToLastUserIdx) content := gjson.GetBytes(payload, contentPath) if content.IsArray() { // Add cache_control to the last content block of this message contentCount := int(content.Get("#").Int()) if contentCount > 0 { cacheControlPath := fmt.Sprintf("messages.%d.content.%d.cache_control", secondToLastUserIdx, contentCount-1) result, err := sjson.SetBytes(payload, cacheControlPath, map[string]string{"type": "ephemeral"}) if err != nil { log.Warnf("failed to inject cache_control into messages: %v", err) return payload } payload = result } } else if content.Type == gjson.String { // Convert string content to array with cache_control text := content.String() newContent := []map[string]interface{}{ { "type": "text", "text": text, "cache_control": map[string]string{ "type": "ephemeral", }, }, } result, err := sjson.SetBytes(payload, contentPath, newContent) if err != nil { log.Warnf("failed to inject cache_control into message string content: %v", err) return payload } payload = result } return payload } // injectToolsCacheControl adds cache_control to the last tool in the tools array. // Per Anthropic docs: "The cache_control parameter on the last tool definition caches all tool definitions." // This only adds cache_control if NO tool in the array already has it. func injectToolsCacheControl(payload []byte) []byte { tools := gjson.GetBytes(payload, "tools") if !tools.Exists() || !tools.IsArray() { return payload } toolCount := int(tools.Get("#").Int()) if toolCount == 0 { return payload } // Check if ANY tool already has cache_control - if so, don't modify tools hasCacheControlInTools := false tools.ForEach(func(_, tool gjson.Result) bool { if tool.Get("cache_control").Exists() { hasCacheControlInTools = true return false } return true }) if hasCacheControlInTools { return payload } // Add cache_control to the last tool lastToolPath := fmt.Sprintf("tools.%d.cache_control", toolCount-1) result, err := sjson.SetBytes(payload, lastToolPath, map[string]string{"type": "ephemeral"}) if err != nil { log.Warnf("failed to inject cache_control into tools array: %v", err) return payload } return result } // injectSystemCacheControl adds cache_control to the last element in the system prompt. // Converts string system prompts to array format if needed. // This only adds cache_control if NO system element already has it. func injectSystemCacheControl(payload []byte) []byte { system := gjson.GetBytes(payload, "system") if !system.Exists() { return payload } if system.IsArray() { count := int(system.Get("#").Int()) if count == 0 { return payload } // Check if ANY system element already has cache_control hasCacheControlInSystem := false system.ForEach(func(_, item gjson.Result) bool { if item.Get("cache_control").Exists() { hasCacheControlInSystem = true return false } return true }) if hasCacheControlInSystem { return payload } // Add cache_control to the last system element lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1) result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"}) if err != nil { log.Warnf("failed to inject cache_control into system array: %v", err) return payload } payload = result } else if system.Type == gjson.String { // Convert string system prompt to array with cache_control // "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}] text := system.String() newSystem := []map[string]interface{}{ { "type": "text", "text": text, "cache_control": map[string]string{ "type": "ephemeral", }, }, } result, err := sjson.SetBytes(payload, "system", newSystem) if err != nil { log.Warnf("failed to inject cache_control into system string: %v", err) return payload } payload = result } return payload } func ensureModelMaxTokens(body []byte, modelID string) []byte { if len(body) == 0 || !gjson.ValidBytes(body) { return body } if maxTokens := gjson.GetBytes(body, "max_tokens"); maxTokens.Exists() { return body } for _, provider := range registry.GetGlobalRegistry().GetModelProviders(strings.TrimSpace(modelID)) { if strings.EqualFold(provider, "claude") { maxTokens := defaultModelMaxTokens if info := registry.GetGlobalRegistry().GetModelInfo(strings.TrimSpace(modelID), "claude"); info != nil && info.MaxCompletionTokens > 0 { maxTokens = info.MaxCompletionTokens } body, _ = sjson.SetBytes(body, "max_tokens", maxTokens) return body } } return body }