chore(config): add default OAuth model alias configurations and extend registry with supported API endpoints

Merge branch 'pr-402-local'
# Conflicts: # internal/config/oauth_model_alias_migration.go
2026-03-09 15:25:17 +00:00 · 2026-03-02 21:36:42 +08:00 · 2026-03-02 20:45:37 +08:00 · 2026-03-02 20:30:07 +08:00 · 2026-03-02 19:30:32 +08:00 · 2026-03-02 13:04:30 +08:00
37 changed files with 1286 additions and 753 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -31,6 +31,7 @@ bin/*
 .agent/*
 .agents/*
 .opencode/*
+.idea/*
 .bmad/*
 _bmad/*
 _bmad-output/*
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ GEMINI.md
 .agents/*
 .agents/*
 .opencode/*
+.idea/*
 .bmad/*
 _bmad/*
 _bmad-output/*
--- a/README.md
+++ b/README.md
@@ -10,20 +10,11 @@ The Plus release stays in lockstep with the mainline features.

 ## Differences from the Mainline

- Added GitHub Copilot support (OAuth login), provided by [em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)
- Added Kiro (AWS CodeWhisperer) support (OAuth login), provided by [fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration), [Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)
+[![z.ai](https://assets.router-for.me/english-5.png)](https://z.ai/subscribe?ic=8JVLJQFSKB)

 ## New Features (Plus Enhanced)

- **OAuth Web Authentication**: Browser-based OAuth login for Kiro with beautiful web UI
- **Rate Limiter**: Built-in request rate limiting to prevent API abuse
- **Background Token Refresh**: Automatic token refresh 10 minutes before expiration
- **Metrics & Monitoring**: Request metrics collection for monitoring and debugging
- **Device Fingerprint**: Device fingerprint generation for enhanced security
- **Cooldown Management**: Smart cooldown mechanism for API rate limits
- **Usage Checker**: Real-time usage monitoring and quota management
- **Model Converter**: Unified model name conversion across providers
- **UTF-8 Stream Processing**: Improved streaming response handling
+GLM CODING PLAN is a subscription service designed for AI coding, starting at just $10/month. It provides access to their flagship GLM-4.7 and GLM-5 (GLM-5 available only to Pro users) models across 10+ popular AI coding tools (Claude Code, Cline, Roo Code, etc.), offering developers a top-tier, fast, and stable coding experience.

 ## Kiro Authentication

--- a/README_CN.md
+++ b/README_CN.md
@@ -10,22 +10,13 @@

 ## 与主线版本版本差异

- 新增 GitHub Copilot 支持（OAuth 登录），由[em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)提供
- 新增 Kiro (AWS CodeWhisperer) 支持 (OAuth 登录), 由[fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)、[Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)提供
+[![bigmodel.cn](https://assets.router-for.me/chinese-5.png)](https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII)

 ## 新增功能 (Plus 增强版)

- **OAuth Web 认证**: 基于浏览器的 Kiro OAuth 登录，提供美观的 Web UI
- **请求限流器**: 内置请求限流，防止 API 滥用
- **后台令牌刷新**: 过期前 10 分钟自动刷新令牌
- **监控指标**: 请求指标收集，用于监控和调试
- **设备指纹**: 设备指纹生成，增强安全性
- **冷却管理**: 智能冷却机制，应对 API 速率限制
- **用量检查器**: 实时用量监控和配额管理
- **模型转换器**: 跨供应商的统一模型名称转换
- **UTF-8 流处理**: 改进的流式响应处理
+GLM CODING PLAN 是专为AI编码打造的订阅套餐，每月最低仅需20元，即可在十余款主流AI编码工具如 Claude Code、Cline、Roo Code 中畅享智谱旗舰模型GLM-4.7（受限于算力，目前仅限Pro用户开放），为开发者提供顶尖的编码体验。

-## Kiro 认证
+智谱AI为本产品提供了特别优惠，使用以下链接购买可以享受九折优惠：https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII

 ### 命令行登录

--- a/config.example.yaml
+++ b/config.example.yaml
@@ -80,6 +80,10 @@ passthrough-headers: false
 # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504.
 request-retry: 3

+# Maximum number of different credentials to try for one failed request.
+# Set to 0 (negative values are treated as 0) to keep legacy behavior (try all available credentials).
+max-retry-credentials: 0
+
 # Maximum wait time in seconds for a cooled-down credential before triggering a retry.
 max-retry-interval: 30

--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -48,14 +48,11 @@ import (
 var lastRefreshKeys = []string{"last_refresh", "lastRefresh", "last_refreshed_at", "lastRefreshedAt"}

 const (
-	anthropicCallbackPort   = 54545
-	geminiCallbackPort      = 8085
-	codexCallbackPort       = 1455
-	geminiCLIEndpoint       = "https://cloudcode-pa.googleapis.com"
-	geminiCLIVersion        = "v1internal"
-	geminiCLIUserAgent      = "google-api-nodejs-client/9.15.1"
-	geminiCLIApiClient      = "gl-node/22.17.0"
-	geminiCLIClientMetadata = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI"
+	anthropicCallbackPort = 54545
+	geminiCallbackPort    = 8085
+	codexCallbackPort     = 1455
+	geminiCLIEndpoint     = "https://cloudcode-pa.googleapis.com"
+	geminiCLIVersion      = "v1internal"
 )

 type callbackForwarder struct {
@@ -2384,9 +2381,7 @@ func callGeminiCLI(ctx context.Context, httpClient *http.Client, endpoint string
 		return fmt.Errorf("create request: %w", errRequest)
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", geminiCLIUserAgent)
-	req.Header.Set("X-Goog-Api-Client", geminiCLIApiClient)
-	req.Header.Set("Client-Metadata", geminiCLIClientMetadata)
+	req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))

 	resp, errDo := httpClient.Do(req)
 	if errDo != nil {
@@ -2456,7 +2451,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 			return false, fmt.Errorf("failed to create request: %w", errRequest)
 		}
 		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("User-Agent", geminiCLIUserAgent)
+		req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))
 		resp, errDo := httpClient.Do(req)
 		if errDo != nil {
 			return false, fmt.Errorf("failed to execute request: %w", errDo)
@@ -2477,7 +2472,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 			return false, fmt.Errorf("failed to create request: %w", errRequest)
 		}
 		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("User-Agent", geminiCLIUserAgent)
+		req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))
 		resp, errDo = httpClient.Do(req)
 		if errDo != nil {
 			return false, fmt.Errorf("failed to execute request: %w", errDo)
--- a/internal/api/modules/amp/proxy.go
+++ b/internal/api/modules/amp/proxy.go
@@ -15,6 +15,7 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	log "github.com/sirupsen/logrus"
 )

@@ -77,6 +78,9 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		req.Header.Del("X-Api-Key")
 		req.Header.Del("X-Goog-Api-Key")

+		// Remove proxy, client identity, and browser fingerprint headers
+		misc.ScrubProxyAndFingerprintHeaders(req)
+
 		// Remove query-based credentials if they match the authenticated client API key.
 		// This prevents leaking client auth material to the Amp upstream while avoiding
 		// breaking unrelated upstream query parameters.
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -258,7 +258,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	s.oldConfigYaml, _ = yaml.Marshal(cfg)
 	s.applyAccessConfig(nil, cfg)
 	if authManager != nil {
-		authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+		authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second, cfg.MaxRetryCredentials)
 	}
 	managementasset.SetCurrentConfig(cfg)
 	auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
@@ -944,7 +944,7 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 	}

 	if s.handlers != nil && s.handlers.AuthManager != nil {
-		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second, cfg.MaxRetryCredentials)
 	}

 	// Update log level dynamically when debug flag changes
--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -20,6 +20,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
@@ -27,11 +28,8 @@ import (
 )

 const (
-	geminiCLIEndpoint       = "https://cloudcode-pa.googleapis.com"
-	geminiCLIVersion        = "v1internal"
-	geminiCLIUserAgent      = "google-api-nodejs-client/9.15.1"
-	geminiCLIApiClient      = "gl-node/22.17.0"
-	geminiCLIClientMetadata = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI"
+	geminiCLIEndpoint = "https://cloudcode-pa.googleapis.com"
+	geminiCLIVersion  = "v1internal"
 )

 type projectSelectionRequiredError struct{}
@@ -409,9 +407,7 @@ func callGeminiCLI(ctx context.Context, httpClient *http.Client, endpoint string
 		return fmt.Errorf("create request: %w", errRequest)
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", geminiCLIUserAgent)
-	req.Header.Set("X-Goog-Api-Client", geminiCLIApiClient)
-	req.Header.Set("Client-Metadata", geminiCLIClientMetadata)
+	req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))

 	resp, errDo := httpClient.Do(req)
 	if errDo != nil {
@@ -630,7 +626,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 			return false, fmt.Errorf("failed to create request: %w", errRequest)
 		}
 		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("User-Agent", geminiCLIUserAgent)
+		req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))
 		resp, errDo := httpClient.Do(req)
 		if errDo != nil {
 			return false, fmt.Errorf("failed to execute request: %w", errDo)
@@ -651,7 +647,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 			return false, fmt.Errorf("failed to create request: %w", errRequest)
 		}
 		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("User-Agent", geminiCLIUserAgent)
+		req.Header.Set("User-Agent", misc.GeminiCLIUserAgent(""))
 		resp, errDo = httpClient.Do(req)
 		if errDo != nil {
 			return false, fmt.Errorf("failed to execute request: %w", errDo)
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -69,6 +69,9 @@ type Config struct {

 	// RequestRetry defines the retry times when the request failed.
 	RequestRetry int `yaml:"request-retry" json:"request-retry"`
+	// MaxRetryCredentials defines the maximum number of credentials to try for a failed request.
+	// Set to 0 or a negative value to keep trying all available credentials (legacy behavior).
+	MaxRetryCredentials int `yaml:"max-retry-credentials" json:"max-retry-credentials"`
 	// MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential.
 	MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"`

@@ -576,16 +579,6 @@ func LoadConfig(configFile string) (*Config, error) {
 // If optional is true and the file is missing, it returns an empty Config.
 // If optional is true and the file is empty or invalid, it returns an empty Config.
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
-	// NOTE: Startup oauth-model-alias migration is intentionally disabled.
-	// Reason: avoid mutating config.yaml during server startup.
-	// Re-enable the block below if automatic startup migration is needed again.
-	// if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
-	// 	// Log warning but don't fail - config loading should still work
-	// 	fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
-	// } else if migrated {
-	// 	fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
-	// }
-
 	// Read the entire configuration file into memory.
 	data, err := os.ReadFile(configFile)
 	if err != nil {
@@ -673,6 +666,10 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		cfg.ErrorLogsMaxFiles = 10
 	}

+	if cfg.MaxRetryCredentials < 0 {
+		cfg.MaxRetryCredentials = 0
+	}
+
 	// Sanitize Gemini API key configuration and migrate legacy entries.
 	cfg.SanitizeGeminiKeys()

@@ -1669,9 +1666,6 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
 	srcIdx := findMapKeyIndex(srcRoot, key)
 	if srcIdx < 0 {
 		// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
-		//
-		// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
-		// oauth-model-alias key is missing, migration will add the default antigravity aliases.
 		// When users delete the last channel from oauth-model-alias via the management API,
 		// we want that deletion to persist across hot reloads and restarts.
 		if key == "oauth-model-alias" {
--- a/internal/config/oauth_model_alias_defaults.go
+++ b/internal/config/oauth_model_alias_defaults.go
@@ -0,0 +1,37 @@
+package config
+
+// defaultKiroAliases returns default oauth-model-alias entries for Kiro.
+// These aliases expose standard Claude IDs for Kiro-prefixed upstream models.
+func defaultKiroAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		// Sonnet 4.6
+		{Name: "kiro-claude-sonnet-4-6", Alias: "claude-sonnet-4-6", Fork: true},
+		// Sonnet 4.5
+		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5-20250929", Fork: true},
+		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5", Fork: true},
+		// Sonnet 4
+		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4-20250514", Fork: true},
+		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4", Fork: true},
+		// Opus 4.6
+		{Name: "kiro-claude-opus-4-6", Alias: "claude-opus-4-6", Fork: true},
+		// Opus 4.5
+		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5-20251101", Fork: true},
+		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5", Fork: true},
+		// Haiku 4.5
+		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5-20251001", Fork: true},
+		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5", Fork: true},
+	}
+}
+
+// defaultGitHubCopilotAliases returns default oauth-model-alias entries for
+// GitHub Copilot Claude models. It exposes hyphen-style IDs used by clients.
+func defaultGitHubCopilotAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		{Name: "claude-haiku-4.5", Alias: "claude-haiku-4-5", Fork: true},
+		{Name: "claude-opus-4.1", Alias: "claude-opus-4-1", Fork: true},
+		{Name: "claude-opus-4.5", Alias: "claude-opus-4-5", Fork: true},
+		{Name: "claude-opus-4.6", Alias: "claude-opus-4-6", Fork: true},
+		{Name: "claude-sonnet-4.5", Alias: "claude-sonnet-4-5", Fork: true},
+		{Name: "claude-sonnet-4.6", Alias: "claude-sonnet-4-6", Fork: true},
+	}
+}
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -1,316 +0,0 @@
-package config
-
-import (
-	"os"
-	"strings"
-
-	"gopkg.in/yaml.v3"
-)
-
-// antigravityModelConversionTable maps old built-in aliases to actual model names
-// for the antigravity channel during migration.
-var antigravityModelConversionTable = map[string]string{
-	"gemini-2.5-computer-use-preview-10-2025": "rev19-uic3-1p",
-	"gemini-3-pro-image-preview":              "gemini-3-pro-image",
-	"gemini-3-pro-preview":                    "gemini-3-pro-high",
-	"gemini-3-flash-preview":                  "gemini-3-flash",
-	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
-	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
-	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
-	"gemini-claude-opus-4-6-thinking":         "claude-opus-4-6-thinking",
-}
-
-// defaultKiroAliases returns the default oauth-model-alias configuration
-// for the kiro channel. Maps kiro-prefixed model names to standard Claude model
-// names so that clients like Claude Code can use standard names directly.
-func defaultKiroAliases() []OAuthModelAlias {
-	return []OAuthModelAlias{
-		// Sonnet 4.6
-		{Name: "kiro-claude-sonnet-4-6", Alias: "claude-sonnet-4-6", Fork: true},
-		// Sonnet 4.5
-		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5-20250929", Fork: true},
-		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5", Fork: true},
-		// Sonnet 4
-		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4-20250514", Fork: true},
-		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4", Fork: true},
-		// Opus 4.6
-		{Name: "kiro-claude-opus-4-6", Alias: "claude-opus-4-6", Fork: true},
-		// Opus 4.5
-		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5-20251101", Fork: true},
-		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5", Fork: true},
-		// Haiku 4.5
-		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5-20251001", Fork: true},
-		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5", Fork: true},
-	}
-}
-
-// defaultGitHubCopilotAliases returns default oauth-model-alias entries that
-// expose Claude hyphen-style IDs for GitHub Copilot Claude models.
-// This keeps compatibility with clients (e.g. Claude Code) that use
-// Anthropic-style model IDs like "claude-opus-4-6".
-func defaultGitHubCopilotAliases() []OAuthModelAlias {
-	return []OAuthModelAlias{
-		{Name: "claude-haiku-4.5", Alias: "claude-haiku-4-5", Fork: true},
-		{Name: "claude-opus-4.1", Alias: "claude-opus-4-1", Fork: true},
-		{Name: "claude-opus-4.5", Alias: "claude-opus-4-5", Fork: true},
-		{Name: "claude-opus-4.6", Alias: "claude-opus-4-6", Fork: true},
-		{Name: "claude-sonnet-4.5", Alias: "claude-sonnet-4-5", Fork: true},
-		{Name: "claude-sonnet-4.6", Alias: "claude-sonnet-4-6", Fork: true},
-	}
-}
-
-// defaultAntigravityAliases returns the default oauth-model-alias configuration
-// for the antigravity channel when neither field exists.
-func defaultAntigravityAliases() []OAuthModelAlias {
-	return []OAuthModelAlias{
-		{Name: "rev19-uic3-1p", Alias: "gemini-2.5-computer-use-preview-10-2025"},
-		{Name: "gemini-3-pro-image", Alias: "gemini-3-pro-image-preview"},
-		{Name: "gemini-3-pro-high", Alias: "gemini-3-pro-preview"},
-		{Name: "gemini-3-flash", Alias: "gemini-3-flash-preview"},
-		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
-		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
-		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
-		{Name: "claude-opus-4-6-thinking", Alias: "gemini-claude-opus-4-6-thinking"},
-	}
-}
-
-// MigrateOAuthModelAlias checks for and performs migration from oauth-model-mappings
-// to oauth-model-alias at startup. Returns true if migration was performed.
-//
-// Migration flow:
-// 1. Check if oauth-model-alias exists -> skip migration
-// 2. Check if oauth-model-mappings exists -> convert and migrate
-//   - For antigravity channel, convert old built-in aliases to actual model names
-//
-// 3. Neither exists -> add default antigravity config
-func MigrateOAuthModelAlias(configFile string) (bool, error) {
-	data, err := os.ReadFile(configFile)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return false, nil
-		}
-		return false, err
-	}
-	if len(data) == 0 {
-		return false, nil
-	}
-
-	// Parse YAML into node tree to preserve structure
-	var root yaml.Node
-	if err := yaml.Unmarshal(data, &root); err != nil {
-		return false, nil
-	}
-	if root.Kind != yaml.DocumentNode || len(root.Content) == 0 {
-		return false, nil
-	}
-	rootMap := root.Content[0]
-	if rootMap == nil || rootMap.Kind != yaml.MappingNode {
-		return false, nil
-	}
-
-	// Check if oauth-model-alias already exists
-	if findMapKeyIndex(rootMap, "oauth-model-alias") >= 0 {
-		return false, nil
-	}
-
-	// Check if oauth-model-mappings exists
-	oldIdx := findMapKeyIndex(rootMap, "oauth-model-mappings")
-	if oldIdx >= 0 {
-		// Migrate from old field
-		return migrateFromOldField(configFile, &root, rootMap, oldIdx)
-	}
-
-	// Neither field exists - add default antigravity config
-	return addDefaultAntigravityConfig(configFile, &root, rootMap)
-}
-
-// migrateFromOldField converts oauth-model-mappings to oauth-model-alias
-func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node, oldIdx int) (bool, error) {
-	if oldIdx+1 >= len(rootMap.Content) {
-		return false, nil
-	}
-	oldValue := rootMap.Content[oldIdx+1]
-	if oldValue == nil || oldValue.Kind != yaml.MappingNode {
-		return false, nil
-	}
-
-	// Parse the old aliases
-	oldAliases := parseOldAliasNode(oldValue)
-	if len(oldAliases) == 0 {
-		// Remove the old field and write
-		removeMapKeyByIndex(rootMap, oldIdx)
-		return writeYAMLNode(configFile, root)
-	}
-
-	// Convert model names for antigravity channel
-	newAliases := make(map[string][]OAuthModelAlias, len(oldAliases))
-	for channel, entries := range oldAliases {
-		converted := make([]OAuthModelAlias, 0, len(entries))
-		for _, entry := range entries {
-			newEntry := OAuthModelAlias{
-				Name:  entry.Name,
-				Alias: entry.Alias,
-				Fork:  entry.Fork,
-			}
-			// Convert model names for antigravity channel
-			if strings.EqualFold(channel, "antigravity") {
-				if actual, ok := antigravityModelConversionTable[entry.Name]; ok {
-					newEntry.Name = actual
-				}
-			}
-			converted = append(converted, newEntry)
-		}
-		newAliases[channel] = converted
-	}
-
-	// For antigravity channel, supplement missing default aliases
-	if antigravityEntries, exists := newAliases["antigravity"]; exists {
-		// Build a set of already configured model names (upstream names)
-		configuredModels := make(map[string]bool, len(antigravityEntries))
-		for _, entry := range antigravityEntries {
-			configuredModels[entry.Name] = true
-		}
-
-		// Add missing default aliases
-		for _, defaultAlias := range defaultAntigravityAliases() {
-			if !configuredModels[defaultAlias.Name] {
-				antigravityEntries = append(antigravityEntries, defaultAlias)
-			}
-		}
-		newAliases["antigravity"] = antigravityEntries
-	}
-
-	// Build new node
-	newNode := buildOAuthModelAliasNode(newAliases)
-
-	// Replace old key with new key and value
-	rootMap.Content[oldIdx].Value = "oauth-model-alias"
-	rootMap.Content[oldIdx+1] = newNode
-
-	return writeYAMLNode(configFile, root)
-}
-
-// addDefaultAntigravityConfig adds the default antigravity configuration
-func addDefaultAntigravityConfig(configFile string, root *yaml.Node, rootMap *yaml.Node) (bool, error) {
-	defaults := map[string][]OAuthModelAlias{
-		"antigravity": defaultAntigravityAliases(),
-	}
-	newNode := buildOAuthModelAliasNode(defaults)
-
-	// Add new key-value pair
-	keyNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "oauth-model-alias"}
-	rootMap.Content = append(rootMap.Content, keyNode, newNode)
-
-	return writeYAMLNode(configFile, root)
-}
-
-// parseOldAliasNode parses the old oauth-model-mappings node structure
-func parseOldAliasNode(node *yaml.Node) map[string][]OAuthModelAlias {
-	if node == nil || node.Kind != yaml.MappingNode {
-		return nil
-	}
-	result := make(map[string][]OAuthModelAlias)
-	for i := 0; i+1 < len(node.Content); i += 2 {
-		channelNode := node.Content[i]
-		entriesNode := node.Content[i+1]
-		if channelNode == nil || entriesNode == nil {
-			continue
-		}
-		channel := strings.ToLower(strings.TrimSpace(channelNode.Value))
-		if channel == "" || entriesNode.Kind != yaml.SequenceNode {
-			continue
-		}
-		entries := make([]OAuthModelAlias, 0, len(entriesNode.Content))
-		for _, entryNode := range entriesNode.Content {
-			if entryNode == nil || entryNode.Kind != yaml.MappingNode {
-				continue
-			}
-			entry := parseAliasEntry(entryNode)
-			if entry.Name != "" && entry.Alias != "" {
-				entries = append(entries, entry)
-			}
-		}
-		if len(entries) > 0 {
-			result[channel] = entries
-		}
-	}
-	return result
-}
-
-// parseAliasEntry parses a single alias entry node
-func parseAliasEntry(node *yaml.Node) OAuthModelAlias {
-	var entry OAuthModelAlias
-	for i := 0; i+1 < len(node.Content); i += 2 {
-		keyNode := node.Content[i]
-		valNode := node.Content[i+1]
-		if keyNode == nil || valNode == nil {
-			continue
-		}
-		switch strings.ToLower(strings.TrimSpace(keyNode.Value)) {
-		case "name":
-			entry.Name = strings.TrimSpace(valNode.Value)
-		case "alias":
-			entry.Alias = strings.TrimSpace(valNode.Value)
-		case "fork":
-			entry.Fork = strings.ToLower(strings.TrimSpace(valNode.Value)) == "true"
-		}
-	}
-	return entry
-}
-
-// buildOAuthModelAliasNode creates a YAML node for oauth-model-alias
-func buildOAuthModelAliasNode(aliases map[string][]OAuthModelAlias) *yaml.Node {
-	node := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
-	for channel, entries := range aliases {
-		channelNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: channel}
-		entriesNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
-		for _, entry := range entries {
-			entryNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
-			entryNode.Content = append(entryNode.Content,
-				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "name"},
-				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Name},
-				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "alias"},
-				&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: entry.Alias},
-			)
-			if entry.Fork {
-				entryNode.Content = append(entryNode.Content,
-					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "fork"},
-					&yaml.Node{Kind: yaml.ScalarNode, Tag: "!!bool", Value: "true"},
-				)
-			}
-			entriesNode.Content = append(entriesNode.Content, entryNode)
-		}
-		node.Content = append(node.Content, channelNode, entriesNode)
-	}
-	return node
-}
-
-// removeMapKeyByIndex removes a key-value pair from a mapping node by index
-func removeMapKeyByIndex(mapNode *yaml.Node, keyIdx int) {
-	if mapNode == nil || mapNode.Kind != yaml.MappingNode {
-		return
-	}
-	if keyIdx < 0 || keyIdx+1 >= len(mapNode.Content) {
-		return
-	}
-	mapNode.Content = append(mapNode.Content[:keyIdx], mapNode.Content[keyIdx+2:]...)
-}
-
-// writeYAMLNode writes the YAML node tree back to file
-func writeYAMLNode(configFile string, root *yaml.Node) (bool, error) {
-	f, err := os.Create(configFile)
-	if err != nil {
-		return false, err
-	}
-	defer f.Close()
-
-	enc := yaml.NewEncoder(f)
-	enc.SetIndent(2)
-	if err := enc.Encode(root); err != nil {
-		return false, err
-	}
-	if err := enc.Close(); err != nil {
-		return false, err
-	}
-	return true, nil
-}
--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -1,245 +0,0 @@
-package config
-
-import (
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"gopkg.in/yaml.v3"
-)
-
-func TestMigrateOAuthModelAlias_SkipsIfNewFieldExists(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	content := `oauth-model-alias:
-  gemini-cli:
-    - name: "gemini-2.5-pro"
-      alias: "g2.5p"
-`
-	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if migrated {
-		t.Fatal("expected no migration when oauth-model-alias already exists")
-	}
-
-	// Verify file unchanged
-	data, _ := os.ReadFile(configFile)
-	if !strings.Contains(string(data), "oauth-model-alias:") {
-		t.Fatal("file should still contain oauth-model-alias")
-	}
-}
-
-func TestMigrateOAuthModelAlias_MigratesOldField(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	content := `oauth-model-mappings:
-  gemini-cli:
-    - name: "gemini-2.5-pro"
-      alias: "g2.5p"
-      fork: true
-`
-	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !migrated {
-		t.Fatal("expected migration to occur")
-	}
-
-	// Verify new field exists and old field removed
-	data, _ := os.ReadFile(configFile)
-	if strings.Contains(string(data), "oauth-model-mappings:") {
-		t.Fatal("old field should be removed")
-	}
-	if !strings.Contains(string(data), "oauth-model-alias:") {
-		t.Fatal("new field should exist")
-	}
-
-	// Parse and verify structure
-	var root yaml.Node
-	if err := yaml.Unmarshal(data, &root); err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	// Use old model names that should be converted
-	content := `oauth-model-mappings:
-  antigravity:
-    - name: "gemini-2.5-computer-use-preview-10-2025"
-      alias: "computer-use"
-    - name: "gemini-3-pro-preview"
-      alias: "g3p"
-`
-	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !migrated {
-		t.Fatal("expected migration to occur")
-	}
-
-	// Verify model names were converted
-	data, _ := os.ReadFile(configFile)
-	content = string(data)
-	if !strings.Contains(content, "rev19-uic3-1p") {
-		t.Fatal("expected gemini-2.5-computer-use-preview-10-2025 to be converted to rev19-uic3-1p")
-	}
-	if !strings.Contains(content, "gemini-3-pro-high") {
-		t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high")
-	}
-
-	// Verify missing default aliases were supplemented
-	if !strings.Contains(content, "gemini-3-pro-image") {
-		t.Fatal("expected missing default alias gemini-3-pro-image to be added")
-	}
-	if !strings.Contains(content, "gemini-3-flash") {
-		t.Fatal("expected missing default alias gemini-3-flash to be added")
-	}
-	if !strings.Contains(content, "claude-sonnet-4-5") {
-		t.Fatal("expected missing default alias claude-sonnet-4-5 to be added")
-	}
-	if !strings.Contains(content, "claude-sonnet-4-5-thinking") {
-		t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added")
-	}
-	if !strings.Contains(content, "claude-opus-4-5-thinking") {
-		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
-	}
-	if !strings.Contains(content, "claude-opus-4-6-thinking") {
-		t.Fatal("expected missing default alias claude-opus-4-6-thinking to be added")
-	}
-}
-
-func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	content := `debug: true
-port: 8080
-`
-	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !migrated {
-		t.Fatal("expected migration to add default config")
-	}
-
-	// Verify default antigravity config was added
-	data, _ := os.ReadFile(configFile)
-	content = string(data)
-	if !strings.Contains(content, "oauth-model-alias:") {
-		t.Fatal("expected oauth-model-alias to be added")
-	}
-	if !strings.Contains(content, "antigravity:") {
-		t.Fatal("expected antigravity channel to be added")
-	}
-	if !strings.Contains(content, "rev19-uic3-1p") {
-		t.Fatal("expected default antigravity aliases to include rev19-uic3-1p")
-	}
-}
-
-func TestMigrateOAuthModelAlias_PreservesOtherConfig(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	content := `debug: true
-port: 8080
-oauth-model-mappings:
-  gemini-cli:
-    - name: "test"
-      alias: "t"
-api-keys:
-  - "key1"
-  - "key2"
-`
-	if err := os.WriteFile(configFile, []byte(content), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !migrated {
-		t.Fatal("expected migration to occur")
-	}
-
-	// Verify other config preserved
-	data, _ := os.ReadFile(configFile)
-	content = string(data)
-	if !strings.Contains(content, "debug: true") {
-		t.Fatal("expected debug field to be preserved")
-	}
-	if !strings.Contains(content, "port: 8080") {
-		t.Fatal("expected port field to be preserved")
-	}
-	if !strings.Contains(content, "api-keys:") {
-		t.Fatal("expected api-keys field to be preserved")
-	}
-}
-
-func TestMigrateOAuthModelAlias_NonexistentFile(t *testing.T) {
-	t.Parallel()
-
-	migrated, err := MigrateOAuthModelAlias("/nonexistent/path/config.yaml")
-	if err != nil {
-		t.Fatalf("unexpected error for nonexistent file: %v", err)
-	}
-	if migrated {
-		t.Fatal("expected no migration for nonexistent file")
-	}
-}
-
-func TestMigrateOAuthModelAlias_EmptyFile(t *testing.T) {
-	t.Parallel()
-
-	dir := t.TempDir()
-	configFile := filepath.Join(dir, "config.yaml")
-
-	if err := os.WriteFile(configFile, []byte(""), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	migrated, err := MigrateOAuthModelAlias(configFile)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if migrated {
-		t.Fatal("expected no migration for empty file")
-	}
-}
--- a/internal/misc/header_utils.go
+++ b/internal/misc/header_utils.go
@@ -4,10 +4,98 @@
 package misc

 import (
+	"fmt"
 	"net/http"
+	"runtime"
 	"strings"
 )

+const (
+	// GeminiCLIVersion is the Gemini CLI version string embedded in the User-Agent built by GeminiCLIUserAgent.
+	GeminiCLIVersion = "0.31.0"
+
+	// GeminiCLIApiClientHeader is the value for the X-Goog-Api-Client header sent to the Gemini CLI upstream.
+	GeminiCLIApiClientHeader = "google-genai-sdk/1.41.0 gl-node/v22.19.0"
+)
+
+// geminiCLIOS maps Go runtime OS names to the Node.js-style platform strings used by Gemini CLI.
+// Only names that differ between the two runtimes are translated; every other
+// GOOS value is returned unchanged.
+func geminiCLIOS() string {
+	switch runtime.GOOS {
+	case "windows":
+		return "win32"
+	case "solaris", "illumos":
+		// Node.js reports Solaris-family systems as "sunos".
+		return "sunos"
+	default:
+		return runtime.GOOS
+	}
+}
+
+// geminiCLIArch maps Go runtime architecture names to the Node.js-style arch strings used by Gemini CLI.
+// Node.js names 32-bit x86 "ia32" (it never reports "x86"), so GOARCH "386" is translated to that value.
+func geminiCLIArch() string {
+	switch runtime.GOARCH {
+	case "amd64":
+		return "x64"
+	case "386":
+		return "ia32"
+	default:
+		return runtime.GOARCH
+	}
+}
+
+// GeminiCLIUserAgent builds the User-Agent value in the Gemini CLI layout
+// "GeminiCLI/<version>/<model> (<os>; <arch>)".
+// An empty model is reported as "unknown"; any other value is used verbatim.
+func GeminiCLIUserAgent(model string) string {
+	const uaFormat = "GeminiCLI/%s/%s (%s; %s)"
+
+	name := model
+	if name == "" {
+		name = "unknown"
+	}
+	platform, arch := geminiCLIOS(), geminiCLIArch()
+	return fmt.Sprintf(uaFormat, GeminiCLIVersion, name, platform, arch)
+}
+
+// ScrubProxyAndFingerprintHeaders strips a fixed deny-list of headers from an
+// outgoing request: headers that could reveal proxy infrastructure, client
+// identity, or browser fingerprints. The intent is that requests to upstream
+// services look like they come straight from a native client rather than from
+// a third-party client sitting behind a reverse proxy.
+// A nil request is ignored; headers outside the deny-list are left untouched.
+func ScrubProxyAndFingerprintHeaders(req *http.Request) {
+	if req == nil {
+		return
+	}
+
+	scrubbed := [...]string{
+		// Proxy tracing headers.
+		"X-Forwarded-For",
+		"X-Forwarded-Host",
+		"X-Forwarded-Proto",
+		"X-Forwarded-Port",
+		"X-Real-IP",
+		"Forwarded",
+		"Via",
+
+		// Client identity headers.
+		"X-Title",
+		"X-Stainless-Lang",
+		"X-Stainless-Package-Version",
+		"X-Stainless-Os",
+		"X-Stainless-Arch",
+		"X-Stainless-Runtime",
+		"X-Stainless-Runtime-Version",
+		"Http-Referer",
+		"Referer",
+
+		// Browser / Chromium fingerprint headers. Electron-based clients
+		// (e.g. CherryStudio) send these through the Fetch API, whereas the
+		// Node.js https module (which Antigravity uses) does not.
+		"Sec-Ch-Ua",
+		"Sec-Ch-Ua-Mobile",
+		"Sec-Ch-Ua-Platform",
+		"Sec-Fetch-Mode",
+		"Sec-Fetch-Site",
+		"Sec-Fetch-Dest",
+		"Priority",
+
+		// Encoding negotiation. Antigravity (Node.js) sends "gzip, deflate, br"
+		// by default; Electron-based clients may add "zstd", which would be a
+		// fingerprint mismatch.
+		"Accept-Encoding",
+	}
+	for _, name := range scrubbed {
+		req.Header.Del(name)
+	}
+}
+
 // EnsureHeader ensures that a header exists in the target header map by checking
 // multiple sources in order of priority: source headers, existing target headers,
 // and finally the default value. It only sets the header if it's not already present
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -959,22 +959,17 @@ type AntigravityModelConfig struct {
 // Keys use upstream model names returned by the Antigravity models endpoint.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		// "rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-2.5-flash":           {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3.1-pro-high":        {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3.1-flash-image":     {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
-		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
-		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-opus-4-6-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
-		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-sonnet-4-6":          {MaxCompletionTokens: 64000},
-		"claude-sonnet-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gpt-oss-120b-medium":        {},
-		"tab_flash_lite_preview":     {},
+		"gemini-2.5-flash":         {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-2.5-flash-lite":    {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-3-pro-high":        {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3-pro-low":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3.1-pro-high":      {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3.1-pro-low":       {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		"gemini-3.1-flash-image":   {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
+		"gemini-3-flash":           {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+		"claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-sonnet-4-6":        {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gpt-oss-120b-medium":      {},
 	}
 }

--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -49,6 +49,10 @@ type ModelInfo struct {
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
 	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
 	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
+	// SupportedInputModalities lists supported input modalities (e.g., TEXT, IMAGE, VIDEO, AUDIO)
+	SupportedInputModalities []string `json:"supportedInputModalities,omitempty"`
+	// SupportedOutputModalities lists supported output modalities (e.g., TEXT, IMAGE)
+	SupportedOutputModalities []string `json:"supportedOutputModalities,omitempty"`

 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
@@ -501,8 +505,11 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedParameters) > 0 {
 		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
-	if len(model.SupportedEndpoints) > 0 {
-		copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
+	if len(model.SupportedInputModalities) > 0 {
+		copyModel.SupportedInputModalities = append([]string(nil), model.SupportedInputModalities...)
+	}
+	if len(model.SupportedOutputModalities) > 0 {
+		copyModel.SupportedOutputModalities = append([]string(nil), model.SupportedOutputModalities...)
 	}
 	return &copyModel
 }
@@ -1089,6 +1096,12 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if len(model.SupportedGenerationMethods) > 0 {
 			result["supportedGenerationMethods"] = model.SupportedGenerationMethods
 		}
+		if len(model.SupportedInputModalities) > 0 {
+			result["supportedInputModalities"] = model.SupportedInputModalities
+		}
+		if len(model.SupportedOutputModalities) > 0 {
+			result["supportedOutputModalities"] = model.SupportedOutputModalities
+		}
 		return result

 	default:
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"context"
 	"crypto/sha256"
+	"crypto/tls"
 	"encoding/binary"
 	"encoding/json"
 	"errors"
@@ -45,10 +46,10 @@ const (
 	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
 	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret        = "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"
-	defaultAntigravityAgent        = "antigravity/1.104.0 darwin/arm64"
+	defaultAntigravityAgent        = "antigravity/1.19.6 darwin/arm64"
 	antigravityAuthType            = "antigravity"
 	refreshSkew                    = 3000 * time.Second
-	systemInstruction              = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**"
+	// systemInstruction              = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**"
 )

 var (
@@ -142,6 +143,62 @@ func NewAntigravityExecutor(cfg *config.Config) *AntigravityExecutor {
 	return &AntigravityExecutor{cfg: cfg}
 }

+// antigravityTransport is a singleton HTTP/1.1 transport built by initAntigravityTransport.
+// It is initialized once via antigravityTransportOnce to avoid leaking a new connection pool
+// (and the goroutines managing it) on every request. newAntigravityHTTPClient only installs it
+// when the proxy-aware client carries no transport of its own.
+var (
+	antigravityTransport     *http.Transport
+	antigravityTransportOnce sync.Once
+)
+
+// cloneTransportWithHTTP11 returns a copy of base that is restricted to HTTP/1.1.
+// The copy has ForceAttemptHTTP2 cleared, an empty (non-nil) TLSNextProto map so
+// no implicit HTTP/2 upgrade is registered, and a private TLS config whose ALPN
+// list advertises only "http/1.1". base itself is not modified. A nil base
+// yields nil.
+func cloneTransportWithHTTP11(base *http.Transport) *http.Transport {
+	if base == nil {
+		return nil
+	}
+
+	h1 := base.Clone()
+
+	// Work on a private TLS config so the ALPN override never reaches base.
+	tlsCfg := &tls.Config{}
+	if h1.TLSClientConfig != nil {
+		tlsCfg = h1.TLSClientConfig.Clone()
+	}
+	// Actively advertise only HTTP/1.1 in the ALPN handshake.
+	tlsCfg.NextProtos = []string{"http/1.1"}
+	h1.TLSClientConfig = tlsCfg
+
+	// An empty, non-nil TLSNextProto prevents the implicit HTTP/2 upgrade.
+	h1.TLSNextProto = map[string]func(authority string, c *tls.Conn) http.RoundTripper{}
+	h1.ForceAttemptHTTP2 = false
+	return h1
+}
+
+// initAntigravityTransport populates antigravityTransport with an HTTP/1.1-only
+// clone of http.DefaultTransport, or of a zero-value http.Transport when the
+// default has been replaced by some other RoundTripper implementation.
+// It is meant to be run through antigravityTransportOnce.
+func initAntigravityTransport() {
+	if dt, isTransport := http.DefaultTransport.(*http.Transport); isTransport {
+		antigravityTransport = cloneTransportWithHTTP11(dt)
+		return
+	}
+	antigravityTransport = cloneTransportWithHTTP11(&http.Transport{})
+}
+
+// newAntigravityHTTPClient returns the proxy-aware HTTP client for Antigravity
+// with HTTP/2 disabled, so that requests go out over HTTP/1.1 to mimic Node.js
+// https defaults.
+//
+// Transport selection:
+//   - no transport on the proxy-aware client: the shared singleton
+//     antigravityTransport is installed;
+//   - an *http.Transport: an HTTP/1.1-only clone of it is installed, which keeps
+//     its proxy settings;
+//   - any other RoundTripper: left as is.
+//
+// NOTE(review): the *http.Transport branch builds a new clone on every call, so
+// the singleton only covers the first case — confirm this does not accumulate
+// connection pools. HTTP/1.1 is also not enforced for other RoundTripper types.
+func newAntigravityHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
+	antigravityTransportOnce.Do(initAntigravityTransport)
+
+	client := newProxyAwareHTTPClient(ctx, cfg, auth, timeout)
+	switch rt := client.Transport.(type) {
+	case nil:
+		// Nothing configured: fall back to the shared HTTP/1.1 transport.
+		client.Transport = antigravityTransport
+	case *http.Transport:
+		// Preserve proxy settings from the proxy-aware transport while forcing HTTP/1.1.
+		client.Transport = cloneTransportWithHTTP11(rt)
+	}
+	return client
+}
+
 // Identifier returns the executor identifier.
 func (e *AntigravityExecutor) Identifier() string { return antigravityAuthType }

@@ -162,6 +219,8 @@ func (e *AntigravityExecutor) PrepareRequest(req *http.Request, auth *cliproxyau
 }

 // HttpRequest injects Antigravity credentials into the request and executes it.
+// It uses a whitelist approach: all incoming headers are stripped and only
+// the minimum set required by the Antigravity protocol is explicitly set.
 func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, req *http.Request) (*http.Response, error) {
 	if req == nil {
 		return nil, fmt.Errorf("antigravity executor: request is nil")
@@ -170,10 +229,29 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut
 		ctx = req.Context()
 	}
 	httpReq := req.WithContext(ctx)
+
+	// --- Whitelist: save only the headers we need from the original request ---
+	contentType := httpReq.Header.Get("Content-Type")
+
+	// Wipe ALL incoming headers
+	for k := range httpReq.Header {
+		delete(httpReq.Header, k)
+	}
+
+	// --- Set only the headers Antigravity actually sends ---
+	if contentType != "" {
+		httpReq.Header.Set("Content-Type", contentType)
+	}
+	// Content-Length is managed automatically by Go's http.Client from the Body
+	httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
+	httpReq.Close = true // sends Connection: close
+
+	// Inject Authorization: Bearer <token>
 	if err := e.PrepareRequest(httpReq, auth); err != nil {
 		return nil, err
 	}
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
 	return httpClient.Do(httpReq)
 }

@@ -185,7 +263,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 	isClaude := strings.Contains(strings.ToLower(baseModel), "claude")

-	if isClaude || strings.Contains(baseModel, "gemini-3-pro") {
+	if isClaude || strings.Contains(baseModel, "gemini-3-pro") || strings.Contains(baseModel, "gemini-3.1-flash-image") {
 		return e.executeClaudeNonStream(ctx, auth, req, opts)
 	}

@@ -220,7 +298,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)

 	attempts := antigravityRetryAttempts(auth, e.cfg)

@@ -362,7 +440,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)

 	attempts := antigravityRetryAttempts(auth, e.cfg)

@@ -754,7 +832,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)

 	attempts := antigravityRetryAttempts(auth, e.cfg)

@@ -956,7 +1034,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	payload = deleteJSONField(payload, "request.safetySettings")

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -987,10 +1065,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 		if errReq != nil {
 			return cliproxyexecutor.Response{}, errReq
 		}
+		httpReq.Close = true
 		httpReq.Header.Set("Content-Type", "application/json")
 		httpReq.Header.Set("Authorization", "Bearer "+token)
 		httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
-		httpReq.Header.Set("Accept", "application/json")
 		if host := resolveHost(base); host != "" {
 			httpReq.Host = host
 		}
@@ -1084,14 +1162,26 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 	}

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
-	httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, cfg, auth, 0)

 	for idx, baseURL := range baseURLs {
 		modelsURL := baseURL + antigravityModelsPath
-		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, bytes.NewReader([]byte(`{}`)))
+
+		var payload []byte
+		if auth != nil && auth.Metadata != nil {
+			if pid, ok := auth.Metadata["project_id"].(string); ok && strings.TrimSpace(pid) != "" {
+				payload = []byte(fmt.Sprintf(`{"project": "%s"}`, strings.TrimSpace(pid)))
+			}
+		}
+		if len(payload) == 0 {
+			payload = []byte(`{}`)
+		}
+
+		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, bytes.NewReader(payload))
 		if errReq != nil {
 			return fallbackAntigravityPrimaryModels()
 		}
+		httpReq.Close = true
 		httpReq.Header.Set("Content-Type", "application/json")
 		httpReq.Header.Set("Authorization", "Bearer "+token)
 		httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
@@ -1152,7 +1242,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				continue
 			}
 			switch modelID {
-			case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
+			case "chat_20706", "chat_23310", "tab_flash_lite_preview", "tab_jump_flash_lite_preview", "gemini-2.5-flash-thinking", "gemini-2.5-pro":
 				continue
 			}
 			modelCfg := modelConfig[modelID]
@@ -1174,6 +1264,29 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				OwnedBy:     antigravityAuthType,
 				Type:        antigravityAuthType,
 			}
+
+			// Build input modalities from upstream capability flags.
+			inputModalities := []string{"TEXT"}
+			if modelData.Get("supportsImages").Bool() {
+				inputModalities = append(inputModalities, "IMAGE")
+			}
+			if modelData.Get("supportsVideo").Bool() {
+				inputModalities = append(inputModalities, "VIDEO")
+			}
+			modelInfo.SupportedInputModalities = inputModalities
+			modelInfo.SupportedOutputModalities = []string{"TEXT"}
+
+			// Token limits from upstream.
+			if maxTok := modelData.Get("maxTokens").Int(); maxTok > 0 {
+				modelInfo.InputTokenLimit = int(maxTok)
+			}
+			if maxOut := modelData.Get("maxOutputTokens").Int(); maxOut > 0 {
+				modelInfo.OutputTokenLimit = int(maxOut)
+			}
+
+			// Supported generation methods (Gemini v1beta convention).
+			modelInfo.SupportedGenerationMethods = []string{"generateContent", "countTokens"}
+
 			// Look up Thinking support from static config using upstream model name.
 			if modelCfg != nil {
 				if modelCfg.Thinking != nil {
@@ -1241,10 +1354,11 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
 		return auth, errReq
 	}
 	httpReq.Header.Set("Host", "oauth2.googleapis.com")
-	httpReq.Header.Set("User-Agent", defaultAntigravityAgent)
 	httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	// Real Antigravity uses Go's default User-Agent for OAuth token refresh
+	httpReq.Header.Set("User-Agent", "Go-http-client/2.0")

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
 		return auth, errDo
@@ -1315,7 +1429,7 @@ func (e *AntigravityExecutor) ensureAntigravityProjectID(ctx context.Context, au
 		return nil
 	}

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
 	projectID, errFetch := sdkAuth.FetchAntigravityProjectID(ctx, token, httpClient)
 	if errFetch != nil {
 		return errFetch
@@ -1369,7 +1483,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	payload = geminiToAntigravity(modelName, payload, projectID)
 	payload, _ = sjson.SetBytes(payload, "model", modelName)

-	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high")
+	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro") || strings.Contains(modelName, "gemini-3.1-pro")
 	payloadStr := string(payload)
 	paths := make([]string, 0)
 	util.Walk(gjson.Parse(payloadStr), "", "parametersJsonSchema", &paths)
@@ -1383,18 +1497,18 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		payloadStr = util.CleanJSONSchemaForGemini(payloadStr)
 	}

-	if useAntigravitySchema {
-		systemInstructionPartsResult := gjson.Get(payloadStr, "request.systemInstruction.parts")
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.role", "user")
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.0.text", systemInstruction)
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))
+	// if useAntigravitySchema {
+	// 	systemInstructionPartsResult := gjson.Get(payloadStr, "request.systemInstruction.parts")
+	// 	payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.role", "user")
+	// 	payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.0.text", systemInstruction)
+	// 	payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))

-		if systemInstructionPartsResult.Exists() && systemInstructionPartsResult.IsArray() {
-			for _, partResult := range systemInstructionPartsResult.Array() {
-				payloadStr, _ = sjson.SetRaw(payloadStr, "request.systemInstruction.parts.-1", partResult.Raw)
-			}
-		}
-	}
+	// 	if systemInstructionPartsResult.Exists() && systemInstructionPartsResult.IsArray() {
+	// 		for _, partResult := range systemInstructionPartsResult.Array() {
+	// 			payloadStr, _ = sjson.SetRaw(payloadStr, "request.systemInstruction.parts.-1", partResult.Raw)
+	// 		}
+	// 	}
+	// }

 	if strings.Contains(modelName, "claude") {
 		payloadStr, _ = sjson.Set(payloadStr, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
@@ -1406,14 +1520,10 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	if errReq != nil {
 		return nil, errReq
 	}
+	httpReq.Close = true
 	httpReq.Header.Set("Content-Type", "application/json")
 	httpReq.Header.Set("Authorization", "Bearer "+token)
 	httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
-	if stream {
-		httpReq.Header.Set("Accept", "text/event-stream")
-	} else {
-		httpReq.Header.Set("Accept", "application/json")
-	}
 	if host := resolveHost(base); host != "" {
 		httpReq.Host = host
 	}
@@ -1625,7 +1735,16 @@ func resolveCustomAntigravityBaseURL(auth *cliproxyauth.Auth) string {
 func geminiToAntigravity(modelName string, payload []byte, projectID string) []byte {
 	template, _ := sjson.Set(string(payload), "model", modelName)
 	template, _ = sjson.Set(template, "userAgent", "antigravity")
-	template, _ = sjson.Set(template, "requestType", "agent")
+
+	isImageModel := strings.Contains(modelName, "image")
+
+	var reqType string
+	if isImageModel {
+		reqType = "image_gen"
+	} else {
+		reqType = "agent"
+	}
+	template, _ = sjson.Set(template, "requestType", reqType)

 	// Use real project ID from auth if available, otherwise generate random (legacy fallback)
 	if projectID != "" {
@@ -1633,8 +1752,13 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 	} else {
 		template, _ = sjson.Set(template, "project", generateProjectID())
 	}
-	template, _ = sjson.Set(template, "requestId", generateRequestID())
-	template, _ = sjson.Set(template, "request.sessionId", generateStableSessionID(payload))
+
+	if isImageModel {
+		template, _ = sjson.Set(template, "requestId", generateImageGenRequestID())
+	} else {
+		template, _ = sjson.Set(template, "requestId", generateRequestID())
+		template, _ = sjson.Set(template, "request.sessionId", generateStableSessionID(payload))
+	}

 	template, _ = sjson.Delete(template, "request.safetySettings")
 	if toolConfig := gjson.Get(template, "toolConfig"); toolConfig.Exists() && !gjson.Get(template, "request.toolConfig").Exists() {
@@ -1648,6 +1772,10 @@ func generateRequestID() string {
 	return "agent-" + uuid.NewString()
 }

+// generateImageGenRequestID builds a request ID of the form
+// "image_gen/<unix-milliseconds>/<uuid>/12" from the current time and a fresh UUID.
+// NOTE(review): the trailing "12" is a fixed literal whose meaning is not evident here — confirm against upstream.
+func generateImageGenRequestID() string {
+	return fmt.Sprintf("image_gen/%d/%s/12", time.Now().UnixMilli(), uuid.NewString())
+}
+
 func generateSessionID() string {
 	randSourceMutex.Lock()
 	n := randSource.Int63n(9_000_000_000_000_000_000)
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -9,9 +9,11 @@ import (
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
+	"net/textproto"
 	"runtime"
 	"strings"
 	"time"
@@ -135,6 +137,15 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		body = ensureCacheControl(body)
 	}

+	// Enforce Anthropic's cache_control block limit (max 4 breakpoints per request).
+	// Cloaking and ensureCacheControl may push the total over 4 when the client
+	// (e.g. Amp CLI) already sends multiple cache_control blocks.
+	body = enforceCacheControlLimit(body, 4)
+
+	// Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05.
+	// A 1h-TTL block must not appear after a 5m-TTL block in evaluation order (tools→system→messages).
+	body = normalizeCacheControlTTL(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -176,11 +187,29 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-		b, _ := io.ReadAll(httpResp.Body)
+		// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
+		errBody := httpResp.Body
+		if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
+			var decErr error
+			errBody, decErr = decodeResponseBody(httpResp.Body, ce)
+			if decErr != nil {
+				recordAPIResponseError(ctx, e.cfg, decErr)
+				msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
+				logWithRequestID(ctx).Warn(msg)
+				return resp, statusErr{code: httpResp.StatusCode, msg: msg}
+			}
+		}
+		b, readErr := io.ReadAll(errBody)
+		if readErr != nil {
+			recordAPIResponseError(ctx, e.cfg, readErr)
+			msg := fmt.Sprintf("failed to read error response body: %v", readErr)
+			logWithRequestID(ctx).Warn(msg)
+			b = []byte(msg)
+		}
 		appendAPIResponseChunk(ctx, e.cfg, b)
 		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
-		if errClose := httpResp.Body.Close(); errClose != nil {
+		if errClose := errBody.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
 		return resp, err
@@ -276,6 +305,12 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		body = ensureCacheControl(body)
 	}

+	// Enforce Anthropic's cache_control block limit (max 4 breakpoints per request).
+	body = enforceCacheControlLimit(body, 4)
+
+	// Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05.
+	body = normalizeCacheControlTTL(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -317,10 +352,28 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-		b, _ := io.ReadAll(httpResp.Body)
+		// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
+		errBody := httpResp.Body
+		if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
+			var decErr error
+			errBody, decErr = decodeResponseBody(httpResp.Body, ce)
+			if decErr != nil {
+				recordAPIResponseError(ctx, e.cfg, decErr)
+				msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
+				logWithRequestID(ctx).Warn(msg)
+				return nil, statusErr{code: httpResp.StatusCode, msg: msg}
+			}
+		}
+		b, readErr := io.ReadAll(errBody)
+		if readErr != nil {
+			recordAPIResponseError(ctx, e.cfg, readErr)
+			msg := fmt.Sprintf("failed to read error response body: %v", readErr)
+			logWithRequestID(ctx).Warn(msg)
+			b = []byte(msg)
+		}
 		appendAPIResponseChunk(ctx, e.cfg, b)
 		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
-		if errClose := httpResp.Body.Close(); errClose != nil {
+		if errClose := errBody.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
@@ -425,6 +478,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 		body = checkSystemInstructions(body)
 	}

+	// Keep count_tokens requests compatible with Anthropic cache-control constraints too.
+	body = enforceCacheControlLimit(body, 4)
+	body = normalizeCacheControlTTL(body)
+
 	// Extract betas from body and convert to header (for count_tokens too)
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -464,9 +521,27 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		b, _ := io.ReadAll(resp.Body)
+		// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
+		errBody := resp.Body
+		if ce := resp.Header.Get("Content-Encoding"); ce != "" {
+			var decErr error
+			errBody, decErr = decodeResponseBody(resp.Body, ce)
+			if decErr != nil {
+				recordAPIResponseError(ctx, e.cfg, decErr)
+				msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
+				logWithRequestID(ctx).Warn(msg)
+				return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg}
+			}
+		}
+		b, readErr := io.ReadAll(errBody)
+		if readErr != nil {
+			recordAPIResponseError(ctx, e.cfg, readErr)
+			msg := fmt.Sprintf("failed to read error response body: %v", readErr)
+			logWithRequestID(ctx).Warn(msg)
+			b = []byte(msg)
+		}
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		if errClose := resp.Body.Close(); errClose != nil {
+		if errClose := errBody.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)}
@@ -709,11 +784,21 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 		}
 	}

-	// Merge extra betas from request body
-	if len(extraBetas) > 0 {
+	hasClaude1MHeader := false
+	if ginHeaders != nil {
+		if _, ok := ginHeaders[textproto.CanonicalMIMEHeaderKey("X-CPA-CLAUDE-1M")]; ok {
+			hasClaude1MHeader = true
+		}
+	}
+
+	// Merge extra betas from request body and request flags.
+	if len(extraBetas) > 0 || hasClaude1MHeader {
 		existingSet := make(map[string]bool)
 		for _, b := range strings.Split(baseBetas, ",") {
-			existingSet[strings.TrimSpace(b)] = true
+			betaName := strings.TrimSpace(b)
+			if betaName != "" {
+				existingSet[betaName] = true
+			}
 		}
 		for _, beta := range extraBetas {
 			beta = strings.TrimSpace(beta)
@@ -722,6 +807,9 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 				existingSet[beta] = true
 			}
 		}
+		if hasClaude1MHeader && !existingSet["context-1m-2025-08-07"] {
+			baseBetas += ",context-1m-2025-08-07"
+		}
 	}
 	r.Header.Set("Anthropic-Beta", baseBetas)

@@ -1073,17 +1161,22 @@ func generateBillingHeader(payload []byte) string {
 	return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch)
 }

-// checkSystemInstructionsWithMode injects Claude Code system prompt to match
-// the real Claude Code request format:
-//   system[0]: billing header (no cache_control)
-//   system[1]: "You are a Claude agent, built on Anthropic's Claude Agent SDK." (with cache_control)
-//   system[2..]: user's system messages (with cache_control on last)
+// checkSystemInstructionsWithMode injects Claude Code-style system blocks:
+//
+//	system[0]: billing header (no cache_control)
+//	system[1]: agent identifier (no cache_control)
+//	system[2..]: user system messages (cache_control added when missing)
 func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
 	system := gjson.GetBytes(payload, "system")

 	billingText := generateBillingHeader(payload)
 	billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText)
-	agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK.","cache_control":{"type":"ephemeral","ttl":"1h"}}`
+	// No cache_control on the agent block. It is a cloaking artifact with zero cache
+	// value (the last system block is what actually triggers caching of all system content).
+	// Including any cache_control here creates an intra-system TTL ordering violation
+	// when the client's system blocks use ttl='1h' (prompt-caching-scope-2026-01-05 beta
+	// forbids 1h blocks after 5m blocks, and a no-TTL block defaults to 5m).
+	agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK."}`

 	if strictMode {
 		// Strict mode: billing header + agent identifier only
@@ -1103,11 +1196,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
 	if system.IsArray() {
 		system.ForEach(func(_, part gjson.Result) bool {
 			if part.Get("type").String() == "text" {
-				// Add cache_control with ttl to user system messages if not present
+				// Add cache_control to user system messages if not present.
+				// Do NOT add ttl — let it inherit the default (5m) to avoid
+				// TTL ordering violations with the prompt-caching-scope-2026-01-05 beta.
 				partJSON := part.Raw
 				if !part.Get("cache_control").Exists() {
 					partJSON, _ = sjson.Set(partJSON, "cache_control.type", "ephemeral")
-					partJSON, _ = sjson.Set(partJSON, "cache_control.ttl", "1h")
 				}
 				result += "," + partJSON
 			}
@@ -1254,6 +1348,313 @@ func countCacheControls(payload []byte) int {
 	return count
 }

+// parsePayloadObject decodes payload into a generic JSON object map.
+// The boolean result is false when payload is empty or cannot be
+// unmarshalled into a map; in that case the returned map is nil.
+func parsePayloadObject(payload []byte) (map[string]any, bool) {
+	var decoded map[string]any
+	if len(payload) == 0 || json.Unmarshal(payload, &decoded) != nil {
+		return nil, false
+	}
+	return decoded, true
+}
+
+// marshalPayloadObject encodes root back to JSON. It falls back to the
+// original bytes when root is nil or when encoding fails, so callers always
+// receive a usable payload.
+func marshalPayloadObject(original []byte, root map[string]any) []byte {
+	if root != nil {
+		if encoded, err := json.Marshal(root); err == nil {
+			return encoded
+		}
+	}
+	return original
+}
+
+// asObject reports whether v is a decoded JSON object and, if so, returns it.
+// For any other dynamic type it returns a nil map and false.
+func asObject(v any) (map[string]any, bool) {
+	switch typed := v.(type) {
+	case map[string]any:
+		return typed, true
+	default:
+		return nil, false
+	}
+}
+
+// asArray reports whether v is a decoded JSON array and, if so, returns it.
+// For any other dynamic type it returns a nil slice and false.
+func asArray(v any) ([]any, bool) {
+	switch typed := v.(type) {
+	case []any:
+		return typed, true
+	default:
+		return nil, false
+	}
+}
+
+// countCacheControlsMap returns how many blocks in a decoded payload carry a
+// cache_control key. It inspects the top-level "system" and "tools" arrays
+// and the "content" array of every entry in "messages". Entries that are not
+// JSON objects are ignored.
+func countCacheControlsMap(root map[string]any) int {
+	// tally counts the object entries of v that have a cache_control key;
+	// it yields 0 when v is not an array.
+	tally := func(v any) int {
+		entries, isArr := asArray(v)
+		if !isArr {
+			return 0
+		}
+		n := 0
+		for _, entry := range entries {
+			obj, isObj := asObject(entry)
+			if !isObj {
+				continue
+			}
+			if _, found := obj["cache_control"]; found {
+				n++
+			}
+		}
+		return n
+	}
+
+	total := tally(root["system"]) + tally(root["tools"])
+
+	if messages, isArr := asArray(root["messages"]); isArr {
+		for _, msg := range messages {
+			if msgObj, isObj := asObject(msg); isObj {
+				total += tally(msgObj["content"])
+			}
+		}
+	}
+
+	return total
+}
+
+// normalizeTTLForBlock applies the TTL ordering rule to a single block.
+//
+// Blocks without cache_control are ignored. A block whose cache_control is an
+// object with ttl == "1h" has that ttl removed when *seen5m is already true.
+// Every other cache_control (non-object, missing ttl, non-string ttl, or any
+// ttl other than "1h") sets *seen5m to true.
+func normalizeTTLForBlock(obj map[string]any, seen5m *bool) {
+	rawCC, hasCC := obj["cache_control"]
+	if !hasCC {
+		return
+	}
+	if cc, isObj := asObject(rawCC); isObj {
+		if ttl, isStr := cc["ttl"].(string); isStr && ttl == "1h" {
+			if *seen5m {
+				delete(cc, "ttl")
+			}
+			return
+		}
+	}
+	*seen5m = true
+}
+
+// findLastCacheControlIndex returns the index of the last object in arr that
+// has a cache_control key, or -1 when there is none.
+func findLastCacheControlIndex(arr []any) int {
+	// Scan from the end so the first hit is the answer.
+	for idx := len(arr) - 1; idx >= 0; idx-- {
+		obj, isObj := asObject(arr[idx])
+		if !isObj {
+			continue
+		}
+		if _, found := obj["cache_control"]; found {
+			return idx
+		}
+	}
+	return -1
+}
+
+// stripCacheControlExceptIndex deletes cache_control from the objects in arr,
+// in array order, skipping the entry at preserveIdx. Each deletion decrements
+// *excess, and the walk stops as soon as *excess is no longer positive.
+func stripCacheControlExceptIndex(arr []any, preserveIdx int, excess *int) {
+	for idx := 0; idx < len(arr) && *excess > 0; idx++ {
+		if idx == preserveIdx {
+			continue
+		}
+		obj, isObj := asObject(arr[idx])
+		if !isObj {
+			continue
+		}
+		if _, found := obj["cache_control"]; found {
+			delete(obj, "cache_control")
+			*excess--
+		}
+	}
+}
+
+// stripAllCacheControl deletes cache_control from the objects in arr, in
+// array order, decrementing *excess per deletion and stopping once *excess
+// is no longer positive.
+func stripAllCacheControl(arr []any, excess *int) {
+	for idx := 0; idx < len(arr) && *excess > 0; idx++ {
+		obj, isObj := asObject(arr[idx])
+		if !isObj {
+			continue
+		}
+		if _, found := obj["cache_control"]; !found {
+			continue
+		}
+		delete(obj, "cache_control")
+		*excess--
+	}
+}
+
+// stripMessageCacheControl walks messages in order and, within each message,
+// its "content" array in order, deleting cache_control from content objects.
+// Each deletion decrements *excess; the walk ends once *excess is no longer
+// positive. Messages that are not objects or lack an array "content" are
+// skipped.
+func stripMessageCacheControl(messages []any, excess *int) {
+	for _, msg := range messages {
+		if *excess <= 0 {
+			break
+		}
+		msgObj, isObj := asObject(msg)
+		if !isObj {
+			continue
+		}
+		content, isArr := asArray(msgObj["content"])
+		if !isArr {
+			continue
+		}
+		for idx := 0; idx < len(content) && *excess > 0; idx++ {
+			part, isPart := asObject(content[idx])
+			if !isPart {
+				continue
+			}
+			if _, found := part["cache_control"]; found {
+				delete(part, "cache_control")
+				*excess--
+			}
+		}
+	}
+}
+
+// normalizeCacheControlTTL ensures cache_control TTL values don't violate the
+// prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not
+// appear after a 5m-TTL block anywhere in the evaluation order.
+//
+// Anthropic evaluates blocks in order: tools → system (index 0..N) → messages.
+// Within each section, blocks are evaluated in array order. A 5m (default) block
+// followed by a 1h block at ANY later position is an error — including within
+// the same section (e.g. system[1]=5m then system[3]=1h).
+//
+// Strategy: walk all cache_control blocks in evaluation order. Once a 5m block
+// is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m).
+//
+// The payload is returned byte-for-byte unchanged when it cannot be parsed as
+// a JSON object or when no ttl had to be stripped. Re-encoding through
+// map[string]any is lossy (numbers become float64, keys are re-sorted, and
+// <, >, & are escaped by encoding/json), so it only happens when a rewrite is
+// actually required.
+func normalizeCacheControlTTL(payload []byte) []byte {
+	root, ok := parsePayloadObject(payload)
+	if !ok {
+		return payload
+	}
+
+	seen5m := false
+	changed := false
+
+	// hasTTL reports whether obj has an object-valued cache_control with a ttl key.
+	hasTTL := func(obj map[string]any) bool {
+		cc, isObj := asObject(obj["cache_control"])
+		if !isObj {
+			return false
+		}
+		_, exists := cc["ttl"]
+		return exists
+	}
+
+	// normalizeAll applies the ordering rule to every object in items, in
+	// array order, and records whether any ttl was stripped.
+	normalizeAll := func(items []any) {
+		for _, item := range items {
+			obj, isObj := asObject(item)
+			if !isObj {
+				continue
+			}
+			hadTTL := hasTTL(obj)
+			normalizeTTLForBlock(obj, &seen5m)
+			if hadTTL && !hasTTL(obj) {
+				changed = true
+			}
+		}
+	}
+
+	if tools, ok := asArray(root["tools"]); ok {
+		normalizeAll(tools)
+	}
+
+	if system, ok := asArray(root["system"]); ok {
+		normalizeAll(system)
+	}
+
+	if messages, ok := asArray(root["messages"]); ok {
+		for _, msg := range messages {
+			msgObj, ok := asObject(msg)
+			if !ok {
+				continue
+			}
+			content, ok := asArray(msgObj["content"])
+			if !ok {
+				continue
+			}
+			normalizeAll(content)
+		}
+	}
+
+	// Nothing was downgraded: hand back the caller's exact bytes.
+	if !changed {
+		return payload
+	}
+	return marshalPayloadObject(payload, root)
+}
+
+// enforceCacheControlLimit trims cache_control markers from a payload until
+// at most maxBlocks remain (the Anthropic API limit is currently 4).
+//
+// Anthropic evaluates cache breakpoints in order: tools → system → messages.
+// The most valuable breakpoints are:
+//  1. Last tool         — caches ALL tool definitions
+//  2. Last system block — caches ALL system content
+//  3. Recent messages   — cache conversation context
+//
+// Markers are removed lowest-value first, one phase at a time, stopping as
+// soon as the limit is met:
+//
+//	Phase 1: system blocks earliest-first, preserving the last one.
+//	Phase 2: tool blocks earliest-first, preserving the last one.
+//	Phase 3: message content blocks earliest-first.
+//	Phase 4: remaining system blocks (last system).
+//	Phase 5: remaining tool blocks (last tool).
+//
+// The original bytes are returned untouched when the payload is not a JSON
+// object or is already within the limit.
+func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte {
+	root, ok := parsePayloadObject(payload)
+	if !ok {
+		return payload
+	}
+
+	excess := countCacheControlsMap(root) - maxBlocks
+	if excess <= 0 {
+		return payload
+	}
+
+	// A missing or non-array section yields a nil slice, which every strip
+	// helper treats as empty.
+	system, _ := asArray(root["system"])
+	tools, _ := asArray(root["tools"])
+	messages, _ := asArray(root["messages"])
+
+	phases := []func(){
+		func() { stripCacheControlExceptIndex(system, findLastCacheControlIndex(system), &excess) },
+		func() { stripCacheControlExceptIndex(tools, findLastCacheControlIndex(tools), &excess) },
+		func() { stripMessageCacheControl(messages, &excess) },
+		func() { stripAllCacheControl(system, &excess) },
+		func() { stripAllCacheControl(tools, &excess) },
+	}
+	for _, runPhase := range phases {
+		if excess <= 0 {
+			break
+		}
+		runPhase()
+	}
+
+	return marshalPayloadObject(payload, root)
+}
+
 // injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
 // Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
 // This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"net/http"
 	"net/http/httptest"
+	"strings"
 	"testing"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -348,3 +349,237 @@ func TestApplyClaudeToolPrefix_SkipsBuiltinToolReference(t *testing.T) {
 		t.Fatalf("built-in tool_reference should not be prefixed, got %q", got)
 	}
 }
+
+// TestNormalizeCacheControlTTL_DowngradesLaterOneHourBlocks checks that a 1h
+// block seen before any default-TTL block keeps its ttl, while a 1h block
+// that follows a default-TTL block loses it.
+func TestNormalizeCacheControlTTL_DowngradesLaterOneHourBlocks(t *testing.T) {
+	payload := []byte(`{
+		"tools": [{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}],
+		"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
+		"messages": [{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}]
+	}`)
+
+	normalized := normalizeCacheControlTTL(payload)
+
+	toolTTL := gjson.GetBytes(normalized, "tools.0.cache_control.ttl").String()
+	if toolTTL != "1h" {
+		t.Fatalf("tools.0.cache_control.ttl = %q, want %q", toolTTL, "1h")
+	}
+	if msgTTL := gjson.GetBytes(normalized, "messages.0.content.0.cache_control.ttl"); msgTTL.Exists() {
+		t.Fatalf("messages.0.content.0.cache_control.ttl should be removed after a default-5m block")
+	}
+}
+
+// TestEnforceCacheControlLimit_StripsNonLastToolBeforeMessages feeds five
+// cache_control blocks with a limit of four and expects only the non-last
+// tool marker to be dropped.
+func TestEnforceCacheControlLimit_StripsNonLastToolBeforeMessages(t *testing.T) {
+	payload := []byte(`{
+		"tools": [
+			{"name":"t1","cache_control":{"type":"ephemeral"}},
+			{"name":"t2","cache_control":{"type":"ephemeral"}}
+		],
+		"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
+		"messages": [
+			{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral"}}]},
+			{"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral"}}]}
+		]
+	}`)
+
+	trimmed := enforceCacheControlLimit(payload, 4)
+	present := func(path string) bool {
+		return gjson.GetBytes(trimmed, path).Exists()
+	}
+
+	if got := countCacheControls(trimmed); got != 4 {
+		t.Fatalf("cache_control count = %d, want 4", got)
+	}
+	if present("tools.0.cache_control") {
+		t.Fatalf("tools.0.cache_control should be removed first (non-last tool)")
+	}
+	if !present("tools.1.cache_control") {
+		t.Fatalf("tools.1.cache_control (last tool) should be preserved")
+	}
+	for _, path := range []string{"messages.0.content.0.cache_control", "messages.1.content.0.cache_control"} {
+		if !present(path) {
+			t.Fatalf("message cache_control blocks should be preserved when non-last tool removal is enough")
+		}
+	}
+}
+
+// TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit verifies that
+// a payload with only tool markers is still cut down to the limit, dropping
+// the earliest tool marker and keeping the last one.
+func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T) {
+	payload := []byte(`{
+		"tools": [
+			{"name":"t1","cache_control":{"type":"ephemeral"}},
+			{"name":"t2","cache_control":{"type":"ephemeral"}},
+			{"name":"t3","cache_control":{"type":"ephemeral"}},
+			{"name":"t4","cache_control":{"type":"ephemeral"}},
+			{"name":"t5","cache_control":{"type":"ephemeral"}}
+		]
+	}`)
+
+	trimmed := enforceCacheControlLimit(payload, 4)
+
+	if got := countCacheControls(trimmed); got != 4 {
+		t.Fatalf("cache_control count = %d, want 4", got)
+	}
+	firstTool := gjson.GetBytes(trimmed, "tools.0.cache_control")
+	if firstTool.Exists() {
+		t.Fatalf("tools.0.cache_control should be removed to satisfy max=4")
+	}
+	lastTool := gjson.GetBytes(trimmed, "tools.4.cache_control")
+	if !lastTool.Exists() {
+		t.Fatalf("last tool cache_control should be preserved when possible")
+	}
+}
+
+// TestClaudeExecutor_CountTokens_AppliesCacheControlGuards sends a payload
+// with six cache_control blocks and mixed TTLs through CountTokens, then
+// inspects the body the fake upstream received: it must carry at most four
+// cache_control blocks and no 1h-after-5m ordering violation.
+func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
+	var captured []byte
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		raw, _ := io.ReadAll(r.Body)
+		captured = bytes.Clone(raw)
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"input_tokens":42}`))
+	}
+	server := httptest.NewServer(http.HandlerFunc(handler))
+	defer server.Close()
+
+	executor := NewClaudeExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"api_key":  "key-123",
+		"base_url": server.URL,
+	}}
+
+	payload := []byte(`{
+		"tools": [
+			{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}},
+			{"name":"t2","cache_control":{"type":"ephemeral"}}
+		],
+		"system": [
+			{"type":"text","text":"s1","cache_control":{"type":"ephemeral","ttl":"1h"}},
+			{"type":"text","text":"s2","cache_control":{"type":"ephemeral","ttl":"1h"}}
+		],
+		"messages": [
+			{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]},
+			{"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral","ttl":"1h"}}]}
+		]
+	}`)
+
+	req := cliproxyexecutor.Request{
+		Model:   "claude-3-5-haiku-20241022",
+		Payload: payload,
+	}
+	opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}
+	if _, err := executor.CountTokens(context.Background(), auth, req, opts); err != nil {
+		t.Fatalf("CountTokens error: %v", err)
+	}
+
+	if len(captured) == 0 {
+		t.Fatal("expected count_tokens request body to be captured")
+	}
+	if got := countCacheControls(captured); got > 4 {
+		t.Fatalf("count_tokens body has %d cache_control blocks, want <= 4", got)
+	}
+	if hasTTLOrderingViolation(captured) {
+		t.Fatalf("count_tokens body still has ttl ordering violations: %s", string(captured))
+	}
+}
+
+// hasTTLOrderingViolation reports whether payload contains a cache_control
+// with ttl "1h" that comes after a cache_control with any other (or no) ttl.
+// Blocks are visited in the order tools, system, then each message's content.
+func hasTTLOrderingViolation(payload []byte) bool {
+	// Gather every block's cache_control lookup in evaluation order first.
+	var ordered []gjson.Result
+	collect := func(arr gjson.Result) {
+		if !arr.IsArray() {
+			return
+		}
+		arr.ForEach(func(_, item gjson.Result) bool {
+			ordered = append(ordered, item.Get("cache_control"))
+			return true
+		})
+	}
+
+	collect(gjson.GetBytes(payload, "tools"))
+	collect(gjson.GetBytes(payload, "system"))
+	if messages := gjson.GetBytes(payload, "messages"); messages.IsArray() {
+		messages.ForEach(func(_, msg gjson.Result) bool {
+			collect(msg.Get("content"))
+			return true
+		})
+	}
+
+	// Then scan once: any 1h entry after a non-1h entry is a violation.
+	seen5m := false
+	for _, cc := range ordered {
+		if !cc.Exists() {
+			continue
+		}
+		if cc.Get("ttl").String() != "1h" {
+			seen5m = true
+			continue
+		}
+		if seen5m {
+			return true
+		}
+	}
+	return false
+}
+
+// TestClaudeExecutor_Execute_InvalidGzipErrorBodyReturnsDecodeMessage runs the
+// shared invalid-gzip error-body scenario against Execute.
+func TestClaudeExecutor_Execute_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
+	call := func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
+		req := cliproxyexecutor.Request{
+			Model:   "claude-3-5-sonnet-20241022",
+			Payload: payload,
+		}
+		opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}
+		_, err := executor.Execute(context.Background(), auth, req, opts)
+		return err
+	}
+	testClaudeExecutorInvalidCompressedErrorBody(t, call)
+}
+
+// TestClaudeExecutor_ExecuteStream_InvalidGzipErrorBodyReturnsDecodeMessage
+// runs the shared invalid-gzip error-body scenario against ExecuteStream.
+func TestClaudeExecutor_ExecuteStream_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
+	call := func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
+		req := cliproxyexecutor.Request{
+			Model:   "claude-3-5-sonnet-20241022",
+			Payload: payload,
+		}
+		opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}
+		_, err := executor.ExecuteStream(context.Background(), auth, req, opts)
+		return err
+	}
+	testClaudeExecutorInvalidCompressedErrorBody(t, call)
+}
+
+// TestClaudeExecutor_CountTokens_InvalidGzipErrorBodyReturnsDecodeMessage runs
+// the shared invalid-gzip error-body scenario against CountTokens.
+func TestClaudeExecutor_CountTokens_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
+	call := func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
+		req := cliproxyexecutor.Request{
+			Model:   "claude-3-5-sonnet-20241022",
+			Payload: payload,
+		}
+		opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}
+		_, err := executor.CountTokens(context.Background(), auth, req, opts)
+		return err
+	}
+	testClaudeExecutorInvalidCompressedErrorBody(t, call)
+}
+
+// testClaudeExecutorInvalidCompressedErrorBody is the shared scenario for the
+// invalid-gzip tests. It starts a server that answers 400 with
+// Content-Encoding: gzip and a body that is not gzip, calls invoke against
+// it, and requires an error that mentions the decode failure and exposes
+// status code 400 through a StatusCode() method.
+func testClaudeExecutorInvalidCompressedErrorBody(
+	t *testing.T,
+	invoke func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error,
+) {
+	t.Helper()
+
+	badGzip := func(w http.ResponseWriter, r *http.Request) {
+		headers := w.Header()
+		headers.Set("Content-Type", "application/json")
+		headers.Set("Content-Encoding", "gzip")
+		w.WriteHeader(http.StatusBadRequest)
+		_, _ = w.Write([]byte("not-a-valid-gzip-stream"))
+	}
+	server := httptest.NewServer(http.HandlerFunc(badGzip))
+	defer server.Close()
+
+	executor := NewClaudeExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"api_key":  "key-123",
+		"base_url": server.URL,
+	}}
+	payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`)
+
+	err := invoke(executor, auth, payload)
+	if err == nil {
+		t.Fatal("expected error, got nil")
+	}
+	if !strings.Contains(err.Error(), "failed to decode error response body") {
+		t.Fatalf("expected decode failure message, got: %v", err)
+	}
+	withStatus, hasStatus := err.(interface{ StatusCode() int })
+	if !hasStatus || withStatus.StatusCode() != http.StatusBadRequest {
+		t.Fatalf("expected status code 400, got: %v", err)
+	}
+}
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -16,7 +16,6 @@ import (
 	"strings"
 	"time"

-	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
@@ -81,7 +80,7 @@ func (e *GeminiCLIExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth
 		return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
 	}
 	req.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-	applyGeminiCLIHeaders(req)
+	applyGeminiCLIHeaders(req, "unknown")
 	return nil
 }

@@ -189,7 +188,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		}
 		reqHTTP.Header.Set("Content-Type", "application/json")
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-		applyGeminiCLIHeaders(reqHTTP)
+		applyGeminiCLIHeaders(reqHTTP, attemptModel)
 		reqHTTP.Header.Set("Accept", "application/json")
 		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
 			URL:       url,
@@ -334,7 +333,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		}
 		reqHTTP.Header.Set("Content-Type", "application/json")
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-		applyGeminiCLIHeaders(reqHTTP)
+		applyGeminiCLIHeaders(reqHTTP, attemptModel)
 		reqHTTP.Header.Set("Accept", "text/event-stream")
 		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
 			URL:       url,
@@ -515,7 +514,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 		}
 		reqHTTP.Header.Set("Content-Type", "application/json")
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-		applyGeminiCLIHeaders(reqHTTP)
+		applyGeminiCLIHeaders(reqHTTP, baseModel)
 		reqHTTP.Header.Set("Accept", "application/json")
 		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
 			URL:       url,
@@ -738,21 +737,11 @@ func stringValue(m map[string]any, key string) string {
 }

 // applyGeminiCLIHeaders sets required headers for the Gemini CLI upstream.
-func applyGeminiCLIHeaders(r *http.Request) {
-	var ginHeaders http.Header
-	if ginCtx, ok := r.Context().Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
-		ginHeaders = ginCtx.Request.Header
-	}
-
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "google-api-nodejs-client/9.15.1")
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Goog-Api-Client", "gl-node/22.17.0")
-	misc.EnsureHeader(r.Header, ginHeaders, "Client-Metadata", geminiCLIClientMetadata())
-}
-
-// geminiCLIClientMetadata returns a compact metadata string required by upstream.
-func geminiCLIClientMetadata() string {
-	// Keep parity with CLI client defaults
-	return "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI"
+// User-Agent is always forced to the GeminiCLI format regardless of the client's value,
+// so that upstream identifies the request as a native GeminiCLI client.
+func applyGeminiCLIHeaders(r *http.Request, model string) {
+	r.Header.Set("User-Agent", misc.GeminiCLIUserAgent(model))
+	r.Header.Set("X-Goog-Api-Client", misc.GeminiCLIApiClientHeader)
 }

 // cliPreviewFallbackOrder returns preview model candidates for a base model.
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -400,7 +400,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	hasTools := toolDeclCount > 0
 	thinkingResult := gjson.GetBytes(rawJSON, "thinking")
 	thinkingType := thinkingResult.Get("type").String()
-	hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && (thinkingType == "enabled" || thinkingType == "adaptive")
+	hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && (thinkingType == "enabled" || thinkingType == "adaptive" || thinkingType == "auto")
 	isClaudeThinking := util.IsClaudeThinkingModel(modelName)

 	if hasTools && hasThinking && isClaudeThinking {
@@ -440,8 +440,8 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
+		case "adaptive", "auto":
+			// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
 			// to model-specific max capability.
 			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
 			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -34,6 +34,11 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

+	// Let user-provided generationConfig pass through
+	if genConfig := gjson.GetBytes(rawJSON, "generationConfig"); genConfig.Exists() {
+		out, _ = sjson.SetRawBytes(out, "request.generationConfig", []byte(genConfig.Raw))
+	}
+
 	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
 	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -230,8 +230,8 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 					reasoningEffort = effort
 				}
 			}
-		case "adaptive":
-			// Claude adaptive means "enable with max capacity"; keep it as highest level
+		case "adaptive", "auto":
+			// Claude adaptive/auto means "enable with max capacity"; keep it as highest level
 			// and let ApplyThinking normalize per target model capability.
 			reasoningEffort = string(thinking.LevelXHigh)
 		case "disabled":
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -22,8 +22,8 @@ var (

 // ConvertCodexResponseToClaudeParams holds parameters for response conversion.
 type ConvertCodexResponseToClaudeParams struct {
-	HasToolCall              bool
-	BlockIndex               int
+	HasToolCall               bool
+	BlockIndex                int
 	HasReceivedArgumentsDelta bool
 }

--- a/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go
@@ -264,18 +264,18 @@ func TestConvertSystemRoleToDeveloper_AssistantRole(t *testing.T) {
 	}
 }

-func TestUserFieldDeletion(t *testing.T) {  
+func TestUserFieldDeletion(t *testing.T) {
 	inputJSON := []byte(`{  
 		"model": "gpt-5.2",  
 		"user": "test-user",  
 		"input": [{"role": "user", "content": "Hello"}]  
-	}`)  
-	  
-	output := ConvertOpenAIResponsesRequestToCodex("gpt-5.2", inputJSON, false)  
-	outputStr := string(output)  
-	  
-	// Verify user field is deleted  
-	userField := gjson.Get(outputStr, "user")  
+	}`)
+
+	output := ConvertOpenAIResponsesRequestToCodex("gpt-5.2", inputJSON, false)
+	outputStr := string(output)
+
+	// Verify user field is deleted
+	userField := gjson.Get(outputStr, "user")
 	if userField.Exists() {
 		t.Errorf("user field should be deleted, but it was found with value: %s", userField.Raw)
 	}
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -180,8 +180,8 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
+		case "adaptive", "auto":
+			// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
 			// to model-specific max capability.
 			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
 			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -34,6 +34,11 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

+	// Let user-provided generationConfig pass through
+	if genConfig := gjson.GetBytes(rawJSON, "generationConfig"); genConfig.Exists() {
+		out, _ = sjson.SetRawBytes(out, "request.generationConfig", []byte(genConfig.Raw))
+	}
+
 	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini CLI thinkingConfig.
 	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -161,8 +161,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
+		case "adaptive", "auto":
+			// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
 			// to model-specific max capability.
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "high")
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -34,6 +34,11 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)

+	// Let user-provided generationConfig pass through
+	if genConfig := gjson.GetBytes(rawJSON, "generationConfig"); genConfig.Exists() {
+		out, _ = sjson.SetRawBytes(out, "generationConfig", []byte(genConfig.Raw))
+	}
+
 	// Apply thinking configuration: convert OpenAI reasoning_effort to Gemini thinkingConfig.
 	// Inline translation-only mapping; capability checks happen later in ApplyThinking.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -75,8 +75,8 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 						out, _ = sjson.Set(out, "reasoning_effort", effort)
 					}
 				}
-			case "adaptive":
-				// Claude adaptive means "enable with max capacity"; keep it as highest level
+			case "adaptive", "auto":
+				// Claude adaptive/auto means "enable with max capacity"; keep it as highest level
 				// and let ApplyThinking normalize per target model capability.
 				out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh))
 			case "disabled":
--- a/internal/watcher/config_reload.go
+++ b/internal/watcher/config_reload.go
@@ -127,7 +127,8 @@ func (w *Watcher) reloadConfig() bool {
 	}

 	authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir
-	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias))
+	retryConfigChanged := oldConfig != nil && (oldConfig.RequestRetry != newConfig.RequestRetry || oldConfig.MaxRetryInterval != newConfig.MaxRetryInterval || oldConfig.MaxRetryCredentials != newConfig.MaxRetryCredentials)
+	forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias) || retryConfigChanged)

 	log.Infof("config successfully reloaded, triggering client reload")
 	w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh)
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -54,6 +54,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.RequestRetry != newCfg.RequestRetry {
 		changes = append(changes, fmt.Sprintf("request-retry: %d -> %d", oldCfg.RequestRetry, newCfg.RequestRetry))
 	}
+	if oldCfg.MaxRetryCredentials != newCfg.MaxRetryCredentials {
+		changes = append(changes, fmt.Sprintf("max-retry-credentials: %d -> %d", oldCfg.MaxRetryCredentials, newCfg.MaxRetryCredentials))
+	}
 	if oldCfg.MaxRetryInterval != newCfg.MaxRetryInterval {
 		changes = append(changes, fmt.Sprintf("max-retry-interval: %d -> %d", oldCfg.MaxRetryInterval, newCfg.MaxRetryInterval))
 	}
--- a/internal/watcher/diff/config_diff_test.go
+++ b/internal/watcher/diff/config_diff_test.go
@@ -223,6 +223,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 		UsageStatisticsEnabled: false,
 		DisableCooling:         false,
 		RequestRetry:           1,
+		MaxRetryCredentials:    1,
 		MaxRetryInterval:       1,
 		WebsocketAuth:          false,
 		QuotaExceeded:          config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false},
@@ -246,6 +247,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 		UsageStatisticsEnabled: true,
 		DisableCooling:         true,
 		RequestRetry:           2,
+		MaxRetryCredentials:    3,
 		MaxRetryInterval:       3,
 		WebsocketAuth:          true,
 		QuotaExceeded:          config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true},
@@ -283,6 +285,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 	expectContains(t, details, "disable-cooling: false -> true")
 	expectContains(t, details, "request-log: false -> true")
 	expectContains(t, details, "request-retry: 1 -> 2")
+	expectContains(t, details, "max-retry-credentials: 1 -> 3")
 	expectContains(t, details, "max-retry-interval: 1 -> 3")
 	expectContains(t, details, "proxy-url: http://old-proxy -> http://new-proxy")
 	expectContains(t, details, "ws-auth: false -> true")
@@ -309,6 +312,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
 		UsageStatisticsEnabled: false,
 		DisableCooling:         false,
 		RequestRetry:           1,
+		MaxRetryCredentials:    1,
 		MaxRetryInterval:       1,
 		WebsocketAuth:          false,
 		QuotaExceeded:          config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false},
@@ -361,6 +365,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
 		UsageStatisticsEnabled: true,
 		DisableCooling:         true,
 		RequestRetry:           2,
+		MaxRetryCredentials:    3,
 		MaxRetryInterval:       3,
 		WebsocketAuth:          true,
 		QuotaExceeded:          config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true},
@@ -419,6 +424,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
 	expectContains(t, changes, "usage-statistics-enabled: false -> true")
 	expectContains(t, changes, "disable-cooling: false -> true")
 	expectContains(t, changes, "request-retry: 1 -> 2")
+	expectContains(t, changes, "max-retry-credentials: 1 -> 3")
 	expectContains(t, changes, "max-retry-interval: 1 -> 3")
 	expectContains(t, changes, "proxy-url: http://old-proxy -> http://new-proxy")
 	expectContains(t, changes, "ws-auth: false -> true")
--- a/internal/watcher/watcher_test.go
+++ b/internal/watcher/watcher_test.go
@@ -1239,6 +1239,67 @@ func TestReloadConfigFiltersAffectedOAuthProviders(t *testing.T) {
 	}
 }

+func TestReloadConfigTriggersCallbackForMaxRetryCredentialsChange(t *testing.T) {
+	tmpDir := t.TempDir()
+	authDir := filepath.Join(tmpDir, "auth")
+	if err := os.MkdirAll(authDir, 0o755); err != nil {
+		t.Fatalf("failed to create auth dir: %v", err)
+	}
+	configPath := filepath.Join(tmpDir, "config.yaml")
+
+	oldCfg := &config.Config{
+		AuthDir:             authDir,
+		MaxRetryCredentials: 0,
+		RequestRetry:        1,
+		MaxRetryInterval:    5,
+	}
+	newCfg := &config.Config{
+		AuthDir:             authDir,
+		MaxRetryCredentials: 2,
+		RequestRetry:        1,
+		MaxRetryInterval:    5,
+	}
+	data, errMarshal := yaml.Marshal(newCfg)
+	if errMarshal != nil {
+		t.Fatalf("failed to marshal config: %v", errMarshal)
+	}
+	if errWrite := os.WriteFile(configPath, data, 0o644); errWrite != nil {
+		t.Fatalf("failed to write config: %v", errWrite)
+	}
+
+	callbackCalls := 0
+	callbackMaxRetryCredentials := -1
+	w := &Watcher{
+		configPath:     configPath,
+		authDir:        authDir,
+		lastAuthHashes: make(map[string]string),
+		reloadCallback: func(cfg *config.Config) {
+			callbackCalls++
+			if cfg != nil {
+				callbackMaxRetryCredentials = cfg.MaxRetryCredentials
+			}
+		},
+	}
+	w.SetConfig(oldCfg)
+
+	if ok := w.reloadConfig(); !ok {
+		t.Fatal("expected reloadConfig to succeed")
+	}
+
+	if callbackCalls != 1 {
+		t.Fatalf("expected reload callback to be called once, got %d", callbackCalls)
+	}
+	if callbackMaxRetryCredentials != 2 {
+		t.Fatalf("expected callback MaxRetryCredentials=2, got %d", callbackMaxRetryCredentials)
+	}
+
+	w.clientsMutex.RLock()
+	defer w.clientsMutex.RUnlock()
+	if w.config == nil || w.config.MaxRetryCredentials != 2 {
+		t.Fatalf("expected watcher config MaxRetryCredentials=2, got %+v", w.config)
+	}
+}
+
 func TestStartFailsWhenAuthDirMissing(t *testing.T) {
 	tmpDir := t.TempDir()
 	configPath := filepath.Join(tmpDir, "config.yaml")
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -138,8 +138,9 @@ type Manager struct {
 	providerOffsets map[string]int

 	// Retry controls request retry behavior.
-	requestRetry     atomic.Int32
-	maxRetryInterval atomic.Int64
+	requestRetry        atomic.Int32
+	maxRetryCredentials atomic.Int32
+	maxRetryInterval    atomic.Int64

 	// oauthModelAlias stores global OAuth model alias mappings (alias -> upstream name) keyed by channel.
 	oauthModelAlias atomic.Value
@@ -384,18 +385,22 @@ func compileAPIKeyModelAliasForModels[T interface {
 	}
 }

-// SetRetryConfig updates retry attempts and cooldown wait interval.
-func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) {
+// SetRetryConfig updates retry attempts, credential retry limit and cooldown wait interval.
+func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration, maxRetryCredentials int) {
 	if m == nil {
 		return
 	}
 	if retry < 0 {
 		retry = 0
 	}
+	if maxRetryCredentials < 0 {
+		maxRetryCredentials = 0
+	}
 	if maxRetryInterval < 0 {
 		maxRetryInterval = 0
 	}
 	m.requestRetry.Store(int32(retry))
+	m.maxRetryCredentials.Store(int32(maxRetryCredentials))
 	m.maxRetryInterval.Store(maxRetryInterval.Nanoseconds())
 }

@@ -506,11 +511,11 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	_, maxWait := m.retrySettings()
+	_, maxRetryCredentials, maxWait := m.retrySettings()

 	var lastErr error
 	for attempt := 0; ; attempt++ {
-		resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
+		resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts, maxRetryCredentials)
 		if errExec == nil {
 			return resp, nil
 		}
@@ -537,11 +542,11 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	_, maxWait := m.retrySettings()
+	_, maxRetryCredentials, maxWait := m.retrySettings()

 	var lastErr error
 	for attempt := 0; ; attempt++ {
-		resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
+		resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts, maxRetryCredentials)
 		if errExec == nil {
 			return resp, nil
 		}
@@ -568,11 +573,11 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	_, maxWait := m.retrySettings()
+	_, maxRetryCredentials, maxWait := m.retrySettings()

 	var lastErr error
 	for attempt := 0; ; attempt++ {
-		result, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
+		result, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts, maxRetryCredentials)
 		if errStream == nil {
 			return result, nil
 		}
@@ -591,7 +596,7 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
 }

-func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (cliproxyexecutor.Response, error) {
 	if len(providers) == 0 {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
@@ -600,6 +605,12 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
+		if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials {
+			if lastErr != nil {
+				return cliproxyexecutor.Response{}, lastErr
+			}
+			return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"}
+		}
 		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
 		if errPick != nil {
 			if lastErr != nil {
@@ -647,7 +658,7 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req
 	}
 }

-func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (cliproxyexecutor.Response, error) {
 	if len(providers) == 0 {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
@@ -656,6 +667,12 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string,
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
+		if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials {
+			if lastErr != nil {
+				return cliproxyexecutor.Response{}, lastErr
+			}
+			return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"}
+		}
 		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
 		if errPick != nil {
 			if lastErr != nil {
@@ -703,7 +720,7 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string,
 	}
 }

-func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
+func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (*cliproxyexecutor.StreamResult, error) {
 	if len(providers) == 0 {
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
@@ -712,6 +729,12 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
+		if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials {
+			if lastErr != nil {
+				return nil, lastErr
+			}
+			return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
+		}
 		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
 		if errPick != nil {
 			if lastErr != nil {
@@ -1108,11 +1131,11 @@ func (m *Manager) normalizeProviders(providers []string) []string {
 	return result
 }

-func (m *Manager) retrySettings() (int, time.Duration) {
+func (m *Manager) retrySettings() (int, int, time.Duration) {
 	if m == nil {
-		return 0, 0
+		return 0, 0, 0
 	}
-	return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
+	return int(m.requestRetry.Load()), int(m.maxRetryCredentials.Load()), time.Duration(m.maxRetryInterval.Load())
 }

 func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) {
--- a/sdk/cliproxy/auth/conductor_overrides_test.go
+++ b/sdk/cliproxy/auth/conductor_overrides_test.go
@@ -2,13 +2,17 @@ package auth

 import (
 	"context"
+	"net/http"
+	"sync"
 	"testing"
 	"time"
+
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 )

 func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
 	m := NewManager(nil, nil, nil)
-	m.SetRetryConfig(3, 30*time.Second)
+	m.SetRetryConfig(3, 30*time.Second, 0)

 	model := "test-model"
 	next := time.Now().Add(5 * time.Second)
@@ -31,7 +35,7 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
 		t.Fatalf("register auth: %v", errRegister)
 	}

-	_, maxWait := m.retrySettings()
+	_, _, maxWait := m.retrySettings()
 	wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait)
 	if shouldRetry {
 		t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait)
@@ -56,6 +60,124 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
 	}
 }

+type credentialRetryLimitExecutor struct {
+	id string
+
+	mu    sync.Mutex
+	calls int
+}
+
+func (e *credentialRetryLimitExecutor) Identifier() string {
+	return e.id
+}
+
+func (e *credentialRetryLimitExecutor) Execute(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	e.recordCall()
+	return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "boom"}
+}
+
+func (e *credentialRetryLimitExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
+	e.recordCall()
+	return nil, &Error{HTTPStatus: 500, Message: "boom"}
+}
+
+func (e *credentialRetryLimitExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) {
+	return auth, nil
+}
+
+func (e *credentialRetryLimitExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	e.recordCall()
+	return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "boom"}
+}
+
+func (e *credentialRetryLimitExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) {
+	return nil, nil
+}
+
+func (e *credentialRetryLimitExecutor) recordCall() {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	e.calls++
+}
+
+func (e *credentialRetryLimitExecutor) Calls() int {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.calls
+}
+
+func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) {
+	t.Helper()
+
+	m := NewManager(nil, nil, nil)
+	m.SetRetryConfig(0, 0, maxRetryCredentials)
+
+	executor := &credentialRetryLimitExecutor{id: "claude"}
+	m.RegisterExecutor(executor)
+
+	auth1 := &Auth{ID: "auth-1", Provider: "claude"}
+	auth2 := &Auth{ID: "auth-2", Provider: "claude"}
+	if _, errRegister := m.Register(context.Background(), auth1); errRegister != nil {
+		t.Fatalf("register auth1: %v", errRegister)
+	}
+	if _, errRegister := m.Register(context.Background(), auth2); errRegister != nil {
+		t.Fatalf("register auth2: %v", errRegister)
+	}
+
+	return m, executor
+}
+
+func TestManager_MaxRetryCredentials_LimitsCrossCredentialRetries(t *testing.T) {
+	request := cliproxyexecutor.Request{Model: "test-model"}
+	testCases := []struct {
+		name   string
+		invoke func(*Manager) error
+	}{
+		{
+			name: "execute",
+			invoke: func(m *Manager) error {
+				_, errExecute := m.Execute(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{})
+				return errExecute
+			},
+		},
+		{
+			name: "execute_count",
+			invoke: func(m *Manager) error {
+				_, errExecute := m.ExecuteCount(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{})
+				return errExecute
+			},
+		},
+		{
+			name: "execute_stream",
+			invoke: func(m *Manager) error {
+				_, errExecute := m.ExecuteStream(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{})
+				return errExecute
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			limitedManager, limitedExecutor := newCredentialRetryLimitTestManager(t, 1)
+			if errInvoke := tc.invoke(limitedManager); errInvoke == nil {
+				t.Fatalf("expected error for limited retry execution")
+			}
+			if calls := limitedExecutor.Calls(); calls != 1 {
+				t.Fatalf("expected 1 call with max-retry-credentials=1, got %d", calls)
+			}
+
+			unlimitedManager, unlimitedExecutor := newCredentialRetryLimitTestManager(t, 0)
+			if errInvoke := tc.invoke(unlimitedManager); errInvoke == nil {
+				t.Fatalf("expected error for unlimited retry execution")
+			}
+			if calls := unlimitedExecutor.Calls(); calls != 2 {
+				t.Fatalf("expected 2 calls with max-retry-credentials=0, got %d", calls)
+			}
+		})
+	}
+}
+
 func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
 	prev := quotaCooldownDisabled.Load()
 	quotaCooldownDisabled.Store(false)
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -347,7 +347,7 @@ func (s *Service) applyRetryConfig(cfg *config.Config) {
 		return
 	}
 	maxInterval := time.Duration(cfg.MaxRetryInterval) * time.Second
-	s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval)
+	s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval, cfg.MaxRetryCredentials)
 }

 func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName string, ok bool) {
Author	SHA1	Message	Date
Luis Pater	7547d1d0b3	chore(config): add default OAuth model alias configurations and extend registry with supported API endpoints	2026-03-02 21:36:42 +08:00
Luis Pater	68934942d0	Merge branch 'pr-402-local' # Conflicts: # internal/config/oauth_model_alias_migration.go	2026-03-02 20:45:37 +08:00
Luis Pater	09fec34e1c	chore(docs): update sponsor info and GLM model details in README files	2026-03-02 20:30:07 +08:00
hkfires	9229708b6c	revert(executor): re-apply PR #1735 antigravity changes with cleanup	2026-03-02 19:30:32 +08:00
hkfires	914db94e79	refactor(headers): streamline User-Agent handling and introduce GeminiCLI versioning	2026-03-02 13:04:30 +08:00
hkfires	660bd7eff5	refactor(config): remove oauth-model-alias migration logic and related tests	2026-03-02 13:02:15 +08:00
hkfires	b907d21851	revert(executor): revert antigravity_executor.go changes from PR #1735	2026-03-02 12:54:15 +08:00
Luis Pater	d6cc976d1f	chore(executor): remove unused header scrubbing function	2026-03-02 03:40:54 +08:00
Luis Pater	8aa2cce8c5	Merge PR #1735 into dev with conflict resolution and fixes	2026-03-02 03:22:51 +08:00
Luis Pater	bf9b2c49df	Merge branch 'router-for-me:main' into main	2026-03-01 21:40:14 +08:00
Luis Pater	77b42c6165	fix(claude): handle `X-CPA-CLAUDE-1M` header and ensure proper beta merging logic	2026-03-01 21:39:33 +08:00
Luis Pater	446150a747	Merge branch 'router-for-me:main' into main	2026-03-01 20:35:05 +08:00
Luis Pater	1cbc4834e1	Merge pull request #1771 from edlsh/fix/claude-cache-control-1769 Fix Claude OAuth cache_control regressions and gzip error decoding	2026-03-01 20:17:22 +08:00
hkfires	a8a5d03c33	chore: ignore .idea directory in git and docker builds	2026-03-01 12:42:59 +08:00
edlsh	76aa917882	Optimize cache-control JSON mutations in Claude executor	2026-02-28 22:47:04 -05:00
edlsh	6ac9b31e4e	Handle compressed error decode failures safely	2026-02-28 22:43:46 -05:00
edlsh	0ad3e8457f	Clarify cloaking system block cache-control comments	2026-02-28 22:34:14 -05:00
edlsh	444a47ae63	Fix Claude cache-control guardrails and gzip error decoding	2026-02-28 22:32:33 -05:00
Luis Pater	725f4fdff4	Merge pull request #1768 from router-for-me/claude fix(translator): handle Claude thinking type "auto" like adaptive	2026-03-01 11:03:13 +08:00
Luis Pater	c23e46f45d	Merge pull request #1767 from router-for-me/antigravity fix(antigravity): update model configurations and add new models for Antigravity	2026-03-01 11:02:20 +08:00
hkfires	b148820c35	fix(translator): handle Claude thinking type "auto" like adaptive	2026-03-01 10:30:19 +08:00
hkfires	134f41496d	fix(antigravity): update model configurations and add new models for Antigravity	2026-03-01 10:05:29 +08:00
Luis Pater	c5838dd58d	Merge pull request #400 from router-for-me/plus v6.8.35	2026-03-01 09:42:03 +08:00
Luis Pater	b6ca5ef7ce	Merge branch 'main' into plus	2026-03-01 09:41:52 +08:00
Luis Pater	1ae994b4aa	fix(antigravity): adjust thinkingBudget default to 64000 and update model definitions for Claude	2026-03-01 09:39:39 +08:00
Luis Pater	84e9793e61	Merge pull request #399 from router-for-me/plus v6.8.34	2026-03-01 02:44:36 +08:00
Luis Pater	32e64dacfd	Merge branch 'main' into plus	2026-03-01 02:44:26 +08:00
Luis Pater	cc1d8f6629	Fixed: #1747 feat(auth): add configurable max-retry-credentials for finer control over cross-credential retries	2026-03-01 02:42:36 +08:00
Luis Pater	5446cd2b02	Merge pull request #1761 from margbug01/fix/thinking-chain-display fix: support thinking.type=auto from Amp client and decouple thinking translation from unsigned history	2026-03-01 02:30:42 +08:00
margbug01	8de0885b7d	fix: support thinking.type="auto" from Amp client for Antigravity Claude models ## Problem When using Antigravity Claude models through CLIProxyAPI, the thinking chain (reasoning content) does not display in the Amp client. ## Root Cause The Amp client sends `thinking: {"type": "auto"}` in its requests, but `ConvertClaudeRequestToAntigravity` only handled `"enabled"` and `"adaptive"` types in its switch statement. The `"auto"` type was silently ignored, resulting in no `thinkingConfig` being set in the translated Gemini request. Without `thinkingConfig`, the Antigravity API returns responses without any thinking content. Additionally, the Antigravity API for Claude models does not support `thinkingBudget: -1` (auto mode sentinel). It requires a concrete positive budget value. The fix uses 128000 as the budget for "auto" mode, which `ApplyThinking` will then normalize to stay within the model's actual limits (e.g., capped to `maxOutputTokens - 1`). ## Changes ### internal/translator/antigravity/claude/antigravity_claude_request.go 1. Add "auto" case to the thinking type switch statement. Sets `thinkingBudget: 128000` and `includeThoughts: true`. The budget is subsequently normalized by `ApplyThinking` based on model-specific limits. 2. Add "auto" to hasThinking check so that interleaved thinking hints are injected for tool-use scenarios when Amp sends `thinking.type="auto"`. ### internal/registry/model_definitions_static_data.go 3. Add Thinking configuration for `claude-sonnet-4-6`, `claude-sonnet-4-5`, and `claude-opus-4-6` in `GetAntigravityModelConfig()` -- these were previously missing, causing `ApplyThinking` to skip thinking config entirely. 
## Testing - Deployed to Railway test instance (cpa-thinking-test) - Verified via debug logging that: - Amp sends `thinking: {"type": "auto"}` - CPA now translates this to `thinkingConfig: {thinkingBudget: 128000, includeThoughts: true}` - `ApplyThinking` normalizes the budget to model-specific limits - Antigravity API receives the correct thinkingConfig Amp-Thread-ID: https://ampcode.com/threads/T-019ca511-710d-776d-a07c-4b750f871a93 Co-authored-by: Amp <amp@ampcode.com>	2026-03-01 02:18:43 +08:00
maplelove	68dd2bfe82	fix(translator): allow passthrough of custom generationConfig for all Gemini-like providers	2026-02-27 17:13:42 +08:00
maplelove	2baf35b3ef	fix(executor): bump antigravity UA to 1.19.6 and align image_gen payload	2026-02-27 14:09:37 +08:00
maplelove	846e75b893	feat(gemini): route gemini-3.1-flash-image identically to gemini-3-pro-image	2026-02-27 13:32:06 +08:00
maplelove	fc0257d6d9	refactor: consolidate duplicate UA and header scrubbing into shared misc functions	2026-02-27 10:57:13 +08:00
maplelove	f3c164d345	feat(antigravity): update to v1.19.5 with new models and Claude 4-6 migration	2026-02-27 10:34:27 +08:00
maplelove	4040b1e766	Merge remote-tracking branch 'upstream/dev' into dev # Conflicts: # internal/runtime/executor/antigravity_executor.go	2026-02-27 10:29:50 +08:00
maplelove	8f97a5f77c	feat(registry): expose input modalities, token limits, and generation methods for Antigravity models	2026-02-23 13:33:51 +08:00
maplelove	2a4d3e60f3	Merge remote-tracking branch 'upstream/dev' into dev	2026-02-23 00:01:47 +08:00
maplelove	8b5af2ab84	fix(executor): match real Antigravity OAuth UA, remove redundant header scrubbing on new requests	2026-02-22 23:20:12 +08:00
maplelove	d887716ebd	refactor(executor): switch HttpRequest to whitelist-based header filtering	2026-02-22 21:00:12 +08:00
maplelove	5dc1848466	feat(scrub): add comprehensive browser fingerprint and client identity header scrubbing	2026-02-22 20:51:00 +08:00
maplelove	9491517b26	fix(executor): use singleton transport to prevent OOM from connection pool leaks	2026-02-22 20:17:30 +08:00
maplelove	9370b5bd04	fix(executor): completely scrub all proxy tracing headers in executor	2026-02-22 19:43:10 +08:00
maplelove	abb51a0d93	fix(executor): correctly disable http2 ALPN in Antigravity client to resolve connection reset errors	2026-02-22 19:23:48 +08:00
maplelove	c8d809131b	fix(executor): improve antigravity reverse proxy emulation - force http/1.1 instead of http/2 - explicit connection close - strip proxy headers X-Forwarded-For and X-Real-IP - add project id to fetch models payload	2026-02-22 18:41:58 +08:00
maplelove	dd71c73a9f	fix: align gemini-cli upstream communication headers Removed legacy Client-Metadata and explicit API-Client headers. Dynamically generating accurate User-Agent strings matching the official cli.	2026-02-22 17:07:17 +08:00