From 52760a4eaa3c4611aa0a3e0bb028e79dbe7b4773 Mon Sep 17 00:00:00 2001
From: CodeIgnitor <theunionjack007@gmail.com>
Date: Mon, 5 Jan 2026 02:41:24 +0500
Subject: [PATCH 01/26] fix(auth): use backend project ID for free tier Gemini
 CLI OAuth users

Fixes issue where free tier users cannot access Gemini 3 preview models
due to frontend/backend project ID mapping.

## Problem
Google's Gemini API uses a frontend/backend project mapping system for
free tier users:
- Frontend projects (e.g., gen-lang-client-*) are user-visible
- Backend projects (e.g., mystical-victor-*) host actual API access
- Only backend projects have access to preview models (gemini-3-*)

Previously, CLIProxyAPI ignored the backend project ID returned by
Google's onboarding API and kept using the frontend ID, preventing
access to preview models.

## Solution
### CLI (internal/cmd/login.go)
- Detect free tier users (gen-lang-client-* projects or FREE/LEGACY tier)
- Show interactive prompt allowing users to choose frontend or backend
- Default to backend (recommended for preview model access)
- Pro users: maintain original behavior (keep frontend ID)

### Web UI (internal/api/handlers/management/auth_files.go)
- Detect free tier users using same logic
- Automatically use backend project ID (recommended choice)
- Pro users: maintain original behavior (keep frontend ID)

### Deduplication (internal/cmd/login.go)
- Add deduplication when user selects ALL projects
- Prevents redundant API calls when multiple frontend projects map to
  same backend
- Skips duplicate project IDs in activation loop

## Impact
- Free tier users: Can now access gemini-3-pro-preview and
  gemini-3-flash-preview models
- Pro users: No change in behavior (backward compatible)
- Only affects Gemini CLI OAuth (not antigravity or API key auth)

## Testing
- Tested with free tier account selecting single project
- Tested with free tier account selecting ALL projects
- Verified deduplication prevents redundant onboarding calls
- Confirmed pro user behavior unchanged
---
 .../api/handlers/management/auth_files.go     | 15 ++++++-
 internal/cmd/login.go                         | 44 ++++++++++++++++++-
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go
index e0904ab6..770ba303 100644
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -2102,7 +2102,20 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// For free users, use backend project ID for preview model access
+						log.Infof("Gemini onboarding: frontend project %s maps to backend project %s", projectID, responseProjectID)
+						log.Infof("Using backend project ID: %s (recommended for preview model access)", responseProjectID)
+						finalProjectID = responseProjectID
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
 					finalProjectID = responseProjectID
 				}
diff --git a/internal/cmd/login.go b/internal/cmd/login.go
index 3bb0b9a5..f68e6135 100644
--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -116,6 +116,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	}
 
 	activatedProjects := make([]string, 0, len(projectSelections))
+	seenProjects := make(map[string]bool)
 	for _, candidateID := range projectSelections {
 		log.Infof("Activating project %s", candidateID)
 		if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
@@ -132,6 +133,13 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 		if finalID == "" {
 			finalID = candidateID
 		}
+
+		// Skip duplicates
+		if seenProjects[finalID] {
+			log.Infof("Project %s already activated, skipping", finalID)
+			continue
+		}
+		seenProjects[finalID] = true
 		activatedProjects = append(activatedProjects, finalID)
 	}
 
@@ -259,9 +267,41 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// Interactive prompt for free users
+						fmt.Printf("\n⚠️  Google returned a different project ID:\n")
+						fmt.Printf("    Requested (frontend): %s\n", projectID)
+						fmt.Printf("    Returned (backend):   %s\n\n", responseProjectID)
+						fmt.Printf("ℹ️   Backend project IDs have access to preview models (gemini-3-*).\n")
+						fmt.Printf("    This is normal for free tier users.\n\n")
+						fmt.Printf("Which project ID would you like to use?\n")
+						fmt.Printf("  [1] Backend (recommended): %s\n", responseProjectID)
+						fmt.Printf("  [2] Frontend: %s\n\n", projectID)
+						fmt.Printf("Enter choice [1]: ")
+
+						reader := bufio.NewReader(os.Stdin)
+						choice, _ := reader.ReadString('\n')
+						choice = strings.TrimSpace(choice)
+
+						if choice == "2" {
+							log.Infof("Using frontend project ID: %s", projectID)
+							fmt.Println("⚠️  Warning: Frontend project IDs may not have access to preview models.")
+							finalProjectID = projectID
+						} else {
+							log.Infof("Using backend project ID: %s (recommended)", responseProjectID)
+							finalProjectID = responseProjectID
+						}
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
-					finalProjectID = responseProjectID
+				finalProjectID = responseProjectID
 				}
 			}
 

From 00280b6fe8df4ebc07febfb6fd5884b4708909c7 Mon Sep 17 00:00:00 2001
From: maoring24 <mao@allstream.ai>
Date: Mon, 5 Jan 2026 20:32:51 +0800
Subject: [PATCH 02/26] feat(claude): add native request cloaking for
 non-claude-code clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

integrate claude-cloak functionality to disguise api requests:
- add CloakConfig with mode (auto/always/never) and strict-mode options
- generate fake user_id in claude code format (user_[hex]_account__session_[uuid])
- inject claude code system prompt (configurable strict mode)
- obfuscate sensitive words with zero-width characters
- auto-detect claude code clients via user-agent

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 config.example.yaml                          |   9 +
 internal/config/config.go                    |  22 +++
 internal/runtime/executor/claude_executor.go | 174 +++++++++++++++++-
 internal/runtime/executor/cloak_obfuscate.go | 176 +++++++++++++++++++
 internal/runtime/executor/cloak_utils.go     |  47 +++++
 5 files changed, 424 insertions(+), 4 deletions(-)
 create mode 100644 internal/runtime/executor/cloak_obfuscate.go
 create mode 100644 internal/runtime/executor/cloak_utils.go

diff --git a/config.example.yaml b/config.example.yaml
index 332fba70..a5ca875c 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -134,6 +134,15 @@ ws-auth: false
 #       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
 #       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
+#     cloak:                         # optional: request cloaking for non-Claude-Code clients
+#       mode: "auto"                 # "auto" (default): cloak only when client is not Claude Code
+#                                    # "always": always apply cloaking
+#                                    # "never": never apply cloaking
+#       strict-mode: false           # false (default): prepend Claude Code prompt to user system messages
+#                                    # true: strip all user system messages, keep only Claude Code prompt
+#       sensitive-words:             # optional: words to obfuscate with zero-width characters
+#         - "API"
+#         - "proxy"
 
 # OpenAI compatibility providers
 # openai-compatibility:
diff --git a/internal/config/config.go b/internal/config/config.go
index e8ae3554..0b327a6c 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -236,6 +236,25 @@ type PayloadModelRule struct {
 	Protocol string `yaml:"protocol" json:"protocol"`
 }
 
+// CloakConfig configures request cloaking for non-Claude-Code clients.
+// Cloaking disguises API requests to appear as originating from the official Claude Code CLI.
+type CloakConfig struct {
+	// Mode controls cloaking behavior: "auto" (default), "always", or "never" (matched case-insensitively).
+	// - "auto": cloak only when the client User-Agent does not start with "claude-cli"
+	// - "always": always apply cloaking regardless of client
+	// - "never": never apply cloaking
+	Mode string `yaml:"mode,omitempty" json:"mode,omitempty"`
+
+	// StrictMode controls how system prompts are handled when cloaking.
+	// - false (default): prepend Claude Code prompt to user system messages
+	// - true: strip all user system messages, keep only Claude Code prompt
+	StrictMode bool `yaml:"strict-mode,omitempty" json:"strict-mode,omitempty"`
+
+	// SensitiveWords is a list of words to obfuscate with zero-width characters.
+	// This can help bypass certain content filters. Matching is case-insensitive; entries shorter than two characters are ignored.
+	SensitiveWords []string `yaml:"sensitive-words,omitempty" json:"sensitive-words,omitempty"`
+}
+
 // ClaudeKey represents the configuration for a Claude API key,
 // including the API key itself and an optional base URL for the API endpoint.
 type ClaudeKey struct {
@@ -260,6 +279,9 @@ type ClaudeKey struct {
 
 	// ExcludedModels lists model IDs that should be excluded for this provider.
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
+
+	// Cloak configures request cloaking for non-Claude-Code clients.
+	Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
 }
 
 // ClaudeModel describes a mapping between an alias and the actual upstream model name.
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 7be4f41b..49263c73 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -67,9 +67,10 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Inject thinking config based on model metadata for thinking variants
 	body = e.injectThinkingConfig(model, req.Metadata, body)
 
-	if !strings.HasPrefix(model, "claude-3-5-haiku") {
-		body = checkSystemInstructions(body)
-	}
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration
+	body = applyCloaking(ctx, e.cfg, auth, body, model)
+
 	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
@@ -181,7 +182,11 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body, _ = sjson.SetBytes(body, "model", model)
 	// Inject thinking config based on model metadata for thinking variants
 	body = e.injectThinkingConfig(model, req.Metadata, body)
-	body = checkSystemInstructions(body)
+
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration
+	body = applyCloaking(ctx, e.cfg, auth, body, model)
+
 	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
@@ -770,3 +775,164 @@ func checkSystemInstructions(payload []byte) []byte {
 	}
 	return payload
 }
+
+// getClientUserAgent returns the User-Agent of the inbound HTTP request stored as a *gin.Context under the "gin" context key, or "" when none is available.
+func getClientUserAgent(ctx context.Context) string {
+	if gc, isGin := ctx.Value("gin").(*gin.Context); isGin && gc != nil && gc.Request != nil {
+		return gc.Request.UserAgent()
+	}
+	return ""
+}
+
+// getCloakConfigFromAuth reads cloak settings from auth.Attributes and returns
+// (cloakMode, strictMode, sensitiveWords). cloak_mode is trimmed and defaults to
+// "auto" when unset, cloak_strict_mode is true only for a case-insensitive "true",
+// and cloak_sensitive_words is a comma-separated list whose blank entries are dropped.
+func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string) {
+	if auth == nil || auth.Attributes == nil {
+		return "auto", false, nil
+	}
+
+	cloakMode := strings.TrimSpace(auth.Attributes["cloak_mode"])
+	if cloakMode == "" {
+		cloakMode = "auto"
+	}
+	strictMode := strings.EqualFold(strings.TrimSpace(auth.Attributes["cloak_strict_mode"]), "true")
+
+	var sensitiveWords []string
+	for _, word := range strings.Split(auth.Attributes["cloak_sensitive_words"], ",") {
+		// Whitespace-only entries (e.g. from "a,,b" or a trailing comma) carry no word.
+		if word = strings.TrimSpace(word); word != "" {
+			sensitiveWords = append(sensitiveWords, word)
+		}
+	}
+	return cloakMode, strictMode, sensitiveWords
+}
+
+// resolveClaudeKeyCloakConfig returns the Cloak settings of the first cfg.ClaudeKey
+// entry matching the credentials of auth, or nil when nothing matches.
+// Keys are compared case-insensitively after trimming the configured value; an
+// entry is skipped only when both sides carry a base URL and the URLs differ.
+func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig {
+	if cfg == nil || auth == nil {
+		return nil
+	}
+	apiKey, baseURL := claudeCreds(auth)
+	if apiKey == "" {
+		return nil
+	}
+
+	for idx := range cfg.ClaudeKey {
+		candidate := &cfg.ClaudeKey[idx]
+		if !strings.EqualFold(strings.TrimSpace(candidate.APIKey), apiKey) {
+			continue
+		}
+		// A base URL only disqualifies the entry when both are set and disagree.
+		candidateBase := strings.TrimSpace(candidate.BaseURL)
+		bothSet := baseURL != "" && candidateBase != ""
+		if bothSet && !strings.EqualFold(candidateBase, baseURL) {
+			continue
+		}
+		return candidate.Cloak
+	}
+	return nil
+}
+
+// injectFakeUserID makes sure metadata.user_id holds a Claude Code style user ID.
+// An existing ID is kept only when it already matches that format (see
+// isValidUserID); a missing, empty, or malformed one is replaced by a freshly
+// generated fake ID.
+func injectFakeUserID(payload []byte) []byte {
+	// A missing metadata object reads back as "" here, which never validates,
+	// so the absent and malformed cases share one code path.
+	current := gjson.GetBytes(payload, "metadata.user_id").String()
+	if isValidUserID(current) {
+		return payload
+	}
+	payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
+	return payload
+}
+
+// checkSystemInstructionsWithMode injects the Claude Code system prompt into payload.
+// In strict mode the prompt replaces every user-supplied system message.
+// In non-strict mode (default) it is prepended to the existing text blocks; a
+// string-form "system" is preserved by converting it into a text block.
+func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
+	const claudeCodePrompt = "You are Claude Code, Anthropic's official CLI for Claude."
+	blocks := `[{"type":"text","text":"` + claudeCodePrompt + `"}]`
+	system := gjson.GetBytes(payload, "system")
+	switch {
+	case strictMode:
+		// Nothing supplied by the caller survives strict mode.
+	case system.IsArray():
+		if system.Get("0.text").String() == claudeCodePrompt {
+			return payload // already starts with the prompt
+		}
+		system.ForEach(func(_, part gjson.Result) bool {
+			if part.Get("type").String() == "text" {
+				blocks, _ = sjson.SetRaw(blocks, "-1", part.Raw)
+			}
+			return true
+		})
+	case system.Type == gjson.String && system.String() != "":
+		// "system" may also be a plain string; keep it as a text block instead of dropping it.
+		userBlock, _ := sjson.Set(`{"type":"text"}`, "text", system.String())
+		blocks, _ = sjson.SetRaw(blocks, "-1", userBlock)
+	}
+	payload, _ = sjson.SetRawBytes(payload, "system", []byte(blocks))
+	return payload
+}
+
+// applyCloaking applies cloaking transformations to the payload based on config and client.
+// Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation.
+// Settings come from the matching ClaudeKey entry. When no entry matches or its
+// mode is empty, the auth attributes supply the mode and also fill in strict mode
+// and sensitive words that the entry left unset. The payload is returned
+// unchanged when shouldCloak rejects the resolved mode and client User-Agent.
+func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string) []byte {
+	var (
+		mode   string
+		strict bool
+		words  []string
+	)
+
+	// Preferred source: the ClaudeKey configuration entry.
+	if keyCfg := resolveClaudeKeyCloakConfig(cfg, auth); keyCfg != nil {
+		mode = keyCfg.Mode
+		strict = keyCfg.StrictMode
+		words = keyCfg.SensitiveWords
+	}
+
+	// Fallback source: auth attributes, consulted only while the mode is unset.
+	if mode == "" {
+		var (
+			attrStrict bool
+			attrWords  []string
+		)
+		mode, attrStrict, attrWords = getCloakConfigFromAuth(auth)
+		strict = strict || attrStrict
+		if len(words) == 0 {
+			words = attrWords
+		}
+	}
+
+	if !shouldCloak(mode, getClientUserAgent(ctx)) {
+		return payload
+	}
+
+	// claude-3-5-haiku models do not get the injected system prompt.
+	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+		payload = checkSystemInstructionsWithMode(payload, strict)
+	}
+
+	payload = injectFakeUserID(payload)
+
+	// Word obfuscation runs after the injections above.
+	if len(words) > 0 {
+		matcher := buildSensitiveWordMatcher(words)
+		payload = obfuscateSensitiveWords(payload, matcher)
+	}
+
+	return payload
+}
+
diff --git a/internal/runtime/executor/cloak_obfuscate.go b/internal/runtime/executor/cloak_obfuscate.go
new file mode 100644
index 00000000..81781802
--- /dev/null
+++ b/internal/runtime/executor/cloak_obfuscate.go
@@ -0,0 +1,176 @@
+package executor
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// zeroWidthSpace is the Unicode zero-width space character (U+200B) that obfuscateWord inserts into matched words.
+const zeroWidthSpace = "\u200B"
+
+// SensitiveWordMatcher holds the compiled, case-insensitive regex produced by buildSensitiveWordMatcher.
+type SensitiveWordMatcher struct {
+	regex *regexp.Regexp // nil makes obfuscateText return its input unchanged
+}
+
+// buildSensitiveWordMatcher compiles one case-insensitive alternation regex from words.
+// Entries are trimmed; those shorter than two characters or already containing a
+// zero-width space are discarded. Longer words (by byte length) come first so
+// that they win over shorter words matching at the same position.
+// It returns nil when no usable word remains or the pattern fails to compile.
+func buildSensitiveWordMatcher(words []string) *SensitiveWordMatcher {
+	candidates := make([]string, 0, len(words))
+	for _, raw := range words {
+		word := strings.TrimSpace(raw)
+		if utf8.RuneCountInString(word) < 2 || strings.Contains(word, zeroWidthSpace) {
+			continue
+		}
+		candidates = append(candidates, word)
+	}
+	if len(candidates) == 0 {
+		return nil
+	}
+
+	// Go's regexp takes the first alternative that matches, so order matters.
+	sort.Slice(candidates, func(a, b int) bool {
+		return len(candidates[a]) > len(candidates[b])
+	})
+
+	var pattern strings.Builder
+	pattern.WriteString("(?i)")
+	for idx, word := range candidates {
+		if idx > 0 {
+			pattern.WriteByte('|')
+		}
+		pattern.WriteString(regexp.QuoteMeta(word))
+	}
+
+	compiled, err := regexp.Compile(pattern.String())
+	if err != nil {
+		return nil
+	}
+
+	return &SensitiveWordMatcher{regex: compiled}
+}
+
+// obfuscateWord inserts a zero-width space after the first rune of word.
+// The word is returned unchanged when it already contains a zero-width space,
+// consists of a single rune (or is empty), or starts with a rune that decodes
+// to utf8.RuneError.
+func obfuscateWord(word string) string {
+	if strings.Contains(word, zeroWidthSpace) {
+		return word
+	}
+	first, width := utf8.DecodeRuneInString(word)
+	if first != utf8.RuneError && width < len(word) {
+		return word[:width] + zeroWidthSpace + word[width:]
+	}
+	return word
+}
+
+// obfuscateText returns text with every regex match passed through obfuscateWord; a nil matcher or nil regex returns text as is.
+func (m *SensitiveWordMatcher) obfuscateText(text string) string {
+	if m != nil && m.regex != nil {
+		return m.regex.ReplaceAllStringFunc(text, obfuscateWord)
+	}
+	return text
+}
+
+// obfuscateSensitiveWords returns payload with sensitive words obfuscated, first
+// in the "system" field and then in the content of "messages". A nil matcher,
+// or one without a compiled regex, leaves the payload untouched.
+func obfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	if matcher == nil || matcher.regex == nil {
+		return payload
+	}
+
+	// Each pass takes the output of the previous one: "system" first, then "messages".
+	passes := []func([]byte, *SensitiveWordMatcher) []byte{obfuscateSystemBlocks, obfuscateMessages}
+	for _, pass := range passes {
+		payload = pass(payload, matcher)
+	}
+	return payload
+}
+
+// obfuscateSystemBlocks obfuscates sensitive words in the payload's "system" field.
+// Two shapes are handled: an array of blocks, where only blocks of type "text"
+// are rewritten, and a plain string. A missing field or any other shape leaves
+// the payload untouched, and values are written back only when they changed.
+func obfuscateSystemBlocks(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	system := gjson.GetBytes(payload, "system")
+
+	switch {
+	case !system.Exists():
+		return payload
+
+	case system.IsArray():
+		system.ForEach(func(idx, block gjson.Result) bool {
+			if block.Get("type").String() != "text" {
+				return true
+			}
+			before := block.Get("text").String()
+			if after := matcher.obfuscateText(before); after != before {
+				// idx.String() is used as this block's index in the write path.
+				payload, _ = sjson.SetBytes(payload, "system."+idx.String()+".text", after)
+			}
+			return true
+		})
+
+	case system.Type == gjson.String:
+		// Plain-string form: rewrite the whole value.
+		before := system.String()
+		if after := matcher.obfuscateText(before); after != before {
+			payload, _ = sjson.SetBytes(payload, "system", after)
+		}
+	}
+
+	return payload
+}
+
+// obfuscateMessages obfuscates sensitive words in the content of each entry of
+// the payload's "messages" array. String content is rewritten as a whole; for
+// array content only blocks of type "text" are rewritten. Values are written
+// back only when obfuscation changed them, and a payload without a "messages"
+// array is returned untouched.
+func obfuscateMessages(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	messages := gjson.GetBytes(payload, "messages")
+	if !messages.IsArray() {
+		return payload
+	}
+
+	// rewrite stores the obfuscated form of text at path, skipping unchanged text.
+	rewrite := func(path, text string) {
+		if replaced := matcher.obfuscateText(text); replaced != text {
+			payload, _ = sjson.SetBytes(payload, path, replaced)
+		}
+	}
+
+	messages.ForEach(func(msgIdx, msg gjson.Result) bool {
+		content := msg.Get("content")
+		contentPath := "messages." + msgIdx.String() + ".content"
+
+		switch {
+		case content.Type == gjson.String:
+			// Simple string content.
+			rewrite(contentPath, content.String())
+
+		case content.IsArray():
+			// Array of content blocks.
+			content.ForEach(func(blockIdx, block gjson.Result) bool {
+				if block.Get("type").String() == "text" {
+					rewrite(contentPath+"."+blockIdx.String()+".text", block.Get("text").String())
+				}
+				return true
+			})
+		}
+
+		return true
+	})
+
+	return payload
+}
diff --git a/internal/runtime/executor/cloak_utils.go b/internal/runtime/executor/cloak_utils.go
new file mode 100644
index 00000000..560ff880
--- /dev/null
+++ b/internal/runtime/executor/cloak_utils.go
@@ -0,0 +1,47 @@
+package executor
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"regexp"
+	"strings"
+
+	"github.com/google/uuid"
+)
+
+// userIDPattern matches Claude Code format: user_[64-hex, either case]_account__session_[lowercase hex UUID; the version digit is not checked]
+var userIDPattern = regexp.MustCompile(`^user_[a-fA-F0-9]{64}_account__session_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`)
+
+// generateFakeUserID builds a fake user ID in Claude Code format from 32 random bytes and a new UUID.
+// Format: user_[64-hex-chars]_account__session_[UUID]
+func generateFakeUserID() string {
+	var seed [32]byte
+	// NOTE(review): a failed read is ignored; the hex part then comes from whatever seed holds (zeros if untouched).
+	_, _ = rand.Read(seed[:])
+	session := uuid.New().String()
+	return "user_" + hex.EncodeToString(seed[:]) + "_account__session_" + session
+}
+
+// isValidUserID reports whether userID matches userIDPattern in full (the pattern is anchored at both ends).
+func isValidUserID(userID string) bool {
+	return userIDPattern.MatchString(userID)
+}
+
+// shouldCloak determines if request should be cloaked based on config and client User-Agent.
+// cloakMode is matched case-insensitively: "always" cloaks every request, "never"
+// cloaks none, and anything else (including "auto" and "") cloaks every client
+// that is not recognized as Claude Code.
+func shouldCloak(cloakMode string, userAgent string) bool {
+	switch strings.ToLower(cloakMode) {
+	case "always":
+		return true
+	case "never":
+		return false
+	}
+	return !isClaudeCodeClient(userAgent)
+}
+
+// isClaudeCodeClient checks if the User-Agent indicates a Claude Code client, i.e. starts with "claude-cli" (case-sensitive).
+func isClaudeCodeClient(userAgent string) bool {
+	return strings.HasPrefix(userAgent, "claude-cli")
+}

From 33aa665555e4ea6e2687cb2d6355d39f6fe5df72 Mon Sep 17 00:00:00 2001
From: Zhi Yang <196515526+FakerL@users.noreply.github.com>
Date: Mon, 5 Jan 2026 10:21:29 +0000
Subject: [PATCH 03/26] fix(auth): persist access_token on refresh for
 providers that need it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, metadataEqualIgnoringTimestamps() ignored access_token for all
providers, which prevented refreshed tokens from being persisted to disk/database.
This caused tokens to be lost on server restart for providers like iFlow.

This change makes the behavior provider-specific:
- Providers like gemini/gemini-cli that issue new tokens on every refresh and
  can re-fetch when needed will continue to ignore access_token (optimization)
- Other providers like iFlow will now persist access_token changes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 sdk/auth/filestore.go | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index 84092d37..772de9d6 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -74,7 +74,7 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
 		if existing, errRead := os.ReadFile(path); errRead == nil {
 			// Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change.
 			// This prevents the token refresh loop caused by timestamp/expired/expires_in changes.
-			if metadataEqualIgnoringTimestamps(existing, raw) {
+			if metadataEqualIgnoringTimestamps(existing, raw, auth.Provider) {
 				return path, nil
 			}
 		} else if errRead != nil && !os.IsNotExist(errRead) {
@@ -284,7 +284,10 @@ func jsonEqual(a, b []byte) bool {
 // ignoring fields that change on every refresh but don't affect functionality.
 // This prevents unnecessary file writes that would trigger watcher events and
 // create refresh loops.
-func metadataEqualIgnoringTimestamps(a, b []byte) bool {
+// The provider parameter controls whether access_token is ignored: providers like
+// Google OAuth (gemini, gemini-cli) can re-fetch tokens when needed, while others
+// like iFlow require the refreshed token to be persisted.
+func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
 	var objA, objB map[string]any
 	if err := json.Unmarshal(a, &objA); err != nil {
 		return false
@@ -295,9 +298,18 @@ func metadataEqualIgnoringTimestamps(a, b []byte) bool {
 
 	// Fields to ignore: these change on every refresh but don't affect authentication logic.
 	// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
-	// - access_token: Google OAuth returns a new access_token on each refresh, this is expected
-	//   and shouldn't trigger file writes (the new token will be fetched again when needed)
-	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh", "access_token"}
+	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
+
+	// Providers that issue new access_token on every refresh and can re-fetch when needed.
+	// For these providers, we also ignore access_token to avoid unnecessary file writes.
+	providersIgnoringAccessToken := map[string]bool{
+		"gemini":     true,
+		"gemini-cli": true,
+	}
+	if providersIgnoringAccessToken[provider] {
+		ignoredFields = append(ignoredFields, "access_token")
+	}
+
 	for _, field := range ignoredFields {
 		delete(objA, field)
 		delete(objB, field)

From 08d21b76e2ba40d522673b963b3559b405dd3f50 Mon Sep 17 00:00:00 2001
From: FakerL <jy210607@gmail.com>
Date: Mon, 5 Jan 2026 21:38:26 +0800
Subject: [PATCH 04/26] Update sdk/auth/filestore.go

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 sdk/auth/filestore.go | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index 772de9d6..3dd9f752 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -300,13 +300,10 @@ func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
 	// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
 	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
 
-	// Providers that issue new access_token on every refresh and can re-fetch when needed.
-	// For these providers, we also ignore access_token to avoid unnecessary file writes.
-	providersIgnoringAccessToken := map[string]bool{
-		"gemini":     true,
-		"gemini-cli": true,
-	}
-	if providersIgnoringAccessToken[provider] {
+	// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
+	// we ignore access_token to avoid unnecessary file writes.
+	switch provider {
+	case "gemini", "gemini-cli":
 		ignoredFields = append(ignoredFields, "access_token")
 	}
 

From ac3ca0ad8e10f60bdec90376873bcc76e6f9908e Mon Sep 17 00:00:00 2001
From: zhiqing0205 <1775840762@qq.com>
Date: Tue, 6 Jan 2026 02:25:56 +0800
Subject: [PATCH 05/26] feat(codex): include plan type in auth filename

---
 .../api/handlers/management/auth_files.go     |  7 ++-
 internal/auth/codex/filename.go               | 55 +++++++++++++++++++
 sdk/auth/codex.go                             |  8 ++-
 3 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 internal/auth/codex/filename.go

diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go
index e0904ab6..ad35bb9b 100644
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -1377,9 +1377,11 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		claims, _ := codex.ParseJWTToken(tokenResp.IDToken)
 		email := ""
 		accountID := ""
+		planType := ""
 		if claims != nil {
 			email = claims.GetUserEmail()
 			accountID = claims.GetAccountID()
+			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
 		}
 		// Build bundle compatible with existing storage
 		bundle := &codex.CodexAuthBundle{
@@ -1396,10 +1398,11 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 
 		// Create token storage and persist
 		tokenStorage := openaiAuth.CreateTokenStorage(bundle)
+		fileName := codex.CredentialFileName(tokenStorage.Email, planType, true)
 		record := &coreauth.Auth{
-			ID:       fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			ID:       fileName,
 			Provider: "codex",
-			FileName: fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			FileName: fileName,
 			Storage:  tokenStorage,
 			Metadata: map[string]any{
 				"email":      tokenStorage.Email,
diff --git a/internal/auth/codex/filename.go b/internal/auth/codex/filename.go
new file mode 100644
index 00000000..7eeedce6
--- /dev/null
+++ b/internal/auth/codex/filename.go
@@ -0,0 +1,55 @@
+package codex
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+// CredentialFileName returns the filename used to persist Codex OAuth credentials.
+// A normalized planType (e.g. "plus" -> "Plus") is appended after the email as a
+// "-<plan>" suffix to disambiguate subscriptions; includeProviderPrefix prepends "codex-".
+func CredentialFileName(email, planType string, includeProviderPrefix bool) string {
+	email = strings.TrimSpace(email) // trimmed only; the email is not otherwise sanitized here
+	plan := normalizePlanTypeForFilename(planType)
+
+	prefix := ""
+	if includeProviderPrefix {
+		prefix = "codex-"
+	}
+
+	if plan == "" { // no usable plan type: fall back to <prefix><email>.json
+		return fmt.Sprintf("%s%s.json", prefix, email)
+	}
+	return fmt.Sprintf("%s%s-%s.json", prefix, email, plan)
+}
+
+func normalizePlanTypeForFilename(planType string) string {
+	planType = strings.TrimSpace(planType)
+	if planType == "" {
+		return ""
+	}
+
+	parts := strings.FieldsFunc(planType, func(r rune) bool {
+		return !unicode.IsLetter(r) && !unicode.IsDigit(r) // every non-alphanumeric rune separates tokens
+	})
+	if len(parts) == 0 { // input consisted solely of separator runes
+		return ""
+	}
+
+	for i, part := range parts {
+		parts[i] = titleToken(part)
+	}
+	return strings.Join(parts, "-") // e.g. "chatgpt_team" -> "Chatgpt-Team"
+}
+
+func titleToken(token string) string {
+	token = strings.TrimSpace(token) // defensive: the visible caller passes letter/digit-only tokens
+	if token == "" {
+		return ""
+	}
+	lower := strings.ToLower(token)
+	runes := []rune(lower)
+	runes[0] = unicode.ToUpper(runes[0])
+	return string(runes)
+}
diff --git a/sdk/auth/codex.go b/sdk/auth/codex.go
index 99992525..af57f180 100644
--- a/sdk/auth/codex.go
+++ b/sdk/auth/codex.go
@@ -186,7 +186,13 @@ waitForCallback:
 		return nil, fmt.Errorf("codex token storage missing account information")
 	}
 
-	fileName := fmt.Sprintf("codex-%s.json", tokenStorage.Email)
+	planType := ""
+	if tokenStorage.IDToken != "" {
+		if claims, errParse := codex.ParseJWTToken(tokenStorage.IDToken); errParse == nil && claims != nil {
+			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
+		}
+	}
+	fileName := codex.CredentialFileName(tokenStorage.Email, planType, true)
 	metadata := map[string]any{
 		"email": tokenStorage.Email,
 	}

From aa8526edc0331248e0273875c517865a6732e454 Mon Sep 17 00:00:00 2001
From: zhiqing0205 <1775840762@qq.com>
Date: Tue, 6 Jan 2026 10:24:02 +0800
Subject: [PATCH 06/26] fix(codex): use unicode title casing for plan

---
 internal/auth/codex/filename.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/internal/auth/codex/filename.go b/internal/auth/codex/filename.go
index 7eeedce6..fcf02802 100644
--- a/internal/auth/codex/filename.go
+++ b/internal/auth/codex/filename.go
@@ -4,6 +4,9 @@ import (
 	"fmt"
 	"strings"
 	"unicode"
+
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
 )
 
 // CredentialFileName returns the filename used to persist Codex OAuth credentials.
@@ -48,8 +51,5 @@ func titleToken(token string) string {
 	if token == "" {
 		return ""
 	}
-	lower := strings.ToLower(token)
-	runes := []rune(lower)
-	runes[0] = unicode.ToUpper(runes[0])
-	return string(runes)
+	return cases.Title(language.English).String(token)
 }

From b0c17af2cf2ec902d584a487caa7d7e09e86735c Mon Sep 17 00:00:00 2001
From: extremk <86131343@qq.com>
Date: Sat, 10 Jan 2026 18:46:25 +0800
Subject: [PATCH 07/26] Enhance Gemini to OpenAI response conversion

Refactor response handling to support multiple candidates and improve parameter management.
---
 .../gemini_openai_response.go                 | 399 ++++++++++--------
 1 file changed, 227 insertions(+), 172 deletions(-)

diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index 52fbba43..7de1b5ff 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -21,7 +21,8 @@ import (
 // convertGeminiResponseToOpenAIChatParams holds parameters for response conversion.
 type convertGeminiResponseToOpenAIChatParams struct {
 	UnixTimestamp int64
-	FunctionIndex int
+	// FunctionIndex tracks tool call indices per candidate index to support multiple candidates.
+	FunctionIndex map[int]int
 }
 
 // functionCallIDCounter provides a process-wide unique counter for function call identifiers.
@@ -42,12 +43,19 @@ var functionCallIDCounter uint64
 // Returns:
 //   - []string: A slice of strings, each containing an OpenAI-compatible JSON response
 func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
+	// Initialize parameters if nil.
 	if *param == nil {
 		*param = &convertGeminiResponseToOpenAIChatParams{
 			UnixTimestamp: 0,
-			FunctionIndex: 0,
+			FunctionIndex: make(map[int]int),
 		}
 	}
+	
+	// Ensure the Map is initialized (handling cases where param might be reused from older context).
+	p := (*param).(*convertGeminiResponseToOpenAIChatParams)
+	if p.FunctionIndex == nil {
+		p.FunctionIndex = make(map[int]int)
+	}
 
 	if bytes.HasPrefix(rawJSON, []byte("data:")) {
 		rawJSON = bytes.TrimSpace(rawJSON[5:])
@@ -57,151 +65,179 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		return []string{}
 	}
 
-	// Initialize the OpenAI SSE template.
-	template := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
+	// Initialize the OpenAI SSE base template.
+	// We use a base template and clone it for each candidate to support multiple candidates.
+	baseTemplate := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
 
 	// Extract and set the model version.
 	if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
-		template, _ = sjson.Set(template, "model", modelVersionResult.String())
+		baseTemplate, _ = sjson.Set(baseTemplate, "model", modelVersionResult.String())
 	}
 
 	// Extract and set the creation timestamp.
 	if createTimeResult := gjson.GetBytes(rawJSON, "createTime"); createTimeResult.Exists() {
 		t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
 		if err == nil {
-			(*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp = t.Unix()
+			p.UnixTimestamp = t.Unix()
 		}
-		template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
+		baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
 	} else {
-		template, _ = sjson.Set(template, "created", (*param).(*convertGeminiResponseToOpenAIChatParams).UnixTimestamp)
+		baseTemplate, _ = sjson.Set(baseTemplate, "created", p.UnixTimestamp)
 	}
 
 	// Extract and set the response ID.
 	if responseIDResult := gjson.GetBytes(rawJSON, "responseId"); responseIDResult.Exists() {
-		template, _ = sjson.Set(template, "id", responseIDResult.String())
-	}
-
-	// Extract and set the finish reason.
-	if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
+		baseTemplate, _ = sjson.Set(baseTemplate, "id", responseIDResult.String())
 	}
 
 	// Extract and set usage metadata (token counts).
+	// Usage is applied to the base template so it appears in the chunks.
 	if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
 		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
-			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
 		if totalTokenCountResult := usageResult.Get("totalTokenCount"); totalTokenCountResult.Exists() {
-			template, _ = sjson.Set(template, "usage.total_tokens", totalTokenCountResult.Int())
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.total_tokens", totalTokenCountResult.Int())
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int() - cachedTokenCount
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
+		baseTemplate, _ = sjson.Set(baseTemplate, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
-			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
+			baseTemplate, _ = sjson.Set(baseTemplate, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
 		// Include cached token count if present (indicates prompt caching is working)
 		if cachedTokenCount > 0 {
 			var err error
-			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			baseTemplate, err = sjson.Set(baseTemplate, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
 			if err != nil {
 				log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
 			}
 		}
 	}
 
-	// Process the main content part of the response.
-	partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
-	hasFunctionCall := false
-	if partsResult.IsArray() {
-		partResults := partsResult.Array()
-		for i := 0; i < len(partResults); i++ {
-			partResult := partResults[i]
-			partTextResult := partResult.Get("text")
-			functionCallResult := partResult.Get("functionCall")
-			inlineDataResult := partResult.Get("inlineData")
-			if !inlineDataResult.Exists() {
-				inlineDataResult = partResult.Get("inline_data")
-			}
-			thoughtSignatureResult := partResult.Get("thoughtSignature")
-			if !thoughtSignatureResult.Exists() {
-				thoughtSignatureResult = partResult.Get("thought_signature")
+	var responseStrings []string
+	candidates := gjson.GetBytes(rawJSON, "candidates")
+
+	// Iterate over all candidates to support candidate_count > 1.
+	if candidates.IsArray() {
+		candidates.ForEach(func(_, candidate gjson.Result) bool {
+			// Clone the template for the current candidate.
+			template := baseTemplate
+
+			// Set the specific index for this candidate.
+			candidateIndex := int(candidate.Get("index").Int())
+			template, _ = sjson.Set(template, "choices.0.index", candidateIndex)
+
+			// Extract and set the finish reason.
+			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
+				template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
+				template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
 			}
 
-			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
-			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+			partsResult := candidate.Get("content.parts")
+			hasFunctionCall := false
 
-			// Skip pure thoughtSignature parts but keep any actual payload in the same part.
-			if hasThoughtSignature && !hasContentPayload {
-				continue
+			if partsResult.IsArray() {
+				partResults := partsResult.Array()
+				for i := 0; i < len(partResults); i++ {
+					partResult := partResults[i]
+					partTextResult := partResult.Get("text")
+					functionCallResult := partResult.Get("functionCall")
+					inlineDataResult := partResult.Get("inlineData")
+					if !inlineDataResult.Exists() {
+						inlineDataResult = partResult.Get("inline_data")
+					}
+					thoughtSignatureResult := partResult.Get("thoughtSignature")
+					if !thoughtSignatureResult.Exists() {
+						thoughtSignatureResult = partResult.Get("thought_signature")
+					}
+
+					hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+					hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+					// Skip pure thoughtSignature parts but keep any actual payload in the same part.
+					if hasThoughtSignature && !hasContentPayload {
+						continue
+					}
+
+					if partTextResult.Exists() {
+						text := partTextResult.String()
+						// Handle text content, distinguishing between regular content and reasoning/thoughts.
+						if partResult.Get("thought").Bool() {
+							template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
+						} else {
+							template, _ = sjson.Set(template, "choices.0.delta.content", text)
+						}
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+					} else if functionCallResult.Exists() {
+						// Handle function call content.
+						hasFunctionCall = true
+						toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
+						
+						// Retrieve the function index for this specific candidate.
+						functionCallIndex := p.FunctionIndex[candidateIndex]
+						p.FunctionIndex[candidateIndex]++
+
+						if toolCallsResult.Exists() && toolCallsResult.IsArray() {
+							functionCallIndex = len(toolCallsResult.Array())
+						} else {
+							template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
+						}
+
+						functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
+						fcName := functionCallResult.Get("name").String()
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
+						functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
+						if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
+							functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
+						}
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+						template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
+					} else if inlineDataResult.Exists() {
+						data := inlineDataResult.Get("data").String()
+						if data == "" {
+							continue
+						}
+						mimeType := inlineDataResult.Get("mimeType").String()
+						if mimeType == "" {
+							mimeType = inlineDataResult.Get("mime_type").String()
+						}
+						if mimeType == "" {
+							mimeType = "image/png"
+						}
+						imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
+						imagesResult := gjson.Get(template, "choices.0.delta.images")
+						if !imagesResult.Exists() || !imagesResult.IsArray() {
+							template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
+						}
+						imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
+						imagePayload := `{"type":"image_url","image_url":{"url":""}}`
+						imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
+						imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
+						template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+						template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
+					}
+				}
 			}
 
-			if partTextResult.Exists() {
-				text := partTextResult.String()
-				// Handle text content, distinguishing between regular content and reasoning/thoughts.
-				if partResult.Get("thought").Bool() {
-					template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text)
-				} else {
-					template, _ = sjson.Set(template, "choices.0.delta.content", text)
-				}
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-			} else if functionCallResult.Exists() {
-				// Handle function call content.
-				hasFunctionCall = true
-				toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
-				functionCallIndex := (*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex
-				(*param).(*convertGeminiResponseToOpenAIChatParams).FunctionIndex++
-				if toolCallsResult.Exists() && toolCallsResult.IsArray() {
-					functionCallIndex = len(toolCallsResult.Array())
-				} else {
-					template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-				}
-
-				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
-				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
-				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
-					functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
-				}
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
-			} else if inlineDataResult.Exists() {
-				data := inlineDataResult.Get("data").String()
-				if data == "" {
-					continue
-				}
-				mimeType := inlineDataResult.Get("mimeType").String()
-				if mimeType == "" {
-					mimeType = inlineDataResult.Get("mime_type").String()
-				}
-				if mimeType == "" {
-					mimeType = "image/png"
-				}
-				imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
-				imagesResult := gjson.Get(template, "choices.0.delta.images")
-				if !imagesResult.Exists() || !imagesResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
-				}
-				imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array())
-				imagePayload := `{"type":"image_url","image_url":{"url":""}}`
-				imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
-				imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
-				template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload)
+			if hasFunctionCall {
+				template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
+				template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
 			}
+
+			responseStrings = append(responseStrings, template)
+			return true // continue loop
+		})
+	} else {
+		// If there are no candidates (e.g., a pure usageMetadata chunk), return the usage chunk if present.
+		if gjson.GetBytes(rawJSON, "usageMetadata").Exists() && len(responseStrings) == 0 {
+			responseStrings = append(responseStrings, baseTemplate)
 		}
 	}
 
-	if hasFunctionCall {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
-	}
-
-	return []string{template}
+	return responseStrings
 }
 
 // ConvertGeminiResponseToOpenAINonStream converts a non-streaming Gemini response to a non-streaming OpenAI response.
@@ -219,7 +255,9 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 //   - string: An OpenAI-compatible JSON response containing all message content and metadata
 func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	var unixTimestamp int64
-	template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
+	// Initialize template with an empty choices array to support multiple candidates.
+	template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[]}`
+
 	if modelVersionResult := gjson.GetBytes(rawJSON, "modelVersion"); modelVersionResult.Exists() {
 		template, _ = sjson.Set(template, "model", modelVersionResult.String())
 	}
@@ -238,11 +276,6 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		template, _ = sjson.Set(template, "id", responseIDResult.String())
 	}
 
-	if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
-	}
-
 	if usageResult := gjson.GetBytes(rawJSON, "usageMetadata"); usageResult.Exists() {
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
@@ -267,74 +300,96 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 	}
 
-	// Process the main content part of the response.
-	partsResult := gjson.GetBytes(rawJSON, "candidates.0.content.parts")
-	hasFunctionCall := false
-	if partsResult.IsArray() {
-		partsResults := partsResult.Array()
-		for i := 0; i < len(partsResults); i++ {
-			partResult := partsResults[i]
-			partTextResult := partResult.Get("text")
-			functionCallResult := partResult.Get("functionCall")
-			inlineDataResult := partResult.Get("inlineData")
-			if !inlineDataResult.Exists() {
-				inlineDataResult = partResult.Get("inline_data")
+	// Process the main content part of the response for all candidates.
+	candidates := gjson.GetBytes(rawJSON, "candidates")
+	if candidates.IsArray() {
+		candidates.ForEach(func(_, candidate gjson.Result) bool {
+			// Construct a single Choice object.
+			choiceTemplate := `{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}`
+
+			// Set the index for this choice.
+			choiceTemplate, _ = sjson.Set(choiceTemplate, "index", candidate.Get("index").Int())
+
+			// Set finish reason.
+			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", strings.ToLower(finishReasonResult.String()))
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", strings.ToLower(finishReasonResult.String()))
 			}
 
-			if partTextResult.Exists() {
-				// Append text content, distinguishing between regular content and reasoning.
-				if partResult.Get("thought").Bool() {
-					template, _ = sjson.Set(template, "choices.0.message.reasoning_content", partTextResult.String())
-				} else {
-					template, _ = sjson.Set(template, "choices.0.message.content", partTextResult.String())
-				}
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-			} else if functionCallResult.Exists() {
-				// Append function call content to the tool_calls array.
-				hasFunctionCall = true
-				toolCallsResult := gjson.Get(template, "choices.0.message.tool_calls")
-				if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls", `[]`)
-				}
-				functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
-				fcName := functionCallResult.Get("name").String()
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
-				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
-					functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
-				}
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
-			} else if inlineDataResult.Exists() {
-				data := inlineDataResult.Get("data").String()
-				if data == "" {
-					continue
-				}
-				mimeType := inlineDataResult.Get("mimeType").String()
-				if mimeType == "" {
-					mimeType = inlineDataResult.Get("mime_type").String()
-				}
-				if mimeType == "" {
-					mimeType = "image/png"
-				}
-				imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
-				imagesResult := gjson.Get(template, "choices.0.message.images")
-				if !imagesResult.Exists() || !imagesResult.IsArray() {
-					template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`)
-				}
-				imageIndex := len(gjson.Get(template, "choices.0.message.images").Array())
-				imagePayload := `{"type":"image_url","image_url":{"url":""}}`
-				imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
-				imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
-				template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
-				template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload)
-			}
-		}
-	}
+			partsResult := candidate.Get("content.parts")
+			hasFunctionCall := false
+			if partsResult.IsArray() {
+				partsResults := partsResult.Array()
+				for i := 0; i < len(partsResults); i++ {
+					partResult := partsResults[i]
+					partTextResult := partResult.Get("text")
+					functionCallResult := partResult.Get("functionCall")
+					inlineDataResult := partResult.Get("inlineData")
+					if !inlineDataResult.Exists() {
+						inlineDataResult = partResult.Get("inline_data")
+					}
 
-	if hasFunctionCall {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
+					if partTextResult.Exists() {
+						// Append text content, distinguishing between regular content and reasoning.
+						if partResult.Get("thought").Bool() {
+							oldVal := gjson.Get(choiceTemplate, "message.reasoning_content").String()
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.reasoning_content", oldVal+partTextResult.String())
+						} else {
+							oldVal := gjson.Get(choiceTemplate, "message.content").String()
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.content", oldVal+partTextResult.String())
+						}
+						choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+					} else if functionCallResult.Exists() {
+						// Append function call content to the tool_calls array.
+						hasFunctionCall = true
+						toolCallsResult := gjson.Get(choiceTemplate, "message.tool_calls")
+						if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
+							choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls", `[]`)
+						}
+						functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
+						fcName := functionCallResult.Get("name").String()
+						functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
+						functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
+						if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
+							functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
+						}
+						choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+						choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.tool_calls.-1", functionCallItemTemplate)
+					} else if inlineDataResult.Exists() {
+						data := inlineDataResult.Get("data").String()
+						if data != "" {
+							mimeType := inlineDataResult.Get("mimeType").String()
+							if mimeType == "" {
+								mimeType = inlineDataResult.Get("mime_type").String()
+							}
+							if mimeType == "" {
+								mimeType = "image/png"
+							}
+							imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
+							imagesResult := gjson.Get(choiceTemplate, "message.images")
+							if !imagesResult.Exists() || !imagesResult.IsArray() {
+								choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images", `[]`)
+							}
+							imageIndex := len(gjson.Get(choiceTemplate, "message.images").Array())
+							imagePayload := `{"type":"image_url","image_url":{"url":""}}`
+							imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex)
+							imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL)
+							choiceTemplate, _ = sjson.Set(choiceTemplate, "message.role", "assistant")
+							choiceTemplate, _ = sjson.SetRaw(choiceTemplate, "message.images.-1", imagePayload)
+						}
+					}
+				}
+			}
+
+			if hasFunctionCall {
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "finish_reason", "tool_calls")
+				choiceTemplate, _ = sjson.Set(choiceTemplate, "native_finish_reason", "tool_calls")
+			}
+
+			// Append the constructed choice to the main choices array.
+			template, _ = sjson.SetRaw(template, "choices.-1", choiceTemplate)
+			return true
+		})
 	}
 
 	return template

From 6448d0ee7c783561d069f818d435a22cbded12e4 Mon Sep 17 00:00:00 2001
From: extremk <86131343@qq.com>
Date: Sat, 10 Jan 2026 18:47:41 +0800
Subject: [PATCH 08/26] Add candidate count handling in Gemini CLI OpenAI request

---
 .../openai/chat-completions/gemini-cli_openai_request.go   | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index 98188835..6be05346 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -81,6 +81,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
 	}
 
+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {

From 14c74e5e848b8583203effb67a4a0bb48259da2a Mon Sep 17 00:00:00 2001
From: extremk <86131343@qq.com>
Date: Sat, 10 Jan 2026 18:48:33 +0800
Subject: [PATCH 09/26] Handle 'n' parameter for candidate count in requests

Map the OpenAI 'n' parameter to generationConfig.candidateCount when n is greater than 1.
---
 .../openai/chat-completions/gemini_openai_request.go       | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
index 57e150c1..3d2009db 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -99,6 +99,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num)
 	}
 
+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {

From 0b5bbe923499359e471c739c43f252983bdd8af2 Mon Sep 17 00:00:00 2001
From: extremk <86131343@qq.com>
Date: Sat, 10 Jan 2026 18:49:29 +0800
Subject: [PATCH 10/26] Add candidate count handling in OpenAI request

---
 .../openai/chat-completions/antigravity_openai_request.go  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
index 7ca01b07..fde5829f 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -113,6 +113,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
 	}
 
+	// Candidate count (OpenAI 'n' parameter)
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {

From 5bb9c2a2bd2ba65d642284e2d89715698ec56d9c Mon Sep 17 00:00:00 2001
From: extremk <86131343@qq.com>
Date: Sat, 10 Jan 2026 18:50:13 +0800
Subject: [PATCH 11/26] Add candidate count parameter to OpenAI request

---
 internal/translator/openai/gemini/openai_gemini_request.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go
index f51d914b..5dba2421 100644
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -77,6 +77,11 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 			}
 		}
 
+		// Candidate count (OpenAI 'n' parameter)
+		if candidateCount := genConfig.Get("candidateCount"); candidateCount.Exists() {
+			out, _ = sjson.Set(out, "n", candidateCount.Int())
+		}
+
 		// Convert thinkingBudget to reasoning_effort
 		// Always perform conversion to support allowCompat models that may not be in registry
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {

From 5977af96a056658873b1c1a8f60df72d4a283c5a Mon Sep 17 00:00:00 2001
From: adrenjc <1145156794@qq.com>
Date: Tue, 13 Jan 2026 18:24:05 +0800
Subject: [PATCH 12/26] fix(antigravity): prevent corrupted thought signature
 when switching models

When switching from Claude models (e.g., Opus 4.5) to Gemini models
(e.g., Flash) mid-conversation via Antigravity OAuth, the client-provided
thinking signatures from Claude would cause "Corrupted thought signature"
errors since they are incompatible with the Gemini API.

Changes:
- Remove fallback to client-provided signatures in thinking block handling
- Only use cached signatures (from same-session Gemini responses)
- Skip thinking blocks without valid cached signatures
- tool_use blocks continue to use skip_thought_signature_validator when
  no valid signature is available

This ensures cross-model switching works correctly while preserving
signature validation for same-model conversations.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../claude/antigravity_claude_request.go      | 15 ++---
 .../claude/antigravity_claude_request_test.go | 67 ++++++++++++++++---
 2 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index d5064c3c..13ddfe5a 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -123,11 +123,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
 						thinkingText := util.GetThinkingText(contentResult)
-						signatureResult := contentResult.Get("signature")
-						clientSignature := ""
-						if signatureResult.Exists() && signatureResult.String() != "" {
-							clientSignature = signatureResult.String()
-						}
 
 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions
@@ -139,11 +134,11 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							}
 						}
 
-						// Fallback to client signature only if cache miss and client signature is valid
-						if signature == "" && cache.HasValidSignature(clientSignature) {
-							signature = clientSignature
-							// log.Debugf("Using client-provided signature for thinking block")
-						}
+						// NOTE: We do NOT fallback to client signature anymore.
+						// Client signatures from Claude models are incompatible with Antigravity/Gemini API.
+						// When switching between models (e.g., Claude Opus -> Gemini Flash), the Claude
+						// signatures will cause "Corrupted thought signature" errors.
+						// If we have no cached signature, the thinking block will be skipped below.
 
 						// Store for subsequent tool_use in the same message
 						if cache.HasValidSignature(signature) {
diff --git a/internal/translator/antigravity/claude/antigravity_claude_request_test.go b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
index 1d727c94..8c045620 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -4,6 +4,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
 	"github.com/tidwall/gjson"
 )
 
@@ -75,28 +76,42 @@ func TestConvertClaudeRequestToAntigravity_RoleMapping(t *testing.T) {
 func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
 	// Valid signature must be at least 50 characters
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
+	// Pre-cache the signature (simulating a response from the same session)
+	// The session ID is derived from the first user message hash
+	// Since there's no user message in this test, we need to add one
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{"type": "text", "text": "Answer"}
 				]
 			}
 		]
 	}`)
 
+	// Derive session ID and cache the signature
+	sessionID := deriveSessionID(inputJSON)
+	cache.CacheSignature(sessionID, thinkingText, validSignature)
+	defer cache.ClearSignatureCache(sessionID)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)
 
-	// Check thinking block conversion
-	firstPart := gjson.Get(outputStr, "request.contents.0.parts.0")
+	// Check thinking block conversion (now in contents.1 due to user message)
+	firstPart := gjson.Get(outputStr, "request.contents.1.parts.0")
 	if !firstPart.Get("thought").Bool() {
 		t.Error("thinking block should have thought: true")
 	}
-	if firstPart.Get("text").String() != "Let me think..." {
+	if firstPart.Get("text").String() != thinkingText {
 		t.Error("thinking text mismatch")
 	}
 	if firstPart.Get("thoughtSignature").String() != validSignature {
@@ -227,13 +242,19 @@ func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
 
 func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{
 						"type": "tool_use",
 						"id": "call_123",
@@ -245,11 +266,16 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 		]
 	}`)
 
+	// Derive session ID and cache the signature
+	sessionID := deriveSessionID(inputJSON)
+	cache.CacheSignature(sessionID, thinkingText, validSignature)
+	defer cache.ClearSignatureCache(sessionID)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)
 
-	// Check function call has the signature from the preceding thinking block
-	part := gjson.Get(outputStr, "request.contents.0.parts.1")
+	// Check function call has the signature from the preceding thinking block (now in contents.1)
+	part := gjson.Get(outputStr, "request.contents.1.parts.1")
 	if part.Get("functionCall.name").String() != "get_weather" {
 		t.Errorf("Expected functionCall, got %s", part.Raw)
 	}
@@ -261,24 +287,35 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) {
 	// Case: text block followed by thinking block -> should be reordered to thinking first
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Planning..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is the plan."},
-					{"type": "thinking", "thinking": "Planning...", "signature": "` + validSignature + `"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)
 
+	// Derive session ID and cache the signature
+	sessionID := deriveSessionID(inputJSON)
+	cache.CacheSignature(sessionID, thinkingText, validSignature)
+	defer cache.ClearSignatureCache(sessionID)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)
 
-	// Verify order: Thinking block MUST be first
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
+	// Verify order: Thinking block MUST be first (now in contents.1 due to user message)
+	parts := gjson.Get(outputStr, "request.contents.1.parts").Array()
 	if len(parts) != 2 {
 		t.Fatalf("Expected 2 parts, got %d", len(parts))
 	}
@@ -460,6 +497,9 @@ func TestConvertClaudeRequestToAntigravity_TrailingUnsignedThinking_Removed(t *t
 
 func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testing.T) {
 	// Last assistant message ends with signed thinking block - should be kept
+	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Valid thinking..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
@@ -471,12 +511,17 @@ func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testin
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is my answer"},
-					{"type": "thinking", "thinking": "Valid thinking...", "signature": "abc123validSignature1234567890123456789012345678901234567890"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)
 
+	// Derive session ID and cache the signature
+	sessionID := deriveSessionID(inputJSON)
+	cache.CacheSignature(sessionID, thinkingText, validSignature)
+	defer cache.ClearSignatureCache(sessionID)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)
 

From 8734d4cb9080706ffc4e980452a5cc44ce5097af Mon Sep 17 00:00:00 2001
From: dinhkarate <dinhkarat@gmail.com>
Date: Tue, 20 Jan 2026 01:26:37 +0700
Subject: [PATCH 13/26] feat(vertex): add Imagen image generation model support

Add support for Imagen 3.0 and 4.0 image generation models in Vertex AI:

- Add 5 Imagen model definitions (4.0, 4.0-ultra, 4.0-fast, 3.0, 3.0-fast)
- Implement :predict action routing for Imagen models
- Convert between Imagen and Gemini request/response formats so output matches gemini-3-pro-image-preview
- Transform prompts to Imagen's instances/parameters format
- Convert base64 image responses to Gemini-compatible inline data
---
 internal/registry/model_definitions.go        |  61 +++++
 .../executor/gemini_vertex_executor.go        | 222 +++++++++++++++---
 2 files changed, 256 insertions(+), 27 deletions(-)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 080c2726..1d29bda2 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -287,6 +287,67 @@ func GetGeminiVertexModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
+		// Imagen image generation models - use :predict action
+		{
+			ID:                         "imagen-4.0-generate-001",
+			Object:                     "model",
+			Created:                    1750000000,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Generate",
+			Description:                "Imagen 4.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-ultra-generate-001",
+			Object:                     "model",
+			Created:                    1750000000,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-ultra-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Ultra Generate",
+			Description:                "Imagen 4.0 Ultra high-quality image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-generate-002",
+			Object:                     "model",
+			Created:                    1740000000,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-generate-002",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Generate",
+			Description:                "Imagen 3.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1740000000,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-fast-generate-001",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Fast Generate",
+			Description:                "Imagen 3.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1750000000,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-fast-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Fast Generate",
+			Description:                "Imagen 4.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
 	}
 }
 
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index 20e59b3f..1184c07e 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -12,6 +12,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"
 
 	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -31,6 +32,143 @@ const (
 	vertexAPIVersion = "v1"
 )
 
+// isImagenModel checks if the model name is an Imagen image generation model.
+// Imagen models use the :predict action instead of :generateContent.
+func isImagenModel(model string) bool {
+	lowerModel := strings.ToLower(model)
+	return strings.Contains(lowerModel, "imagen")
+}
+
+// getVertexAction returns the appropriate action for the given model.
+// Imagen models use "predict", while Gemini models use "generateContent".
+func getVertexAction(model string, isStream bool) string {
+	if isImagenModel(model) {
+		return "predict"
+	}
+	if isStream {
+		return "streamGenerateContent"
+	}
+	return "generateContent"
+}
+
+// convertImagenToGeminiResponse converts Imagen API response to Gemini format
+// so it can be processed by the standard translation pipeline.
+// This ensures Imagen models return responses in the same format as gemini-3-pro-image-preview.
+func convertImagenToGeminiResponse(data []byte, model string) []byte {
+	predictions := gjson.GetBytes(data, "predictions")
+	if !predictions.Exists() || !predictions.IsArray() {
+		return data
+	}
+
+	// Build Gemini-compatible response with inlineData
+	parts := make([]map[string]any, 0)
+	for _, pred := range predictions.Array() {
+		imageData := pred.Get("bytesBase64Encoded").String()
+		mimeType := pred.Get("mimeType").String()
+		if mimeType == "" {
+			mimeType = "image/png"
+		}
+		if imageData != "" {
+			parts = append(parts, map[string]any{
+				"inlineData": map[string]any{
+					"mimeType": mimeType,
+					"data":     imageData,
+				},
+			})
+		}
+	}
+
+	// Generate unique response ID using timestamp
+	responseId := fmt.Sprintf("imagen-%d", time.Now().UnixNano())
+
+	response := map[string]any{
+		"candidates": []map[string]any{{
+			"content": map[string]any{
+				"parts": parts,
+				"role":  "model",
+			},
+			"finishReason": "STOP",
+		}},
+		"responseId":   responseId,
+		"modelVersion": model,
+		// Imagen API doesn't return token counts, set to 0 for tracking purposes
+		"usageMetadata": map[string]any{
+			"promptTokenCount":     0,
+			"candidatesTokenCount": 0,
+			"totalTokenCount":      0,
+		},
+	}
+
+	result, err := json.Marshal(response)
+	if err != nil {
+		return data
+	}
+	return result
+}
+
+// convertToImagenRequest converts a Gemini-style request to Imagen API format.
+// Imagen API uses a different structure: instances[].prompt instead of contents[].
+func convertToImagenRequest(payload []byte) ([]byte, error) {
+	// Extract prompt from Gemini-style contents
+	prompt := ""
+
+	// Try to get prompt from contents[0].parts[0].text
+	contentsText := gjson.GetBytes(payload, "contents.0.parts.0.text")
+	if contentsText.Exists() {
+		prompt = contentsText.String()
+	}
+
+	// If no contents, try messages format (OpenAI-compatible)
+	if prompt == "" {
+		messagesText := gjson.GetBytes(payload, "messages.#.content")
+		if messagesText.Exists() && messagesText.IsArray() {
+			for _, msg := range messagesText.Array() {
+				if msg.String() != "" {
+					prompt = msg.String()
+					break
+				}
+			}
+		}
+	}
+
+	// If still no prompt, try direct prompt field
+	if prompt == "" {
+		directPrompt := gjson.GetBytes(payload, "prompt")
+		if directPrompt.Exists() {
+			prompt = directPrompt.String()
+		}
+	}
+
+	if prompt == "" {
+		return nil, fmt.Errorf("imagen: no prompt found in request")
+	}
+
+	// Build Imagen API request
+	imagenReq := map[string]any{
+		"instances": []map[string]any{
+			{
+				"prompt": prompt,
+			},
+		},
+		"parameters": map[string]any{
+			"sampleCount": 1,
+		},
+	}
+
+	// Extract optional parameters
+	if aspectRatio := gjson.GetBytes(payload, "aspectRatio"); aspectRatio.Exists() {
+		imagenReq["parameters"].(map[string]any)["aspectRatio"] = aspectRatio.String()
+	}
+	if sampleCount := gjson.GetBytes(payload, "sampleCount"); sampleCount.Exists() {
+		imagenReq["parameters"].(map[string]any)["sampleCount"] = int(sampleCount.Int())
+	}
+	if negativePrompt := gjson.GetBytes(payload, "negativePrompt"); negativePrompt.Exists() {
+		imagenReq["instances"].([]map[string]any)[0]["negativePrompt"] = negativePrompt.String()
+	}
+
+	return json.Marshal(imagenReq)
+}
+
 // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
 type GeminiVertexExecutor struct {
 	cfg *config.Config
@@ -160,26 +298,38 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)
 
-	from := opts.SourceFormat
-	to := sdktranslator.FromString("gemini")
+	var body []byte
 
-	originalPayload := bytes.Clone(req.Payload)
-	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
-	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	// Handle Imagen models with special request format
+	if isImagenModel(baseModel) {
+		imagenBody, errImagen := convertToImagenRequest(req.Payload)
+		if errImagen != nil {
+			return resp, errImagen
+		}
+		body = imagenBody
+	} else {
+		// Standard Gemini translation flow
+		from := opts.SourceFormat
+		to := sdktranslator.FromString("gemini")
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
-	if err != nil {
-		return resp, err
+		originalPayload := bytes.Clone(req.Payload)
+		if len(opts.OriginalRequest) > 0 {
+			originalPayload = bytes.Clone(opts.OriginalRequest)
+		}
+		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+		if err != nil {
+			return resp, err
+		}
+
+		body = fixGeminiImageAspectRatio(baseModel, body)
+		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}
 
-	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", baseModel)
-
-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -249,6 +399,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
+
+	// For Imagen models, convert response to Gemini format before translation
+	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
+	if isImagenModel(baseModel) {
+		data = convertImagenToGeminiResponse(data, baseModel)
+	}
+
+	// Standard Gemini translation (works for both Gemini and converted Imagen responses)
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
@@ -281,7 +441,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -384,12 +544,16 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
+	action := getVertexAction(baseModel, true)
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")
 
@@ -503,15 +667,19 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
+	action := getVertexAction(baseModel, true)
 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")
 

From e641fde25cf9f8146560042a2c3ba43ef7f329a1 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 20 Jan 2026 09:57:06 +0800
Subject: [PATCH 14/26] feat(registry): support provider-specific model info
 lookup

---
 internal/registry/model_registry.go           | 54 +++++++++++++++----
 .../runtime/executor/aistudio_executor.go     |  2 +-
 .../runtime/executor/antigravity_executor.go  |  8 +--
 internal/runtime/executor/claude_executor.go  |  4 +-
 internal/runtime/executor/codex_executor.go   |  6 +--
 .../runtime/executor/gemini_cli_executor.go   |  6 +--
 internal/runtime/executor/gemini_executor.go  |  6 +--
 .../executor/gemini_vertex_executor.go        | 12 ++---
 internal/runtime/executor/iflow_executor.go   |  4 +-
 .../executor/openai_compat_executor.go        |  6 +--
 internal/runtime/executor/qwen_executor.go    |  4 +-
 internal/thinking/apply.go                    | 14 +++--
 test/thinking_conversion_test.go              |  2 +-
 13 files changed, 85 insertions(+), 43 deletions(-)

diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 970c2dc9..5de0ba4a 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -78,6 +78,8 @@ type ThinkingSupport struct {
 type ModelRegistration struct {
 	// Info contains the model metadata
 	Info *ModelInfo
+	// InfoByProvider maps provider identifiers to specific ModelInfo to support differing capabilities.
+	InfoByProvider map[string]*ModelInfo
 	// Count is the number of active clients that can provide this model
 	Count int
 	// LastUpdated tracks when this registration was last modified
@@ -132,16 +134,19 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }
 
-// LookupModelInfo searches the dynamic registry first, then falls back to static model definitions.
-//
-// This helper exists because some code paths only have a model ID and still need Thinking and
-// max completion token metadata even when the dynamic registry hasn't been populated.
-func LookupModelInfo(modelID string) *ModelInfo {
+// LookupModelInfo searches dynamic registry (provider-specific > global) then static definitions.
+func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
 	modelID = strings.TrimSpace(modelID)
 	if modelID == "" {
 		return nil
 	}
-	if info := GetGlobalRegistry().GetModelInfo(modelID); info != nil {
+
+	p := ""
+	if len(provider) > 0 {
+		p = strings.ToLower(strings.TrimSpace(provider[0]))
+	}
+
+	if info := GetGlobalRegistry().GetModelInfo(modelID, p); info != nil {
 		return info
 	}
 	return LookupStaticModelInfo(modelID)
@@ -297,6 +302,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 				if count, okProv := reg.Providers[oldProvider]; okProv {
 					if count <= toRemove {
 						delete(reg.Providers, oldProvider)
+						if reg.InfoByProvider != nil {
+							delete(reg.InfoByProvider, oldProvider)
+						}
 					} else {
 						reg.Providers[oldProvider] = count - toRemove
 					}
@@ -346,6 +354,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		model := newModels[id]
 		if reg, ok := r.models[id]; ok {
 			reg.Info = cloneModelInfo(model)
+			if provider != "" {
+				if reg.InfoByProvider == nil {
+					reg.InfoByProvider = make(map[string]*ModelInfo)
+				}
+				reg.InfoByProvider[provider] = cloneModelInfo(model)
+			}
 			reg.LastUpdated = now
 			if reg.QuotaExceededClients != nil {
 				delete(reg.QuotaExceededClients, clientID)
@@ -409,11 +423,15 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 		if existing.SuspendedClients == nil {
 			existing.SuspendedClients = make(map[string]string)
 		}
+		if existing.InfoByProvider == nil {
+			existing.InfoByProvider = make(map[string]*ModelInfo)
+		}
 		if provider != "" {
 			if existing.Providers == nil {
 				existing.Providers = make(map[string]int)
 			}
 			existing.Providers[provider]++
+			existing.InfoByProvider[provider] = cloneModelInfo(model)
 		}
 		log.Debugf("Incremented count for model %s, now %d clients", modelID, existing.Count)
 		return
@@ -421,6 +439,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 
 	registration := &ModelRegistration{
 		Info:                 cloneModelInfo(model),
+		InfoByProvider:       make(map[string]*ModelInfo),
 		Count:                1,
 		LastUpdated:          now,
 		QuotaExceededClients: make(map[string]*time.Time),
@@ -428,6 +447,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 	}
 	if provider != "" {
 		registration.Providers = map[string]int{provider: 1}
+		registration.InfoByProvider[provider] = cloneModelInfo(model)
 	}
 	r.models[modelID] = registration
 	log.Debugf("Registered new model %s from provider %s", modelID, provider)
@@ -453,6 +473,9 @@ func (r *ModelRegistry) removeModelRegistration(clientID, modelID, provider stri
 		if count, ok := registration.Providers[provider]; ok {
 			if count <= 1 {
 				delete(registration.Providers, provider)
+				if registration.InfoByProvider != nil {
+					delete(registration.InfoByProvider, provider)
+				}
 			} else {
 				registration.Providers[provider] = count - 1
 			}
@@ -534,6 +557,9 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 				if count, ok := registration.Providers[provider]; ok {
 					if count <= 1 {
 						delete(registration.Providers, provider)
+						if registration.InfoByProvider != nil {
+							delete(registration.InfoByProvider, provider)
+						}
 					} else {
 						registration.Providers[provider] = count - 1
 					}
@@ -940,12 +966,22 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }
 
-// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
-// Returns nil if the model is unknown to the registry.
-func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+// GetModelInfo returns ModelInfo, prioritizing provider-specific definition if available.
+func (r *ModelRegistry) GetModelInfo(modelID, provider string) *ModelInfo {
 	r.mutex.RLock()
 	defer r.mutex.RUnlock()
 	if reg, ok := r.models[modelID]; ok && reg != nil {
+		// Try provider specific definition first
+		if provider != "" && reg.InfoByProvider != nil {
+			if reg.Providers != nil {
+				if count, ok := reg.Providers[provider]; ok && count > 0 {
+					if info, ok := reg.InfoByProvider[provider]; ok && info != nil {
+						return info
+					}
+				}
+			}
+		}
+		// Fallback to global info (last registered)
 		return reg.Info
 	}
 	return nil
diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index a020c670..eba38b00 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -393,7 +393,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
 	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
-	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, translatedPayload{}, err
 	}
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index df26e376..55cc1626 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -137,7 +137,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -256,7 +256,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -622,7 +622,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -802,7 +802,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	// Prepare payload once (doesn't depend on baseURL)
 	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index b6d5418a..d5b3132a 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -105,7 +105,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -235,7 +235,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index cc0e32a1..a283df86 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -96,7 +96,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -208,7 +208,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 	body = misc.StripCodexUserAgent(body)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -316,7 +316,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)
 
-	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index b23406af..ba321ca5 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -123,7 +123,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -272,7 +272,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String())
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -479,7 +479,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	for range models {
 		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String())
+		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
 			return cliproxyexecutor.Response{}, err
 		}
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index e9f9dbca..2c7a860c 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -120,7 +120,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -222,7 +222,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -338,7 +338,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index 1184c07e..302989c8 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -319,7 +319,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
 			return resp, err
 		}
@@ -432,7 +432,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -535,7 +535,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -658,7 +658,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -773,7 +773,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 
 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -857,7 +857,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 
 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String())
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index 3e6ca4e5..c62c0659 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -92,7 +92,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -190,7 +190,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index a2bef724..d910294a 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -92,7 +92,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
 
-	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -187,7 +187,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
 
-	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -297,7 +297,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau
 
 	modelForCounting := baseModel
 
-	translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String())
+	translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index 260165d9..e013f594 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -86,7 +86,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -172,7 +172,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String())
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go
index cf0e373b..58c26286 100644
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -63,6 +63,7 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 //   - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
 //   - fromFormat: Source request format (e.g., openai, codex, gemini)
 //   - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
+//   - providerKey: Provider identifier used for registry model lookups (may differ from toFormat, e.g., openrouter -> openai)
 //
 // Returns:
 //   - Modified request body JSON with thinking configuration applied
@@ -79,12 +80,16 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 // Example:
 //
 //	// With suffix - suffix config takes priority
-//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini")
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini", "gemini")
 //
 //	// Without suffix - uses body config
-//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini")
-func ApplyThinking(body []byte, model string, fromFormat string, toFormat string) ([]byte, error) {
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini", "gemini")
+func ApplyThinking(body []byte, model string, fromFormat string, toFormat string, providerKey string) ([]byte, error) {
 	providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
+	providerKey = strings.ToLower(strings.TrimSpace(providerKey))
+	if providerKey == "" {
+		providerKey = providerFormat
+	}
 	fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
 	if fromFormat == "" {
 		fromFormat = providerFormat
@@ -102,7 +107,8 @@ func ApplyThinking(body []byte, model string, fromFormat string, toFormat string
 	// 2. Parse suffix and get modelInfo
 	suffixResult := ParseSuffix(model)
 	baseModel := suffixResult.ModelName
-	modelInfo := registry.LookupModelInfo(baseModel)
+	// Use provider-specific lookup to handle capability differences across providers.
+	modelInfo := registry.LookupModelInfo(baseModel, providerKey)
 
 	// 3. Model capability check
 	// Unknown models are treated as user-defined so thinking config can still be applied.
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index 4a7df29a..3ad26ea6 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -2712,7 +2712,7 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
 				body, _ = sjson.SetBytes(body, "max_tokens", 200000)
 			}
 
-			body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo)
+			body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo, applyTo)
 
 			if tc.expectErr {
 				if err == nil {

From d4bb4e66242decc7ad66b5190f9e91b1e332d281 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 10:17:55 +0800
Subject: [PATCH 15/26] refactor(antigravity): remove unused client signature
 handling in thinking objects

---
 .../antigravity/claude/antigravity_claude_request.go   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index 4216393f..038ddb8e 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -124,11 +124,11 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
 						thinkingText := thinking.GetThinkingText(contentResult)
-						signatureResult := contentResult.Get("signature")
-						clientSignature := ""
-						if signatureResult.Exists() && signatureResult.String() != "" {
-							clientSignature = signatureResult.String()
-						}
+						// signatureResult := contentResult.Get("signature")
+						// clientSignature := ""
+						// if signatureResult.Exists() && signatureResult.String() != "" {
+						// 	clientSignature = signatureResult.String()
+						// }
 
 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions

From 7831cba9f60307d8bc10c1454546e07f955349ab Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:02:52 +0800
Subject: [PATCH 16/26] refactor(claude): remove redundant system instructions
 check in Claude executor

---
 internal/runtime/executor/claude_executor.go | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 8e0d15a6..9d8ad260 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -110,10 +110,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		return resp, err
 	}
 
-	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
-		body = checkSystemInstructions(body)
-	}
-
 	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
 	// based on client type and configuration.
 	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
@@ -245,8 +241,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		return nil, err
 	}
 
-	body = checkSystemInstructions(body)
-
 	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
 	// based on client type and configuration.
 	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)

From 8447fd27a09d0cae5a4768ac2786dbfd931e7617 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:09:56 +0800
Subject: [PATCH 17/26] fix(login): remove emojis from interactive prompt
 messages

---
 internal/cmd/login.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/internal/cmd/login.go b/internal/cmd/login.go
index f905e5ff..b5129cfd 100644
--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -276,11 +276,11 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 
 					if isFreeUser {
 						// Interactive prompt for free users
-						fmt.Printf("\n⚠️  Google returned a different project ID:\n")
-						fmt.Printf("    Requested (frontend): %s\n", projectID)
-						fmt.Printf("    Returned (backend):   %s\n\n", responseProjectID)
-						fmt.Printf("ℹ️   Backend project IDs have access to preview models (gemini-3-*).\n")
-						fmt.Printf("    This is normal for free tier users.\n\n")
+						fmt.Printf("\nGoogle returned a different project ID:\n")
+						fmt.Printf("  Requested (frontend): %s\n", projectID)
+						fmt.Printf("  Returned (backend):   %s\n\n", responseProjectID)
+						fmt.Printf("  Backend project IDs have access to preview models (gemini-3-*).\n")
+						fmt.Printf("  This is normal for free tier users.\n\n")
 						fmt.Printf("Which project ID would you like to use?\n")
 						fmt.Printf("  [1] Backend (recommended): %s\n", responseProjectID)
 						fmt.Printf("  [2] Frontend: %s\n\n", projectID)
@@ -292,7 +292,7 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 
 						if choice == "2" {
 							log.Infof("Using frontend project ID: %s", projectID)
-							fmt.Println("⚠️  Warning: Frontend project IDs may not have access to preview models.")
+							fmt.Println("Warning: Frontend project IDs may not have access to preview models.")
 							finalProjectID = projectID
 						} else {
 							log.Infof("Using backend project ID: %s (recommended)", responseProjectID)
@@ -303,7 +303,7 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
 					}
 				} else {
-				finalProjectID = responseProjectID
+					finalProjectID = responseProjectID
 				}
 			}
 

From 059bfee91b4e29a34337b2c8fcb700bb8fc7ac82 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:36:29 +0800
Subject: [PATCH 18/26] feat(auth): add hashed account ID to credential
 filenames for team plans

---
 internal/auth/codex/filename.go |  8 +++++++-
 sdk/auth/codex.go               | 10 +++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/internal/auth/codex/filename.go b/internal/auth/codex/filename.go
index fcf02802..26515fef 100644
--- a/internal/auth/codex/filename.go
+++ b/internal/auth/codex/filename.go
@@ -12,19 +12,25 @@ import (
 // CredentialFileName returns the filename used to persist Codex OAuth credentials.
 // When planType is available (e.g. "plus", "team"), it is appended after the email
 // as a suffix to disambiguate subscriptions.
-func CredentialFileName(email, planType string, includeProviderPrefix bool) string {
+// For the "team" plan, a non-empty hashAccountID is placed before the email.
+// The "codex-" prefix, separator included, is emitted only when
+// includeProviderPrefix is true, so prefix-less names never start with "-".
+func CredentialFileName(email, planType, hashAccountID string, includeProviderPrefix bool) string {
 	email = strings.TrimSpace(email)
 	plan := normalizePlanTypeForFilename(planType)
 
 	prefix := ""
 	if includeProviderPrefix {
 		prefix = "codex-"
 	}
 
 	if plan == "" {
 		return fmt.Sprintf("%s%s.json", prefix, email)
 	}
+	if plan == "team" && hashAccountID != "" {
+		return fmt.Sprintf("%s%s-%s-%s.json", prefix, hashAccountID, email, plan)
+	}
 	return fmt.Sprintf("%s%s-%s.json", prefix, email, plan)
 }
 
 func normalizePlanTypeForFilename(planType string) string {
diff --git a/sdk/auth/codex.go b/sdk/auth/codex.go
index 30a36f97..b655a239 100644
--- a/sdk/auth/codex.go
+++ b/sdk/auth/codex.go
@@ -2,6 +2,8 @@ package auth
 
 import (
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
 	"fmt"
 	"net/http"
 	"strings"
@@ -192,12 +194,18 @@ waitForCallback:
 	}
 
 	planType := ""
+	hashAccountID := ""
 	if tokenStorage.IDToken != "" {
 		if claims, errParse := codex.ParseJWTToken(tokenStorage.IDToken); errParse == nil && claims != nil {
 			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
+			accountID := strings.TrimSpace(claims.CodexAuthInfo.ChatgptAccountID)
+			if accountID != "" {
+				digest := sha256.Sum256([]byte(accountID))
+				hashAccountID = hex.EncodeToString(digest[:])[:8]
+			}
 		}
 	}
-	fileName := codex.CredentialFileName(tokenStorage.Email, planType, true)
+	fileName := codex.CredentialFileName(tokenStorage.Email, planType, hashAccountID, true)
 	metadata := map[string]any{
 		"email": tokenStorage.Email,
 	}

From 9823dc35e12327e5a37535bb79d9353b4a4bef73 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:37:52 +0800
Subject: [PATCH 19/26] feat(auth): hash account ID for improved uniqueness in
 credential filenames

---
 internal/api/handlers/management/auth_files.go | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go
index e018a054..63e75d88 100644
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -3,6 +3,8 @@ package management
 import (
 	"bytes"
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -1389,6 +1391,11 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 			accountID = claims.GetAccountID()
 			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
 		}
+		hashAccountID := ""
+		if accountID != "" {
+			digest := sha256.Sum256([]byte(accountID))
+			hashAccountID = hex.EncodeToString(digest[:])[:8]
+		}
 		// Build bundle compatible with existing storage
 		bundle := &codex.CodexAuthBundle{
 			TokenData: codex.CodexTokenData{
@@ -1404,7 +1411,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 
 		// Create token storage and persist
 		tokenStorage := openaiAuth.CreateTokenStorage(bundle)
-		fileName := codex.CredentialFileName(tokenStorage.Email, planType, true)
+		fileName := codex.CredentialFileName(tokenStorage.Email, planType, hashAccountID, true)
 		record := &coreauth.Auth{
 			ID:       fileName,
 			Provider: "codex",

From 3f385a8572ecae4c03af546630b9460868e05939 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:38:31 +0800
Subject: [PATCH 20/26] feat(auth): add "antigravity" provider to ignored
 access_token fields in filestore

---
 sdk/auth/filestore.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index e364c7ca..6ac8b8a3 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -336,7 +336,7 @@ func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
 	// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
 	// we ignore access_token to avoid unnecessary file writes.
 	switch provider {
-	case "gemini", "gemini-cli":
+	case "gemini", "gemini-cli", "antigravity":
 		ignoredFields = append(ignoredFields, "access_token")
 	}
 

From 2cbe4a790cfefd41bf503c6089ba46506cb20592 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 11:47:33 +0800
Subject: [PATCH 21/26] chore(translator): remove unnecessary whitespace in
 gemini_openai_response code

---
 .../gemini/openai/chat-completions/gemini_openai_response.go  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index 7de1b5ff..9cce35f9 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -50,7 +50,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 			FunctionIndex: make(map[int]int),
 		}
 	}
-	
+
 	// Ensure the Map is initialized (handling cases where param might be reused from older context).
 	p := (*param).(*convertGeminiResponseToOpenAIChatParams)
 	if p.FunctionIndex == nil {
@@ -174,7 +174,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 						// Handle function call content.
 						hasFunctionCall = true
 						toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
-						
+
 						// Retrieve the function index for this specific candidate.
 						functionCallIndex := p.FunctionIndex[candidateIndex]
 						p.FunctionIndex[candidateIndex]++

From 6184c433191f20cdeb80caf3f28a63c070a360d9 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 12:35:40 +0800
Subject: [PATCH 22/26] Fixed: #1109

feat(translator): enhance session ID derivation with user_id parsing in Claude
---
 .../antigravity/claude/antigravity_claude_request.go      | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index 038ddb8e..87772d8c 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -22,6 +22,14 @@ import (
 // deriveSessionID generates a stable session ID from the request.
 // Uses the hash of the first user message to identify the conversation.
 func deriveSessionID(rawJSON []byte) string {
+	userIDResult := gjson.GetBytes(rawJSON, "metadata.user_id")
+	if userIDResult.Exists() {
+		userID := userIDResult.String()
+		idx := strings.Index(userID, "session_")
+		if idx != -1 {
+			return userID[idx+8:]
+		}
+	}
 	messages := gjson.GetBytes(rawJSON, "messages")
 	if !messages.IsArray() {
 		return ""

From 020e61d0daa578e7874a55e5be2bbdd0aba931aa Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 13:31:36 +0800
Subject: [PATCH 23/26] feat(translator): improve signature handling by
 associating with model name in cache functions

---
 internal/cache/signature_cache.go             |  9 ++---
 .../claude/antigravity_claude_request.go      | 33 ++++++++++++-------
 .../claude/antigravity_claude_response.go     |  9 ++---
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/internal/cache/signature_cache.go b/internal/cache/signature_cache.go
index dee1b13b..ee8ad0b2 100644
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -3,6 +3,7 @@ package cache
 import (
 	"crypto/sha256"
 	"encoding/hex"
+	"fmt"
 	"sync"
 	"time"
 )
@@ -94,7 +95,7 @@ func purgeExpiredSessions() {
 
 // CacheSignature stores a thinking signature for a given session and text.
 // Used for Claude models that require signed thinking blocks in multi-turn conversations.
-func CacheSignature(sessionID, text, signature string) {
+func CacheSignature(modelName, sessionID, text, signature string) {
 	if sessionID == "" || text == "" || signature == "" {
 		return
 	}
@@ -102,7 +103,7 @@ func CacheSignature(sessionID, text, signature string) {
 		return
 	}
 
-	sc := getOrCreateSession(sessionID)
+	sc := getOrCreateSession(fmt.Sprintf("%s#%s", modelName, sessionID))
 	textHash := hashText(text)
 
 	sc.mu.Lock()
@@ -116,12 +117,12 @@ func CacheSignature(sessionID, text, signature string) {
 
 // GetCachedSignature retrieves a cached signature for a given session and text.
 // Returns empty string if not found or expired.
-func GetCachedSignature(sessionID, text string) string {
+func GetCachedSignature(modelName, sessionID, text string) string {
 	if sessionID == "" || text == "" {
 		return ""
 	}
 
-	val, ok := signatureCache.Load(sessionID)
+	val, ok := signatureCache.Load(fmt.Sprintf("%s#%s", modelName, sessionID))
 	if !ok {
 		return ""
 	}
diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index 87772d8c..5b6ffe22 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -69,6 +69,7 @@ func deriveSessionID(rawJSON []byte) string {
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
+	enableThoughtTranslate := true
 	rawJSON := bytes.Clone(inputRawJSON)
 
 	// Derive session ID for signature caching
@@ -132,27 +133,34 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
 						thinkingText := thinking.GetThinkingText(contentResult)
-						// signatureResult := contentResult.Get("signature")
-						// clientSignature := ""
-						// if signatureResult.Exists() && signatureResult.String() != "" {
-						// 	clientSignature = signatureResult.String()
-						// }
 
 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions
 						signature := ""
 						if sessionID != "" && thinkingText != "" {
-							if cachedSig := cache.GetCachedSignature(sessionID, thinkingText); cachedSig != "" {
+							if cachedSig := cache.GetCachedSignature(modelName, sessionID, thinkingText); cachedSig != "" {
 								signature = cachedSig
 								// log.Debugf("Using cached signature for thinking block")
 							}
 						}
 
-						// NOTE: We do NOT fallback to client signature anymore.
-						// Client signatures from Claude models are incompatible with Antigravity/Gemini API.
-						// When switching between models (e.g., Claude Opus -> Gemini Flash), the Claude
-						// signatures will cause "Corrupted thought signature" errors.
-						// If we have no cached signature, the thinking block will be skipped below.
+						// Fall back to the client signature only on a cache miss, and only if it carries this model's "<model>#" prefix and is valid
+						if signature == "" {
+							signatureResult := contentResult.Get("signature")
+							clientSignature := ""
+							if signatureResult.Exists() && signatureResult.String() != "" {
+								arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
+								if len(arrayClientSignatures) == 2 {
+									if modelName == arrayClientSignatures[0] {
+										clientSignature = arrayClientSignatures[1]
+									}
+								}
+							}
+							if cache.HasValidSignature(clientSignature) {
+								signature = clientSignature
+							}
+							// log.Debugf("Using client-provided signature for thinking block")
+						}
 
 						// Store for subsequent tool_use in the same message
 						if cache.HasValidSignature(signature) {
@@ -167,6 +175,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 						// Converting to text would break this requirement
 						if isUnsigned {
 							// log.Debugf("Dropping unsigned thinking block (no valid signature)")
+							enableThoughtTranslate = false
 							continue
 						}
 
@@ -394,7 +403,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	}
 
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
+	if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
diff --git a/internal/translator/antigravity/claude/antigravity_claude_response.go b/internal/translator/antigravity/claude/antigravity_claude_response.go
index 1672a835..c32918d6 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -73,6 +73,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 			SessionID:        deriveSessionID(originalRequestRawJSON),
 		}
 	}
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()
 
 	params := (*param).(*Params)
 
@@ -139,13 +140,13 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 						// log.Debug("Branch: signature_delta")
 
 						if params.SessionID != "" && params.CurrentThinkingText.Len() > 0 {
-							cache.CacheSignature(params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
+							cache.CacheSignature(modelName, params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
 							// log.Debugf("Cached signature for thinking block (sessionID=%s, textLen=%d)", params.SessionID, params.CurrentThinkingText.Len())
 							params.CurrentThinkingText.Reset()
 						}
 
 						output = output + "event: content_block_delta\n"
-						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
+						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", modelName, thoughtSignature.String()))
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						params.HasContent = true
 					} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
@@ -372,7 +373,7 @@ func resolveStopReason(params *Params) string {
 //   - string: A Claude-compatible JSON response.
 func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	_ = originalRequestRawJSON
-	_ = requestRawJSON
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()
 
 	root := gjson.ParseBytes(rawJSON)
 	promptTokens := root.Get("response.usageMetadata.promptTokenCount").Int()
@@ -437,7 +438,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 		block := `{"type":"thinking","thinking":""}`
 		block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
 		if thinkingSignature != "" {
-			block, _ = sjson.Set(block, "signature", thinkingSignature)
+			block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", modelName, thinkingSignature))
 		}
 		responseJSON, _ = sjson.SetRaw(responseJSON, "content.-1", block)
 		thinkingBuilder.Reset()

From 8d9f4edf9b2f6c47824829612e0f2c72014c3be6 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 13:45:25 +0800
Subject: [PATCH 24/26] feat(translator): unify model group references by
 introducing `GetModelGroup` helper function

---
 internal/cache/signature_cache.go                | 16 ++++++++++++++--
 .../claude/antigravity_claude_response.go        |  4 ++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/internal/cache/signature_cache.go b/internal/cache/signature_cache.go
index ee8ad0b2..477b1245 100644
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -4,6 +4,7 @@ import (
 	"crypto/sha256"
 	"encoding/hex"
 	"fmt"
+	"strings"
 	"sync"
 	"time"
 )
@@ -103,7 +104,7 @@ func CacheSignature(modelName, sessionID, text, signature string) {
 		return
 	}
 
-	sc := getOrCreateSession(fmt.Sprintf("%s#%s", modelName, sessionID))
+	sc := getOrCreateSession(fmt.Sprintf("%s#%s", GetModelGroup(modelName), sessionID))
 	textHash := hashText(text)
 
 	sc.mu.Lock()
@@ -122,7 +123,7 @@ func GetCachedSignature(modelName, sessionID, text string) string {
 		return ""
 	}
 
-	val, ok := signatureCache.Load(fmt.Sprintf("%s#%s", modelName, sessionID))
+	val, ok := signatureCache.Load(fmt.Sprintf("%s#%s", GetModelGroup(modelName), sessionID))
 	if !ok {
 		return ""
 	}
@@ -168,3 +169,14 @@ func ClearSignatureCache(sessionID string) {
 func HasValidSignature(signature string) bool {
 	return signature != "" && len(signature) >= MinValidSignatureLen
 }
+
+// GetModelGroup returns the first of "gpt", "claude" or "gemini" that occurs in
+// modelName (checked in that order), or modelName itself when none matches.
+func GetModelGroup(modelName string) string {
+	for _, group := range []string{"gpt", "claude", "gemini"} {
+		if strings.Contains(modelName, group) {
+			return group
+		}
+	}
+	return modelName
+}
diff --git a/internal/translator/antigravity/claude/antigravity_claude_response.go b/internal/translator/antigravity/claude/antigravity_claude_response.go
index c32918d6..e360f850 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -146,7 +146,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 						}
 
 						output = output + "event: content_block_delta\n"
-						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", modelName, thoughtSignature.String()))
+						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thoughtSignature.String()))
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						params.HasContent = true
 					} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
@@ -438,7 +438,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 		block := `{"type":"thinking","thinking":""}`
 		block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
 		if thinkingSignature != "" {
-			block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", modelName, thinkingSignature))
+			block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thinkingSignature))
 		}
 		responseJSON, _ = sjson.SetRaw(responseJSON, "content.-1", block)
 		thinkingBuilder.Reset()

From 5364a2471d96b38d74e774cbdc9db99e7ebc04e2 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Tue, 20 Jan 2026 13:56:57 +0800
Subject: [PATCH 25/26] fix(endpoint_compat): update `GetModelInfo` to include
 missing parameter for improved registry compatibility

---
 sdk/api/handlers/openai/endpoint_compat.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/api/handlers/openai/endpoint_compat.go b/sdk/api/handlers/openai/endpoint_compat.go
index 56fac508..d7fc5f2f 100644
--- a/sdk/api/handlers/openai/endpoint_compat.go
+++ b/sdk/api/handlers/openai/endpoint_compat.go
@@ -11,7 +11,7 @@ func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool)
 	if modelName == "" {
 		return "", false
 	}
-	info := registry.GetGlobalRegistry().GetModelInfo(modelName)
+	info := registry.GetGlobalRegistry().GetModelInfo(modelName, "")
 	if info == nil || len(info.SupportedEndpoints) == 0 {
 		return "", false
 	}
@@ -34,4 +34,4 @@ func endpointListContains(items []string, value string) bool {
 		}
 	}
 	return false
-}
\ No newline at end of file
+}

From a9ee971e1c30499c409319471b7dea8b23f0c2ee Mon Sep 17 00:00:00 2001
From: "781456868@qq.com" <liuyu@dhb168.com>
Date: Tue, 20 Jan 2026 21:57:45 +0800
Subject: [PATCH 26/26] fix(kiro): improve auto-refresh and IDC auth file
 handling

Amp-Thread-ID: https://ampcode.com/threads/T-019bdb94-80e3-7302-be0f-a69937826d13
Co-authored-by: Amp <amp@ampcode.com>
---
 internal/auth/kiro/aws_auth.go             | 20 +++++++++++++++++
 internal/auth/kiro/oauth_web.go            | 25 +++++++++++-----------
 internal/runtime/executor/kiro_executor.go | 14 ++++++------
 sdk/auth/filestore.go                      |  4 ++--
 sdk/auth/kiro.go                           | 20 ++++++++---------
 sdk/cliproxy/auth/conductor.go             |  2 +-
 6 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/internal/auth/kiro/aws_auth.go b/internal/auth/kiro/aws_auth.go
index 53c77a8b..d082f274 100644
--- a/internal/auth/kiro/aws_auth.go
+++ b/internal/auth/kiro/aws_auth.go
@@ -280,6 +280,11 @@ func (k *KiroAuth) CreateTokenStorage(tokenData *KiroTokenData) *KiroTokenStorag
 		AuthMethod:   tokenData.AuthMethod,
 		Provider:     tokenData.Provider,
 		LastRefresh:  time.Now().Format(time.RFC3339),
+		ClientID:     tokenData.ClientID,
+		ClientSecret: tokenData.ClientSecret,
+		Region:       tokenData.Region,
+		StartURL:     tokenData.StartURL,
+		Email:        tokenData.Email,
 	}
 }
 
@@ -311,4 +316,19 @@ func (k *KiroAuth) UpdateTokenStorage(storage *KiroTokenStorage, tokenData *Kiro
 	storage.AuthMethod = tokenData.AuthMethod
 	storage.Provider = tokenData.Provider
 	storage.LastRefresh = time.Now().Format(time.RFC3339)
+	if tokenData.ClientID != "" {
+		storage.ClientID = tokenData.ClientID
+	}
+	if tokenData.ClientSecret != "" {
+		storage.ClientSecret = tokenData.ClientSecret
+	}
+	if tokenData.Region != "" {
+		storage.Region = tokenData.Region
+	}
+	if tokenData.StartURL != "" {
+		storage.StartURL = tokenData.StartURL
+	}
+	if tokenData.Email != "" {
+		storage.Email = tokenData.Email
+	}
 }
diff --git a/internal/auth/kiro/oauth_web.go b/internal/auth/kiro/oauth_web.go
index 81c24393..6e4269c5 100644
--- a/internal/auth/kiro/oauth_web.go
+++ b/internal/auth/kiro/oauth_web.go
@@ -377,17 +377,18 @@ func (h *OAuthWebHandler) pollForToken(ctx context.Context, session *webAuthSess
 			email := FetchUserEmailWithFallback(ctx, h.cfg, tokenResp.AccessToken)
 
 			tokenData := &KiroTokenData{
-				AccessToken:  tokenResp.AccessToken,
-				RefreshToken: tokenResp.RefreshToken,
-				ProfileArn:   profileArn,
-				ExpiresAt:    expiresAt.Format(time.RFC3339),
-				AuthMethod:   session.authMethod,
-				Provider:     "AWS",
-				ClientID:     session.clientID,
-				ClientSecret: session.clientSecret,
-				Email:        email,
-				Region:       session.region,
-			}
+					AccessToken:  tokenResp.AccessToken,
+					RefreshToken: tokenResp.RefreshToken,
+					ProfileArn:   profileArn,
+					ExpiresAt:    expiresAt.Format(time.RFC3339),
+					AuthMethod:   session.authMethod,
+					Provider:     "AWS",
+					ClientID:     session.clientID,
+					ClientSecret: session.clientSecret,
+					Email:        email,
+					Region:       session.region,
+					StartURL:     session.startURL,
+				}
 
 			h.mu.Lock()
 			session.status = statusSuccess
@@ -828,7 +829,7 @@ func (h *OAuthWebHandler) handleImportToken(c *gin.Context) {
 
 // handleManualRefresh handles manual token refresh requests from the web UI.
 // This allows users to trigger a token refresh when needed, without waiting
-// for the automatic 5-second check and 10-minute-before-expiry refresh cycle.
+// for the automatic 30-second check and 20-minute-before-expiry refresh cycle.
 // Uses the same refresh logic as kiro_executor.Refresh for consistency.
 func (h *OAuthWebHandler) handleManualRefresh(c *gin.Context) {
 	authDir := ""
diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go
index b0c14c61..cab4bcd6 100644
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -3513,14 +3513,14 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 		// Also check if expires_at is now in the future with sufficient buffer
 		if expiresAt, ok := auth.Metadata["expires_at"].(string); ok {
 			if expTime, err := time.Parse(time.RFC3339, expiresAt); err == nil {
-				// If token expires more than 5 minutes from now, it's still valid
-				if time.Until(expTime) > 5*time.Minute {
+				// If token expires more than 20 minutes from now, it's still valid
+				if time.Until(expTime) > 20*time.Minute {
 					log.Debugf("kiro executor: token is still valid (expires in %v), skipping refresh", time.Until(expTime))
 					// CRITICAL FIX: Set NextRefreshAfter to prevent frequent refresh checks
-					// Without this, shouldRefresh() will return true again in 5 seconds
+					// Without this, shouldRefresh() will return true again in 30 seconds
 					updated := auth.Clone()
-					// Set next refresh to 5 minutes before expiry, or at least 30 seconds from now
-					nextRefresh := expTime.Add(-5 * time.Minute)
+					// Set next refresh to 20 minutes before expiry, or at least 30 seconds from now
+					nextRefresh := expTime.Add(-20 * time.Minute)
 					minNextRefresh := time.Now().Add(30 * time.Second)
 					if nextRefresh.Before(minNextRefresh) {
 						nextRefresh = minNextRefresh
@@ -3626,9 +3626,9 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 		updated.Attributes["profile_arn"] = tokenData.ProfileArn
 	}
 
-	// NextRefreshAfter is aligned with RefreshLead (5min)
+	// NextRefreshAfter is aligned with RefreshLead (20min)
 	if expiresAt, parseErr := time.Parse(time.RFC3339, tokenData.ExpiresAt); parseErr == nil {
-		updated.NextRefreshAfter = expiresAt.Add(-5 * time.Minute)
+		updated.NextRefreshAfter = expiresAt.Add(-20 * time.Minute)
 	}
 
 	log.Infof("kiro executor: token refreshed successfully, expires at %s", tokenData.ExpiresAt)
diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index 0010be7d..9a288b10 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -217,11 +217,11 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 	}
 	id := s.idFor(path, baseDir)
 
-	// Calculate NextRefreshAfter from expires_at (10 minutes before expiry)
+	// Calculate NextRefreshAfter from expires_at (20 minutes before expiry)
 	var nextRefreshAfter time.Time
 	if expiresAtStr, ok := metadata["expires_at"].(string); ok && expiresAtStr != "" {
 		if expiresAt, err := time.Parse(time.RFC3339, expiresAtStr); err == nil {
-			nextRefreshAfter = expiresAt.Add(-10 * time.Minute)
+			nextRefreshAfter = expiresAt.Add(-20 * time.Minute)
 		}
 	}
 
diff --git a/sdk/auth/kiro.go b/sdk/auth/kiro.go
index b0687eba..f66be461 100644
--- a/sdk/auth/kiro.go
+++ b/sdk/auth/kiro.go
@@ -52,9 +52,9 @@ func (a *KiroAuthenticator) Provider() string {
 }
 
 // RefreshLead indicates how soon before expiry a refresh should be attempted.
-// Set to 10 minutes for proactive refresh before token expiry.
+// Set to 20 minutes for proactive refresh before token expiry.
 func (a *KiroAuthenticator) RefreshLead() *time.Duration {
-	d := 10 * time.Minute
+	d := 20 * time.Minute
 	return &d
 }
 
@@ -132,8 +132,8 @@ func (a *KiroAuthenticator) createAuthRecord(tokenData *kiroauth.KiroTokenData,
 		UpdatedAt: now,
 		Metadata:  metadata,
 		Attributes: attributes,
-		// NextRefreshAfter: 10 minutes before expiry
-		NextRefreshAfter: expiresAt.Add(-10 * time.Minute),
+		// NextRefreshAfter: 20 minutes before expiry
+		NextRefreshAfter: expiresAt.Add(-20 * time.Minute),
 	}
 
 	if tokenData.Email != "" {
@@ -214,8 +214,8 @@ func (a *KiroAuthenticator) LoginWithAuthCode(ctx context.Context, cfg *config.C
 			"source":      "aws-builder-id-authcode",
 			"email":       tokenData.Email,
 		},
-		// NextRefreshAfter: 10 minutes before expiry
-		NextRefreshAfter: expiresAt.Add(-10 * time.Minute),
+		// NextRefreshAfter: 20 minutes before expiry
+		NextRefreshAfter: expiresAt.Add(-20 * time.Minute),
 	}
 
 	if tokenData.Email != "" {
@@ -298,8 +298,8 @@ func (a *KiroAuthenticator) ImportFromKiroIDE(ctx context.Context, cfg *config.C
 			"email":       tokenData.Email,
 			"region":      tokenData.Region,
 		},
-		// NextRefreshAfter: 10 minutes before expiry
-		NextRefreshAfter: expiresAt.Add(-10 * time.Minute),
+		// NextRefreshAfter: 20 minutes before expiry
+		NextRefreshAfter: expiresAt.Add(-20 * time.Minute),
 	}
 
 	// Display the email if extracted
@@ -367,8 +367,8 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
 	updated.Metadata["refresh_token"] = tokenData.RefreshToken
 	updated.Metadata["expires_at"] = tokenData.ExpiresAt
 	updated.Metadata["last_refresh"] = now.Format(time.RFC3339) // For double-check optimization
-	// NextRefreshAfter: 10 minutes before expiry
-	updated.NextRefreshAfter = expiresAt.Add(-10 * time.Minute)
+	// NextRefreshAfter: 20 minutes before expiry
+	updated.NextRefreshAfter = expiresAt.Add(-20 * time.Minute)
 
 	return updated, nil
 }
diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go
index 83769198..5f553bdd 100644
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -47,7 +47,7 @@ type RefreshEvaluator interface {
 }
 
 const (
-	refreshCheckInterval  = 5 * time.Second
+	refreshCheckInterval  = 30 * time.Second
 	refreshPendingBackoff = time.Minute
 	refreshFailureBackoff = 1 * time.Minute
 	quotaBackoffBase      = time.Second