Merge pull request #138 from router-for-me/plus

v6.7.26
Merge branch 'main' into plus
2026-03-10 07:43:07 +00:00 · 2026-01-27 20:40:12 +08:00 · 2026-01-27 20:39:59 +08:00 · 2026-01-27 20:38:03 +08:00 · 2026-01-27 20:36:56 +08:00 · 2026-01-27 20:32:04 +08:00
69 changed files with 3545 additions and 2540 deletions
--- a/go.mod
+++ b/go.mod
@@ -21,6 +21,7 @@ require (
 	golang.org/x/crypto v0.45.0
 	golang.org/x/net v0.47.0
 	golang.org/x/oauth2 v0.30.0
+	golang.org/x/sync v0.18.0
 	golang.org/x/term v0.37.0
 	gopkg.in/natefinch/lumberjack.v2 v2.2.1
 	gopkg.in/yaml.v3 v3.0.1
@@ -39,6 +40,7 @@ require (
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
+	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
 	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/go-git/gcfg/v2 v2.0.2 // indirect
@@ -68,8 +70,8 @@ require (
 	github.com/tidwall/pretty v1.2.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
 	golang.org/x/arch v0.8.0 // indirect
-	golang.org/x/sync v0.18.0 // indirect
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
 	google.golang.org/protobuf v1.34.1 // indirect
--- a/go.sum
+++ b/go.sum
@@ -35,6 +35,8 @@ github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc
 github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
 github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
 github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
 github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
 github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
 github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
@@ -157,6 +159,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
 github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
 golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"time"

+	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -70,7 +71,7 @@ type apiCallResponse struct {
 //	- Authorization: Bearer <key>
 //	- X-Management-Key: <key>
 //
-// Request JSON:
+// Request body (JSON by default; CBOR when Content-Type contains application/cbor):
 //   - auth_index / authIndex / AuthIndex (optional):
 //     The credential "auth_index" from GET /v0/management/auth-files (or other endpoints returning it).
 //     If omitted or not found, credential-specific proxy/token substitution is skipped.
@@ -90,10 +91,12 @@ type apiCallResponse struct {
 //  2. Global config proxy-url
 //  3. Direct connect (environment proxies are not used)
 //
-// Response JSON (returned with HTTP 200 when the APICall itself succeeds):
-//   - status_code: Upstream HTTP status code.
-//   - header: Upstream response headers.
-//   - body: Upstream response body as string.
+// Response (returned with HTTP 200 when the APICall itself succeeds):
+//
+//	Format matches request Content-Type (application/json or application/cbor)
+//	- status_code: Upstream HTTP status code.
+//	- header: Upstream response headers.
+//	- body: Upstream response body as string.
 //
 // Example:
 //
@@ -107,10 +110,28 @@ type apiCallResponse struct {
 //	  -H "Content-Type: application/json" \
 //	  -d '{"auth_index":"<AUTH_INDEX>","method":"POST","url":"https://api.example.com/v1/fetchAvailableModels","header":{"Authorization":"Bearer $TOKEN$","Content-Type":"application/json","User-Agent":"cliproxyapi"},"data":"{}"}'
 func (h *Handler) APICall(c *gin.Context) {
+	// Detect content type
+	contentType := strings.ToLower(strings.TrimSpace(c.GetHeader("Content-Type")))
+	isCBOR := strings.Contains(contentType, "application/cbor")
+
 	var body apiCallRequest
-	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
-		return
+
+	// Parse request body based on content type
+	if isCBOR {
+		rawBody, errRead := io.ReadAll(c.Request.Body)
+		if errRead != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read request body"})
+			return
+		}
+		if errUnmarshal := cbor.Unmarshal(rawBody, &body); errUnmarshal != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid cbor body"})
+			return
+		}
+	} else {
+		if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
+			return
+		}
 	}

 	method := strings.ToUpper(strings.TrimSpace(body.Method))
@@ -209,11 +230,23 @@ func (h *Handler) APICall(c *gin.Context) {
 		return
 	}

-	c.JSON(http.StatusOK, apiCallResponse{
+	response := apiCallResponse{
 		StatusCode: resp.StatusCode,
 		Header:     resp.Header,
 		Body:       string(respBody),
-	})
+	}
+
+	// Return response in the same format as the request
+	if isCBOR {
+		cborData, errMarshal := cbor.Marshal(response)
+		if errMarshal != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode cbor response"})
+			return
+		}
+		c.Data(http.StatusOK, "application/cbor", cborData)
+	} else {
+		c.JSON(http.StatusOK, response)
+	}
 }

 func firstNonEmptyString(values ...*string) string {
--- a/internal/api/handlers/management/api_tools_cbor_test.go
+++ b/internal/api/handlers/management/api_tools_cbor_test.go
@@ -0,0 +1,149 @@
+package management
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/fxamacker/cbor/v2"
+	"github.com/gin-gonic/gin"
+)
+
+func TestAPICall_CBOR_Support(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Zero-value handler: no config or auth manager is wired in.
+	h := &Handler{}
+
+	// Request shared by the JSON and CBOR subtests; NOTE(review): the public URL presumably makes them network-dependent.
+	reqData := apiCallRequest{
+		Method: "GET",
+		URL:    "https://httpbin.org/get",
+		Header: map[string]string{
+			"User-Agent": "test-client",
+		},
+	}
+
+	t.Run("JSON request and response", func(t *testing.T) {
+		// Encode the shared request as JSON
+		jsonData, err := json.Marshal(reqData)
+		if err != nil {
+			t.Fatalf("Failed to marshal JSON: %v", err)
+		}
+
+		// Build the management API request with a JSON content type
+		req := httptest.NewRequest(http.MethodPost, "/v0/management/api-call", bytes.NewReader(jsonData))
+		req.Header.Set("Content-Type", "application/json")
+
+		// Recorder that captures the handler's status, headers and body
+		w := httptest.NewRecorder()
+
+		// Bare Gin context bound to the recorder; no router or middleware runs
+		c, _ := gin.CreateTestContext(w)
+		c.Request = req
+
+		// Invoke the handler directly
+		h.APICall(c)
+
+		// Statuses other than 200/502 are only logged; they do not fail the subtest
+		if w.Code != http.StatusOK && w.Code != http.StatusBadGateway {
+			t.Logf("Response status: %d", w.Code)
+			t.Logf("Response body: %s", w.Body.String())
+		}
+
+		// The content type is asserted only when the handler answered 200
+		contentType := w.Header().Get("Content-Type")
+		if w.Code == http.StatusOK && !contains(contentType, "application/json") {
+			t.Errorf("Expected JSON response, got: %s", contentType)
+		}
+	})
+
+	t.Run("CBOR request and response", func(t *testing.T) {
+		// Encode the shared request as CBOR
+		cborData, err := cbor.Marshal(reqData)
+		if err != nil {
+			t.Fatalf("Failed to marshal CBOR: %v", err)
+		}
+
+		// Build the management API request with a CBOR content type
+		req := httptest.NewRequest(http.MethodPost, "/v0/management/api-call", bytes.NewReader(cborData))
+		req.Header.Set("Content-Type", "application/cbor")
+
+		// Recorder that captures the handler's status, headers and body
+		w := httptest.NewRecorder()
+
+		// Bare Gin context bound to the recorder; no router or middleware runs
+		c, _ := gin.CreateTestContext(w)
+		c.Request = req
+
+		// Invoke the handler directly
+		h.APICall(c)
+
+		// Statuses other than 200/502 are only logged; they do not fail the subtest
+		if w.Code != http.StatusOK && w.Code != http.StatusBadGateway {
+			t.Logf("Response status: %d", w.Code)
+			t.Logf("Response body: %s", w.Body.String())
+		}
+
+		// The content type is asserted only when the handler answered 200
+		contentType := w.Header().Get("Content-Type")
+		if w.Code == http.StatusOK && !contains(contentType, "application/cbor") {
+			t.Errorf("Expected CBOR response, got: %s", contentType)
+		}
+
+		// On 200 the body must decode as a CBOR-encoded apiCallResponse
+		if w.Code == http.StatusOK {
+			var response apiCallResponse
+			if err := cbor.Unmarshal(w.Body.Bytes(), &response); err != nil {
+				t.Errorf("Failed to unmarshal CBOR response: %v", err)
+			} else {
+				t.Logf("CBOR response decoded successfully: status_code=%d", response.StatusCode)
+			}
+		}
+	})
+
+	t.Run("CBOR encoding and decoding consistency", func(t *testing.T) {
+		// Round-trips an apiCallRequest through CBOR without calling the handler
+		testReq := apiCallRequest{
+			Method: "POST",
+			URL:    "https://example.com/api",
+			Header: map[string]string{
+				"Authorization": "Bearer $TOKEN$",
+				"Content-Type":  "application/json",
+			},
+			Data: `{"key":"value"}`,
+		}
+
+		// Encode to CBOR
+		cborData, err := cbor.Marshal(testReq)
+		if err != nil {
+			t.Fatalf("Failed to marshal to CBOR: %v", err)
+		}
+
+		// Decode from CBOR
+		var decoded apiCallRequest
+		if err := cbor.Unmarshal(cborData, &decoded); err != nil {
+			t.Fatalf("Failed to unmarshal from CBOR: %v", err)
+		}
+
+		// Compare scalar fields exactly; headers are compared by entry count only
+		if decoded.Method != testReq.Method {
+			t.Errorf("Method mismatch: got %s, want %s", decoded.Method, testReq.Method)
+		}
+		if decoded.URL != testReq.URL {
+			t.Errorf("URL mismatch: got %s, want %s", decoded.URL, testReq.URL)
+		}
+		if decoded.Data != testReq.Data {
+			t.Errorf("Data mismatch: got %s, want %s", decoded.Data, testReq.Data)
+		}
+		if len(decoded.Header) != len(testReq.Header) {
+			t.Errorf("Header count mismatch: got %d, want %d", len(decoded.Header), len(testReq.Header))
+		}
+	})
+}
+
+func contains(s, substr string) bool { // reports whether substr is non-empty and occurs anywhere in s
+	return len(substr) > 0 && bytes.Contains([]byte(s), []byte(substr))
+}
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -3,10 +3,10 @@ package management
 import (
 	"bytes"
 	"context"
-	"encoding/hex"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/base64"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -23,6 +23,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/antigravity"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
@@ -236,14 +237,6 @@ func stopForwarderInstance(port int, forwarder *callbackForwarder) {
 	log.Infof("callback forwarder on port %d stopped", port)
 }

-func sanitizeAntigravityFileName(email string) string {
-	if strings.TrimSpace(email) == "" {
-		return "antigravity.json"
-	}
-	replacer := strings.NewReplacer("@", "_", ".", "_")
-	return fmt.Sprintf("antigravity-%s.json", replacer.Replace(email))
-}
-
 func (h *Handler) managementCallbackURL(path string) (string, error) {
 	if h == nil || h.cfg == nil || h.cfg.Port <= 0 {
 		return "", fmt.Errorf("server port is not configured")
@@ -753,6 +746,72 @@ func (h *Handler) registerAuthFromFile(ctx context.Context, path string, data []
 	return err
 }

+// PatchAuthFileStatus enables or disables one auth entry. The JSON body must carry
+// "name" (matched against the auth ID first, then against the auth file name) and
+// "disabled". It answers 503 without an auth manager, 400/404 on bad input, 500 on update failure.
+func (h *Handler) PatchAuthFileStatus(c *gin.Context) {
+	if h == nil || h.authManager == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
+		return
+	}
+
+	var req struct {
+		Name     string `json:"name"`
+		Disabled *bool  `json:"disabled"` // pointer so an omitted field is distinguishable from false
+	}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
+		return
+	}
+
+	name := strings.TrimSpace(req.Name)
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"})
+		return
+	}
+	if req.Disabled == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "disabled is required"})
+		return
+	}
+
+	// Look the entry up by ID first, then fall back to an exact file-name match
+	var targetAuth *coreauth.Auth
+	if auth, ok := h.authManager.GetByID(name); ok {
+		targetAuth = auth
+	} else {
+		auths := h.authManager.List()
+		for _, auth := range auths {
+			if auth.FileName == name {
+				targetAuth = auth
+				break
+			}
+		}
+	}
+
+	if targetAuth == nil {
+		c.JSON(http.StatusNotFound, gin.H{"error": "auth file not found"})
+		return
+	}
+
+	// Keep Disabled, Status and StatusMessage in step with the requested state
+	targetAuth.Disabled = *req.Disabled
+	if *req.Disabled {
+		targetAuth.Status = coreauth.StatusDisabled
+		targetAuth.StatusMessage = "disabled via management API"
+	} else {
+		targetAuth.Status = coreauth.StatusActive
+		targetAuth.StatusMessage = ""
+	}
+	targetAuth.UpdatedAt = time.Now()
+
+	if _, err := h.authManager.Update(c.Request.Context(), targetAuth); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to update auth: %v", err)})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"status": "ok", "disabled": *req.Disabled})
+}
+
 func (h *Handler) disableAuth(ctx context.Context, id string) {
 	if h == nil || h.authManager == nil {
 		return
@@ -919,67 +978,14 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		rawCode := resultMap["code"]
 		code := strings.Split(rawCode, "#")[0]

-		// Exchange code for tokens (replicate logic using updated redirect_uri)
-		// Extract client_id from the modified auth URL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Build request
-		bodyMap := map[string]any{
-			"code":          code,
-			"state":         state,
-			"grant_type":    "authorization_code",
-			"client_id":     clientID,
-			"redirect_uri":  "http://localhost:54545/callback",
-			"code_verifier": pkceCodes.CodeVerifier,
-		}
-		bodyJSON, _ := json.Marshal(bodyMap)
-
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://console.anthropic.com/v1/oauth/token", strings.NewReader(string(bodyJSON)))
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := anthropicAuth.ExchangeCodeForTokens(ctx, code, state, pkceCodes)
+		if errExchange != nil {
+			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errExchange)
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("failed to close response body: %v", errClose)
-			}
-		}()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			SetOAuthSessionError(state, fmt.Sprintf("token exchange failed with status %d", resp.StatusCode))
-			return
-		}
-		var tResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int    `json:"expires_in"`
-			Account      struct {
-				EmailAddress string `json:"email_address"`
-			} `json:"account"`
-		}
-		if errU := json.Unmarshal(respBody, &tResp); errU != nil {
-			log.Errorf("failed to parse token response: %v", errU)
-			SetOAuthSessionError(state, "Failed to parse token response")
-			return
-		}
-		bundle := &claude.ClaudeAuthBundle{
-			TokenData: claude.ClaudeTokenData{
-				AccessToken:  tResp.AccessToken,
-				RefreshToken: tResp.RefreshToken,
-				Email:        tResp.Account.EmailAddress,
-				Expire:       time.Now().Add(time.Duration(tResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
-		}

 		// Create token storage
 		tokenStorage := anthropicAuth.CreateTokenStorage(bundle)
@@ -1019,17 +1025,13 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {

 	fmt.Println("Initializing Google authentication...")

-	// OAuth2 configuration (mirrors internal/auth/gemini)
+	// OAuth2 configuration using exported constants from internal/auth/gemini
 	conf := &oauth2.Config{
-		ClientID:     "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
-		ClientSecret: "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",
-		RedirectURL:  "http://localhost:8085/oauth2callback",
-		Scopes: []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		},
-		Endpoint: google.Endpoint,
+		ClientID:     geminiAuth.ClientID,
+		ClientSecret: geminiAuth.ClientSecret,
+		RedirectURL:  fmt.Sprintf("http://localhost:%d/oauth2callback", geminiAuth.DefaultCallbackPort),
+		Scopes:       geminiAuth.Scopes,
+		Endpoint:     google.Endpoint,
 	}

 	// Build authorization URL and return it immediately
@@ -1151,13 +1153,9 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		}

 		ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-		ifToken["client_id"] = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-		ifToken["client_secret"] = "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"
-		ifToken["scopes"] = []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		}
+		ifToken["client_id"] = geminiAuth.ClientID
+		ifToken["client_secret"] = geminiAuth.ClientSecret
+		ifToken["scopes"] = geminiAuth.Scopes
 		ifToken["universe_domain"] = "googleapis.com"

 		ts := geminiAuth.GeminiTokenStorage{
@@ -1344,73 +1342,25 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		}

 		log.Debug("Authorization code received, exchanging for tokens...")
-		// Extract client_id from authURL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Exchange code for tokens with redirect equal to mgmtRedirect
-		form := url.Values{
-			"grant_type":    {"authorization_code"},
-			"client_id":     {clientID},
-			"code":          {code},
-			"redirect_uri":  {"http://localhost:1455/auth/callback"},
-			"code_verifier": {pkceCodes.CodeVerifier},
-		}
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://auth.openai.com/oauth/token", strings.NewReader(form.Encode()))
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := openaiAuth.ExchangeCodeForTokens(ctx, code, pkceCodes)
+		if errExchange != nil {
+			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errExchange)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			return
 		}
-		defer func() { _ = resp.Body.Close() }()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed with status %d", resp.StatusCode))
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			return
-		}
-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			IDToken      string `json:"id_token"`
-			ExpiresIn    int    `json:"expires_in"`
-		}
-		if errU := json.Unmarshal(respBody, &tokenResp); errU != nil {
-			SetOAuthSessionError(state, "Failed to parse token response")
-			log.Errorf("failed to parse token response: %v", errU)
-			return
-		}
-		claims, _ := codex.ParseJWTToken(tokenResp.IDToken)
-		email := ""
-		accountID := ""
+
+		// Extract additional info for filename generation
+		claims, _ := codex.ParseJWTToken(bundle.TokenData.IDToken)
 		planType := ""
-		if claims != nil {
-			email = claims.GetUserEmail()
-			accountID = claims.GetAccountID()
-			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
-		}
 		hashAccountID := ""
-		if accountID != "" {
-			digest := sha256.Sum256([]byte(accountID))
-			hashAccountID = hex.EncodeToString(digest[:])[:8]
-		}
-		// Build bundle compatible with existing storage
-		bundle := &codex.CodexAuthBundle{
-			TokenData: codex.CodexTokenData{
-				IDToken:      tokenResp.IDToken,
-				AccessToken:  tokenResp.AccessToken,
-				RefreshToken: tokenResp.RefreshToken,
-				AccountID:    accountID,
-				Email:        email,
-				Expire:       time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
+		if claims != nil {
+			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
+			if accountID := claims.GetAccountID(); accountID != "" {
+				digest := sha256.Sum256([]byte(accountID))
+				hashAccountID = hex.EncodeToString(digest[:])[:8]
+			}
 		}

 		// Create token storage and persist
@@ -1445,23 +1395,12 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 }

 func (h *Handler) RequestAntigravityToken(c *gin.Context) {
-	const (
-		antigravityCallbackPort = 51121
-		antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-		antigravityClientSecret = "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"
-	)
-	var antigravityScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-		"https://www.googleapis.com/auth/cclog",
-		"https://www.googleapis.com/auth/experimentsandconfigs",
-	}
-
 	ctx := context.Background()

 	fmt.Println("Initializing Antigravity authentication...")

+	authSvc := antigravity.NewAntigravityAuth(h.cfg, nil)
+
 	state, errState := misc.GenerateRandomState()
 	if errState != nil {
 		log.Errorf("Failed to generate state parameter: %v", errState)
@@ -1469,17 +1408,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		return
 	}

-	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravityCallbackPort)
-
-	params := url.Values{}
-	params.Set("access_type", "offline")
-	params.Set("client_id", antigravityClientID)
-	params.Set("prompt", "consent")
-	params.Set("redirect_uri", redirectURI)
-	params.Set("response_type", "code")
-	params.Set("scope", strings.Join(antigravityScopes, " "))
-	params.Set("state", state)
-	authURL := "https://accounts.google.com/o/oauth2/v2/auth?" + params.Encode()
+	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravity.CallbackPort)
+	authURL := authSvc.BuildAuthURL(state, redirectURI)

 	RegisterOAuthSession(state, "antigravity")

@@ -1493,7 +1423,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			return
 		}
 		var errStart error
-		if forwarder, errStart = startCallbackForwarder(antigravityCallbackPort, "antigravity", targetURL); errStart != nil {
+		if forwarder, errStart = startCallbackForwarder(antigravity.CallbackPort, "antigravity", targetURL); errStart != nil {
 			log.WithError(errStart).Error("failed to start antigravity callback forwarder")
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start callback server"})
 			return
@@ -1502,7 +1432,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {

 	go func() {
 		if isWebUI {
-			defer stopCallbackForwarderInstance(antigravityCallbackPort, forwarder)
+			defer stopCallbackForwarderInstance(antigravity.CallbackPort, forwarder)
 		}

 		waitFile := filepath.Join(h.cfg.AuthDir, fmt.Sprintf(".oauth-antigravity-%s.oauth", state))
@@ -1542,93 +1472,36 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			time.Sleep(500 * time.Millisecond)
 		}

-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		form := url.Values{}
-		form.Set("code", authCode)
-		form.Set("client_id", antigravityClientID)
-		form.Set("client_secret", antigravityClientSecret)
-		form.Set("redirect_uri", redirectURI)
-		form.Set("grant_type", "authorization_code")
-
-		req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodPost, "https://oauth2.googleapis.com/token", strings.NewReader(form.Encode()))
-		if errNewRequest != nil {
-			log.Errorf("Failed to build token request: %v", errNewRequest)
-			SetOAuthSessionError(state, "Failed to build token request")
-			return
-		}
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			log.Errorf("Failed to execute token request: %v", errDo)
+		tokenResp, errToken := authSvc.ExchangeCodeForTokens(ctx, authCode, redirectURI)
+		if errToken != nil {
+			log.Errorf("Failed to exchange token: %v", errToken)
 			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity token exchange close error: %v", errClose)
-			}
-		}()

-		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, _ := io.ReadAll(resp.Body)
-			log.Errorf("Antigravity token exchange failed with status %d: %s", resp.StatusCode, string(bodyBytes))
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed: %d", resp.StatusCode))
+		accessToken := strings.TrimSpace(tokenResp.AccessToken)
+		if accessToken == "" {
+			log.Error("antigravity: token exchange returned empty access token")
+			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}

-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int64  `json:"expires_in"`
-			TokenType    string `json:"token_type"`
-		}
-		if errDecode := json.NewDecoder(resp.Body).Decode(&tokenResp); errDecode != nil {
-			log.Errorf("Failed to parse token response: %v", errDecode)
-			SetOAuthSessionError(state, "Failed to parse token response")
+		email, errInfo := authSvc.FetchUserInfo(ctx, accessToken)
+		if errInfo != nil {
+			log.Errorf("Failed to fetch user info: %v", errInfo)
+			SetOAuthSessionError(state, "Failed to fetch user info")
 			return
 		}
-
-		email := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			infoReq, errInfoReq := http.NewRequestWithContext(ctx, http.MethodGet, "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
-			if errInfoReq != nil {
-				log.Errorf("Failed to build user info request: %v", errInfoReq)
-				SetOAuthSessionError(state, "Failed to build user info request")
-				return
-			}
-			infoReq.Header.Set("Authorization", "Bearer "+tokenResp.AccessToken)
-
-			infoResp, errInfo := httpClient.Do(infoReq)
-			if errInfo != nil {
-				log.Errorf("Failed to execute user info request: %v", errInfo)
-				SetOAuthSessionError(state, "Failed to execute user info request")
-				return
-			}
-			defer func() {
-				if errClose := infoResp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity user info close error: %v", errClose)
-				}
-			}()
-
-			if infoResp.StatusCode >= http.StatusOK && infoResp.StatusCode < http.StatusMultipleChoices {
-				var infoPayload struct {
-					Email string `json:"email"`
-				}
-				if errDecodeInfo := json.NewDecoder(infoResp.Body).Decode(&infoPayload); errDecodeInfo == nil {
-					email = strings.TrimSpace(infoPayload.Email)
-				}
-			} else {
-				bodyBytes, _ := io.ReadAll(infoResp.Body)
-				log.Errorf("User info request failed with status %d: %s", infoResp.StatusCode, string(bodyBytes))
-				SetOAuthSessionError(state, fmt.Sprintf("User info request failed: %d", infoResp.StatusCode))
-				return
-			}
+		email = strings.TrimSpace(email)
+		if email == "" {
+			log.Error("antigravity: user info returned empty email")
+			SetOAuthSessionError(state, "Failed to fetch user info")
+			return
 		}

 		projectID := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			fetchedProjectID, errProject := sdkAuth.FetchAntigravityProjectID(ctx, tokenResp.AccessToken, httpClient)
+		if accessToken != "" {
+			fetchedProjectID, errProject := authSvc.FetchProjectID(ctx, accessToken)
 			if errProject != nil {
 				log.Warnf("antigravity: failed to fetch project ID: %v", errProject)
 			} else {
@@ -1653,7 +1526,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			metadata["project_id"] = projectID
 		}

-		fileName := sanitizeAntigravityFileName(email)
+		fileName := antigravity.CredentialFileName(email)
 		label := strings.TrimSpace(email)
 		if label == "" {
 			label = "antigravity"
--- a/internal/api/handlers/management/model_definitions.go
+++ b/internal/api/handlers/management/model_definitions.go
@@ -0,0 +1,33 @@
+package management
+
+import (
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// GetStaticModelDefinitions serves the static model metadata registered for one channel.
+// The channel is read from the :channel path parameter, or from ?channel= when that is empty.
+func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
+	requested := strings.TrimSpace(c.Param("channel"))
+	if requested == "" {
+		// No usable path parameter: fall back to the query string.
+		requested = strings.TrimSpace(c.Query("channel"))
+	}
+	if requested == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
+		return
+	}
+
+	definitions := registry.GetStaticModelDefinitionsByChannel(requested)
+	if definitions == nil {
+		// A nil result is reported to the client as an unknown channel.
+		c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": requested})
+		return
+	}
+
+	// requested is already trimmed, so lower-casing is the only normalization left.
+	c.JSON(http.StatusOK, gin.H{"channel": strings.ToLower(requested), "models": definitions})
+}
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -633,9 +633,11 @@ func (s *Server) registerManagementRoutes() {

 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
+		mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
+		mgmt.PATCH("/auth-files/status", s.mgmt.PatchAuthFileStatus)
 		mgmt.POST("/vertex/import", s.mgmt.ImportVertexCredential)

 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
--- a/internal/auth/antigravity/auth.go
+++ b/internal/auth/antigravity/auth.go
@@ -0,0 +1,344 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+// TokenResponse is the JSON payload decoded from the OAuth token endpoint
+// response by ExchangeCodeForTokens.
+//
+// NOTE(review): ExpiresIn is presumably a lifetime in seconds, as in the
+// OAuth 2.0 token response format — confirm against the consumer of this type.
+type TokenResponse struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	ExpiresIn    int64  `json:"expires_in"`
+	TokenType    string `json:"token_type"`
+}
+
+// userInfo is the subset of the user-info endpoint response that
+// FetchUserInfo decodes; only the email field is read.
+type userInfo struct {
+	Email string `json:"email"`
+}
+
+// AntigravityAuth performs the Antigravity OAuth flow (authorization URL,
+// token exchange, user info lookup) and project discovery/onboarding.
+// All outbound requests go through httpClient.
+type AntigravityAuth struct {
+	httpClient *http.Client
+}
+
+// NewAntigravityAuth builds an Antigravity auth service.
+//
+// A non-nil httpClient is used as-is and cfg is ignored. Otherwise a fresh
+// http.Client is passed through util.SetProxy with cfg.SDKConfig; a nil cfg is
+// treated as an empty configuration.
+func NewAntigravityAuth(cfg *config.Config, httpClient *http.Client) *AntigravityAuth {
+	if httpClient == nil {
+		if cfg == nil {
+			cfg = &config.Config{}
+		}
+		httpClient = util.SetProxy(&cfg.SDKConfig, &http.Client{})
+	}
+	return &AntigravityAuth{httpClient: httpClient}
+}
+
+// BuildAuthURL returns the OAuth authorization URL for the given state.
+//
+// A blank redirectURI falls back to http://localhost:<CallbackPort>/oauth-callback.
+// The query requests offline access with a forced consent prompt and the
+// space-joined Scopes.
+func (o *AntigravityAuth) BuildAuthURL(state, redirectURI string) string {
+	if len(strings.TrimSpace(redirectURI)) == 0 {
+		redirectURI = fmt.Sprintf("http://localhost:%d/oauth-callback", CallbackPort)
+	}
+
+	// url.Values.Encode sorts by key, so literal order here is irrelevant.
+	query := url.Values{
+		"access_type":   {"offline"},
+		"client_id":     {ClientID},
+		"prompt":        {"consent"},
+		"redirect_uri":  {redirectURI},
+		"response_type": {"code"},
+		"scope":         {strings.Join(Scopes, " ")},
+		"state":         {state},
+	}
+	return AuthEndpoint + "?" + query.Encode()
+}
+
+// ExchangeCodeForTokens trades an OAuth authorization code for tokens by
+// POSTing a form-encoded authorization_code grant to TokenEndpoint.
+//
+// A non-2xx status yields an error carrying the status code and, when present,
+// up to 8 KiB of the trimmed response body. On success the JSON body is decoded
+// into a TokenResponse.
+func (o *AntigravityAuth) ExchangeCodeForTokens(ctx context.Context, code, redirectURI string) (*TokenResponse, error) {
+	form := url.Values{
+		"code":          {code},
+		"client_id":     {ClientID},
+		"client_secret": {ClientSecret},
+		"redirect_uri":  {redirectURI},
+		"grant_type":    {"authorization_code"},
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, TokenEndpoint, strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("antigravity token exchange: create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return nil, fmt.Errorf("antigravity token exchange: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity token exchange: close body error: %v", errClose)
+		}
+	}()
+
+	if success := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices; !success {
+		// Cap the error body so a misbehaving server cannot bloat the message.
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return nil, fmt.Errorf("antigravity token exchange: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return nil, fmt.Errorf("antigravity token exchange: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return nil, fmt.Errorf("antigravity token exchange: request failed: status %d", resp.StatusCode)
+	}
+
+	token := new(TokenResponse)
+	if errDecode := json.NewDecoder(resp.Body).Decode(token); errDecode != nil {
+		return nil, fmt.Errorf("antigravity token exchange: decode response: %w", errDecode)
+	}
+	return token, nil
+}
+
+// FetchUserInfo returns the email address reported by UserInfoEndpoint for
+// the given access token.
+//
+// The token is trimmed and must be non-empty. A non-2xx status yields an error
+// carrying the status code and, when present, up to 8 KiB of the trimmed
+// response body. A response without an email is also an error.
+func (o *AntigravityAuth) FetchUserInfo(ctx context.Context, accessToken string) (string, error) {
+	bearer := strings.TrimSpace(accessToken)
+	if len(bearer) == 0 {
+		return "", fmt.Errorf("antigravity userinfo: missing access token")
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, UserInfoEndpoint, nil)
+	if err != nil {
+		return "", fmt.Errorf("antigravity userinfo: create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+bearer)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("antigravity userinfo: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity userinfo: close body error: %v", errClose)
+		}
+	}()
+
+	if success := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices; !success {
+		// Cap the error body so a misbehaving server cannot bloat the message.
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return "", fmt.Errorf("antigravity userinfo: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return "", fmt.Errorf("antigravity userinfo: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return "", fmt.Errorf("antigravity userinfo: request failed: status %d", resp.StatusCode)
+	}
+
+	var profile userInfo
+	if errDecode := json.NewDecoder(resp.Body).Decode(&profile); errDecode != nil {
+		return "", fmt.Errorf("antigravity userinfo: decode response: %w", errDecode)
+	}
+	if address := strings.TrimSpace(profile.Email); address != "" {
+		return address, nil
+	}
+	return "", fmt.Errorf("antigravity userinfo: response missing email")
+}
+
+// FetchProjectID resolves the project ID for the authenticated user.
+//
+// It POSTs to the loadCodeAssist endpoint and reads "cloudaicompanionProject"
+// from the response, accepting either a plain string or an object with an "id"
+// field. When no project ID is present it picks the default entry of
+// "allowedTiers" (falling back to "legacy-tier") and delegates to OnboardUser.
+func (o *AntigravityAuth) FetchProjectID(ctx context.Context, accessToken string) (string, error) {
+	payload, errMarshal := json.Marshal(map[string]any{
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	})
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	endpointURL := fmt.Sprintf("%s/%s:loadCodeAssist", APIEndpoint, APIVersion)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(string(payload)))
+	if err != nil {
+		return "", fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", APIUserAgent)
+	req.Header.Set("X-Goog-Api-Client", APIClient)
+	req.Header.Set("Client-Metadata", ClientMetadata)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity loadCodeAssist: close body error: %v", errClose)
+		}
+	}()
+
+	raw, errRead := io.ReadAll(resp.Body)
+	if errRead != nil {
+		return "", fmt.Errorf("read response: %w", errRead)
+	}
+	if success := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices; !success {
+		return "", fmt.Errorf("request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
+	}
+
+	var loadResp map[string]any
+	if errDecode := json.Unmarshal(raw, &loadResp); errDecode != nil {
+		return "", fmt.Errorf("decode response: %w", errDecode)
+	}
+
+	// The project may be reported either as a bare ID or as {"id": "..."}.
+	var projectID string
+	switch project := loadResp["cloudaicompanionProject"].(type) {
+	case string:
+		projectID = strings.TrimSpace(project)
+	case map[string]any:
+		if id, okID := project["id"].(string); okID {
+			projectID = strings.TrimSpace(id)
+		}
+	}
+	if projectID != "" {
+		return projectID, nil
+	}
+
+	// No project yet: onboard on the first default tier that has a usable ID.
+	tierID := "legacy-tier"
+	tiers, _ := loadResp["allowedTiers"].([]any)
+	for _, rawTier := range tiers {
+		tier, okTier := rawTier.(map[string]any)
+		if !okTier {
+			continue
+		}
+		if isDefault, _ := tier["isDefault"].(bool); !isDefault {
+			continue
+		}
+		if id, _ := tier["id"].(string); strings.TrimSpace(id) != "" {
+			tierID = strings.TrimSpace(id)
+			break
+		}
+	}
+
+	onboardedID, errOnboard := o.OnboardUser(ctx, accessToken, tierID)
+	if errOnboard != nil {
+		return "", errOnboard
+	}
+	return onboardedID, nil
+}
+
+// OnboardUser onboards the user on the given tier via the onboardUser endpoint
+// and polls until the operation reports done, returning the resulting project ID.
+//
+// Up to five requests are sent, each bounded by a 30 second timeout, with a two
+// second pause between polls. The pause is abandoned as soon as ctx is
+// cancelled. A nil ctx is treated as context.Background().
+//
+// Errors are returned when a request cannot be built, sent or read, when the
+// endpoint answers with a status other than 200 (the message carries at most
+// 200 bytes of the response body), when a completed operation carries no
+// project ID, when ctx is cancelled while waiting, and when the operation is
+// still not done after the final attempt. A successful return always carries a
+// non-empty project ID.
+func (o *AntigravityAuth) OnboardUser(ctx context.Context, accessToken, tierID string) (string, error) {
+	const (
+		maxAttempts  = 5
+		pollInterval = 2 * time.Second
+		maxErrLen    = 200
+	)
+
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	log.Infof("Antigravity: onboarding user with tier: %s", tierID)
+	requestBody := map[string]any{
+		"tierId": tierID,
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	}
+
+	rawBody, errMarshal := json.Marshal(requestBody)
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+	endpointURL := fmt.Sprintf("%s/%s:onboardUser", APIEndpoint, APIVersion)
+
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
+
+		statusCode, bodyBytes, errPoll := o.postOnboardUser(ctx, endpointURL, accessToken, string(rawBody))
+		if errPoll != nil {
+			return "", errPoll
+		}
+
+		if statusCode != http.StatusOK {
+			responseErr := strings.TrimSpace(string(bodyBytes))
+			if len(responseErr) > maxErrLen {
+				responseErr = responseErr[:maxErrLen]
+			}
+			return "", fmt.Errorf("http %d: %s", statusCode, responseErr)
+		}
+
+		var data map[string]any
+		if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
+			return "", fmt.Errorf("decode response: %w", errDecode)
+		}
+
+		if done, okDone := data["done"].(bool); okDone && done {
+			projectID := ""
+			if responseData, okResp := data["response"].(map[string]any); okResp {
+				// The project may be reported as a bare ID or as {"id": "..."}.
+				switch projectValue := responseData["cloudaicompanionProject"].(type) {
+				case map[string]any:
+					if id, okID := projectValue["id"].(string); okID {
+						projectID = strings.TrimSpace(id)
+					}
+				case string:
+					projectID = strings.TrimSpace(projectValue)
+				}
+			}
+			if projectID == "" {
+				return "", fmt.Errorf("no project_id in response")
+			}
+			log.Infof("Successfully fetched project_id: %s", projectID)
+			return projectID, nil
+		}
+
+		// Not done yet. Skip the pause after the final attempt, and let a
+		// cancelled context interrupt the wait instead of sleeping through it.
+		if attempt == maxAttempts {
+			break
+		}
+		timer := time.NewTimer(pollInterval)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return "", fmt.Errorf("onboard user: %w", ctx.Err())
+		case <-timer.C:
+		}
+	}
+
+	// Exhausting the attempts is a failure, not a successful empty result.
+	return "", fmt.Errorf("onboard user: operation not done after %d attempts", maxAttempts)
+}
+
+// postOnboardUser sends one onboardUser request with a 30 second timeout and
+// returns the response status code and full body.
+func (o *AntigravityAuth) postOnboardUser(ctx context.Context, endpointURL, accessToken, body string) (int, []byte, error) {
+	reqCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(body))
+	if errRequest != nil {
+		return 0, nil, fmt.Errorf("create request: %w", errRequest)
+	}
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", APIUserAgent)
+	req.Header.Set("X-Goog-Api-Client", APIClient)
+	req.Header.Set("Client-Metadata", ClientMetadata)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return 0, nil, fmt.Errorf("execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, errRead := io.ReadAll(resp.Body)
+	if errRead != nil {
+		return 0, nil, fmt.Errorf("read response: %w", errRead)
+	}
+	return resp.StatusCode, bodyBytes, nil
+}
--- a/internal/auth/antigravity/constants.go
+++ b/internal/auth/antigravity/constants.go
@@ -0,0 +1,34 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+// OAuth client credentials and local callback configuration for the
+// Antigravity provider.
+//
+// ClientID is sent as client_id in the authorization URL and the token
+// exchange; ClientSecret is sent as client_secret in the token exchange;
+// CallbackPort is the localhost port of the default redirect URI
+// (http://localhost:<port>/oauth-callback) used when no redirect URI is given.
+const (
+	ClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	ClientSecret = "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"
+	CallbackPort = 51121
+)
+
+// Scopes lists the OAuth scopes requested during Antigravity authentication.
+// BuildAuthURL joins them with single spaces to form the "scope" parameter.
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+	"https://www.googleapis.com/auth/cclog",
+	"https://www.googleapis.com/auth/experimentsandconfigs",
+}
+
+// OAuth2 endpoints used by the Antigravity flow: TokenEndpoint receives the
+// authorization-code exchange, AuthEndpoint is the base of the authorization
+// URL, and UserInfoEndpoint is queried for the account email.
+const (
+	TokenEndpoint    = "https://oauth2.googleapis.com/token"
+	AuthEndpoint     = "https://accounts.google.com/o/oauth2/v2/auth"
+	UserInfoEndpoint = "https://www.googleapis.com/oauth2/v1/userinfo?alt=json"
+)
+
+// Antigravity API configuration.
+//
+// APIEndpoint and APIVersion form the base of the loadCodeAssist and
+// onboardUser URLs ("<APIEndpoint>/<APIVersion>:<method>"). APIUserAgent,
+// APIClient and ClientMetadata are sent as the User-Agent, X-Goog-Api-Client
+// and Client-Metadata request headers respectively.
+//
+// NOTE(review): ClientMetadata declares ideType "IDE_UNSPECIFIED" while the
+// request bodies built in auth.go send ideType "ANTIGRAVITY" — confirm the
+// mismatch is intentional.
+const (
+	APIEndpoint    = "https://cloudcode-pa.googleapis.com"
+	APIVersion     = "v1internal"
+	APIUserAgent   = "google-api-nodejs-client/9.15.1"
+	APIClient      = "google-cloud-sdk vscode_cloudshelleditor/0.1"
+	ClientMetadata = `{"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}`
+)
--- a/internal/auth/antigravity/filename.go
+++ b/internal/auth/antigravity/filename.go
@@ -0,0 +1,16 @@
+package antigravity
+
+import (
+	"fmt"
+	"strings"
+)
+
+// credentialNameReplacer neutralises path separators so that the returned name
+// is always a single path component.
+var credentialNameReplacer = strings.NewReplacer("/", "_", "\\", "_")
+
+// CredentialFileName returns the filename used to persist Antigravity credentials.
+//
+// A blank email yields "antigravity.json". Otherwise the trimmed email is used
+// as a suffix ("antigravity-<email>.json") to disambiguate accounts. Path
+// separators in the email are replaced with "_" so the result cannot escape the
+// directory it is joined to; ordinary email addresses are left unchanged.
+func CredentialFileName(email string) string {
+	email = strings.TrimSpace(email)
+	if email == "" {
+		return "antigravity.json"
+	}
+	return fmt.Sprintf("antigravity-%s.json", credentialNameReplacer.Replace(email))
+}
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -18,11 +18,12 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for Claude/Anthropic
 const (
-	anthropicAuthURL  = "https://claude.ai/oauth/authorize"
-	anthropicTokenURL = "https://console.anthropic.com/v1/oauth/token"
-	anthropicClientID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-	redirectURI       = "http://localhost:54545/callback"
+	AuthURL     = "https://claude.ai/oauth/authorize"
+	TokenURL    = "https://console.anthropic.com/v1/oauth/token"
+	ClientID    = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+	RedirectURI = "http://localhost:54545/callback"
 )

 // tokenResponse represents the response structure from Anthropic's OAuth token endpoint.
@@ -82,16 +83,16 @@ func (o *ClaudeAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string

 	params := url.Values{
 		"code":                  {"true"},
-		"client_id":             {anthropicClientID},
+		"client_id":             {ClientID},
 		"response_type":         {"code"},
-		"redirect_uri":          {redirectURI},
+		"redirect_uri":          {RedirectURI},
 		"scope":                 {"org:create_api_key user:profile user:inference"},
 		"code_challenge":        {pkceCodes.CodeChallenge},
 		"code_challenge_method": {"S256"},
 		"state":                 {state},
 	}

-	authURL := fmt.Sprintf("%s?%s", anthropicAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, state, nil
 }

@@ -137,8 +138,8 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri
 		"code":          newCode,
 		"state":         state,
 		"grant_type":    "authorization_code",
-		"client_id":     anthropicClientID,
-		"redirect_uri":  redirectURI,
+		"client_id":     ClientID,
+		"redirect_uri":  RedirectURI,
 		"code_verifier": pkceCodes.CodeVerifier,
 	}

@@ -154,7 +155,7 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri

 	// log.Debugf("Token exchange request: %s", string(jsonBody))

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -221,7 +222,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 	}

 	reqBody := map[string]interface{}{
-		"client_id":     anthropicClientID,
+		"client_id":     ClientID,
 		"grant_type":    "refresh_token",
 		"refresh_token": refreshToken,
 	}
@@ -231,7 +232,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 		return nil, fmt.Errorf("failed to marshal request body: %w", err)
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/codex/filename.go
+++ b/internal/auth/codex/filename.go
@@ -4,9 +4,6 @@ import (
 	"fmt"
 	"strings"
 	"unicode"
-
-	"golang.org/x/text/cases"
-	"golang.org/x/text/language"
 )

 // CredentialFileName returns the filename used to persist Codex OAuth credentials.
@@ -43,15 +40,7 @@ func normalizePlanTypeForFilename(planType string) string {
 	}

 	for i, part := range parts {
-		parts[i] = titleToken(part)
+		parts[i] = strings.ToLower(strings.TrimSpace(part))
 	}
 	return strings.Join(parts, "-")
 }
-
-func titleToken(token string) string {
-	token = strings.TrimSpace(token)
-	if token == "" {
-		return ""
-	}
-	return cases.Title(language.English).String(token)
-}
--- a/internal/auth/codex/openai_auth.go
+++ b/internal/auth/codex/openai_auth.go
@@ -19,11 +19,12 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for OpenAI Codex
 const (
-	openaiAuthURL  = "https://auth.openai.com/oauth/authorize"
-	openaiTokenURL = "https://auth.openai.com/oauth/token"
-	openaiClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
-	redirectURI    = "http://localhost:1455/auth/callback"
+	AuthURL     = "https://auth.openai.com/oauth/authorize"
+	TokenURL    = "https://auth.openai.com/oauth/token"
+	ClientID    = "app_EMoamEEZ73f0CkXaXp7hrann"
+	RedirectURI = "http://localhost:1455/auth/callback"
 )

 // CodexAuth handles the OpenAI OAuth2 authentication flow.
@@ -50,9 +51,9 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 	}

 	params := url.Values{
-		"client_id":                  {openaiClientID},
+		"client_id":                  {ClientID},
 		"response_type":              {"code"},
-		"redirect_uri":               {redirectURI},
+		"redirect_uri":               {RedirectURI},
 		"scope":                      {"openid email profile offline_access"},
 		"state":                      {state},
 		"code_challenge":             {pkceCodes.CodeChallenge},
@@ -62,7 +63,7 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 		"codex_cli_simplified_flow":  {"true"},
 	}

-	authURL := fmt.Sprintf("%s?%s", openaiAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, nil
 }

@@ -77,13 +78,13 @@ func (o *CodexAuth) ExchangeCodeForTokens(ctx context.Context, code string, pkce
 	// Prepare token exchange request
 	data := url.Values{
 		"grant_type":    {"authorization_code"},
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"code":          {code},
-		"redirect_uri":  {redirectURI},
+		"redirect_uri":  {RedirectURI},
 		"code_verifier": {pkceCodes.CodeVerifier},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -163,13 +164,13 @@ func (o *CodexAuth) RefreshTokens(ctx context.Context, refreshToken string) (*Co
 	}

 	data := url.Values{
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"grant_type":    {"refresh_token"},
 		"refresh_token": {refreshToken},
 		"scope":         {"openid profile email"},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -28,19 +28,19 @@ import (
 	"golang.org/x/oauth2/google"
 )

+// OAuth configuration constants for Gemini
 const (
-	geminiOauthClientID       = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-	geminiOauthClientSecret   = "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"
-	geminiDefaultCallbackPort = 8085
+	ClientID            = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
+	ClientSecret        = "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"
+	DefaultCallbackPort = 8085
 )

-var (
-	geminiOauthScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-	}
-)
+// OAuth scopes for Gemini authentication
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+}

 // GeminiAuth provides methods for handling the Gemini OAuth2 authentication flow.
 // It encapsulates the logic for obtaining, storing, and refreshing authentication tokens
@@ -74,7 +74,7 @@ func NewGeminiAuth() *GeminiAuth {
 //   - *http.Client: An HTTP client configured with authentication
 //   - error: An error if the client configuration fails, nil otherwise
 func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
@@ -112,10 +112,10 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken

 	// Configure the OAuth2 client.
 	conf := &oauth2.Config{
-		ClientID:     geminiOauthClientID,
-		ClientSecret: geminiOauthClientSecret,
+		ClientID:     ClientID,
+		ClientSecret: ClientSecret,
 		RedirectURL:  callbackURL, // This will be used by the local server.
-		Scopes:       geminiOauthScopes,
+		Scopes:       Scopes,
 		Endpoint:     google.Endpoint,
 	}

@@ -198,9 +198,9 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 	}

 	ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-	ifToken["client_id"] = geminiOauthClientID
-	ifToken["client_secret"] = geminiOauthClientSecret
-	ifToken["scopes"] = geminiOauthScopes
+	ifToken["client_id"] = ClientID
+	ifToken["client_secret"] = ClientSecret
+	ifToken["scopes"] = Scopes
 	ifToken["universe_domain"] = "googleapis.com"

 	ts := GeminiTokenStorage{
@@ -226,7 +226,7 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 //   - *oauth2.Token: The OAuth2 token obtained from the authorization flow
 //   - error: An error if the token acquisition fails, nil otherwise
 func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
--- a/internal/auth/kiro/aws.go
+++ b/internal/auth/kiro/aws.go
@@ -190,6 +190,9 @@ func LoadKiroIDEToken() (*KiroTokenData, error) {
 		return nil, fmt.Errorf("access token is empty in Kiro IDE token file")
 	}

+	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
+	token.AuthMethod = strings.ToLower(token.AuthMethod)
+
 	return &token, nil
 }

@@ -219,6 +222,9 @@ func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
 		return nil, fmt.Errorf("access token is empty in token file")
 	}

+	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
+	token.AuthMethod = strings.ToLower(token.AuthMethod)
+
 	return &token, nil
 }

@@ -354,7 +360,7 @@ func SanitizeEmailForFilename(email string) string {
 	}

 	result := email
-	
+
 	// First, handle URL-encoded path traversal attempts (%2F, %2E, %5C, etc.)
 	// This prevents encoded characters from bypassing the sanitization.
 	// Note: We replace % last to catch any remaining encodings including double-encoding (%252F)
@@ -372,7 +378,7 @@ func SanitizeEmailForFilename(email string) string {
 	for _, char := range []string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|", " ", "\x00"} {
 		result = strings.ReplaceAll(result, char, "_")
 	}
-	
+
 	// Prevent path traversal: replace leading dots in each path component
 	// This handles cases like "../../../etc/passwd" → "_.._.._.._etc_passwd"
 	parts := strings.Split(result, "_")
@@ -383,6 +389,65 @@ func SanitizeEmailForFilename(email string) string {
 		parts[i] = part
 	}
 	result = strings.Join(parts, "_")
-	
+
 	return result
 }
+
+// ExtractIDCIdentifier extracts a filename-safe identifier from an IDC startUrl:
+// the first DNS label of the URL's host.
+//
+// Examples:
+//   - "https://d-1234567890.awsapps.com/start" -> "d-1234567890"
+//   - "https://my-company.awsapps.com/start" -> "my-company"
+//   - "https://acme-corp.awsapps.com/start" -> "acme-corp"
+//
+// The scheme prefix and everything from the first "/", "?" or "#" onward are
+// discarded before the label is taken, so a host without a dot does not absorb
+// the path. Backslashes and colons are replaced with "_". An empty or blank
+// input, or a host with an empty first label, yields "".
+func ExtractIDCIdentifier(startURL string) string {
+	host := strings.TrimSpace(startURL)
+	if host == "" {
+		return ""
+	}
+
+	// Remove protocol prefix
+	host = strings.TrimPrefix(host, "https://")
+	host = strings.TrimPrefix(host, "http://")
+
+	// Keep only the authority part: drop path, query and fragment.
+	if end := strings.IndexAny(host, "/?#"); end >= 0 {
+		host = host[:end]
+	}
+
+	// Format: {identifier}.awsapps.com
+	identifier, _, _ := strings.Cut(host, ".")
+	if identifier == "" {
+		return ""
+	}
+
+	// Sanitize for filename safety ("/" cannot remain after the cut above).
+	return strings.NewReplacer("\\", "_", ":", "_").Replace(identifier)
+}
+
+// GenerateTokenFileName generates a unique filename for token storage.
+// Priority: email > startUrl identifier (for IDC) > authMethod only
+// Format: kiro-{authMethod}-{identifier}.json
+//
+// The auth method is trimmed and lowercased (the Kiro IDE writes "IdC" where
+// "idc" is expected) and defaults to "unknown" when empty. In the email, "@"
+// and "." become "-". Path separators in either component become "_" so the
+// result is always a single path component. A nil tokenData yields
+// "kiro-unknown.json".
+func GenerateTokenFileName(tokenData *KiroTokenData) string {
+	if tokenData == nil {
+		return "kiro-unknown.json"
+	}
+
+	// Normalize so that "IdC" and "idc" map to the same file and hit the IDC branch.
+	authMethod := strings.ToLower(strings.TrimSpace(tokenData.AuthMethod))
+	authMethod = strings.NewReplacer("/", "_", "\\", "_").Replace(authMethod)
+	if authMethod == "" {
+		authMethod = "unknown"
+	}
+
+	// Priority 1: Use email if available
+	if tokenData.Email != "" {
+		// Sanitize email for filename (replace @ and . with -, separators with _)
+		sanitizedEmail := strings.NewReplacer("@", "-", ".", "-", "/", "_", "\\", "_").Replace(tokenData.Email)
+		return fmt.Sprintf("kiro-%s-%s.json", authMethod, sanitizedEmail)
+	}
+
+	// Priority 2: For IDC, use startUrl identifier
+	if authMethod == "idc" && tokenData.StartURL != "" {
+		if identifier := ExtractIDCIdentifier(tokenData.StartURL); identifier != "" {
+			return fmt.Sprintf("kiro-%s-%s.json", authMethod, identifier)
+		}
+	}
+
+	// Priority 3: Fallback to authMethod only
+	return fmt.Sprintf("kiro-%s.json", authMethod)
+}
--- a/internal/auth/kiro/aws_test.go
+++ b/internal/auth/kiro/aws_test.go
@@ -151,11 +151,161 @@ func TestSanitizeEmailForFilename(t *testing.T) {
 // createTestJWT creates a test JWT token with the given claims
 func createTestJWT(claims map[string]any) string {
 	header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256","typ":"JWT"}`))
-	
+
 	payloadBytes, _ := json.Marshal(claims)
 	payload := base64.RawURLEncoding.EncodeToString(payloadBytes)
-	
+
 	signature := base64.RawURLEncoding.EncodeToString([]byte("fake-signature"))
-	
+
 	return header + "." + payload + "." + signature
 }
+
+// TestExtractIDCIdentifier checks that ExtractIDCIdentifier returns the first
+// host label for a range of start URLs, and "" for an empty input.
+func TestExtractIDCIdentifier(t *testing.T) {
+	type testCase struct {
+		name     string
+		startURL string
+		expected string
+	}
+	cases := []testCase{
+		{name: "Empty URL", startURL: "", expected: ""},
+		{name: "Standard IDC URL with d- prefix", startURL: "https://d-1234567890.awsapps.com/start", expected: "d-1234567890"},
+		{name: "IDC URL with company name", startURL: "https://my-company.awsapps.com/start", expected: "my-company"},
+		{name: "IDC URL with simple name", startURL: "https://acme-corp.awsapps.com/start", expected: "acme-corp"},
+		{name: "IDC URL without https", startURL: "http://d-9876543210.awsapps.com/start", expected: "d-9876543210"},
+		{name: "IDC URL with subdomain only", startURL: "https://test.awsapps.com/start", expected: "test"},
+		{name: "Builder ID URL", startURL: "https://view.awsapps.com/start", expected: "view"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := ExtractIDCIdentifier(tc.startURL); got != tc.expected {
+				t.Errorf("ExtractIDCIdentifier() = %q, want %q", got, tc.expected)
+			}
+		})
+	}
+}
+
+// TestGenerateTokenFileName checks the filename priority rules of
+// GenerateTokenFileName: email first, then the IDC start URL identifier, then
+// the auth method alone (or "unknown" when that is empty).
+func TestGenerateTokenFileName(t *testing.T) {
+	type testCase struct {
+		name      string
+		tokenData *KiroTokenData
+		expected  string
+	}
+	cases := []testCase{
+		{
+			name:      "IDC with email",
+			tokenData: &KiroTokenData{AuthMethod: "idc", Email: "user@example.com", StartURL: "https://d-1234567890.awsapps.com/start"},
+			expected:  "kiro-idc-user-example-com.json",
+		},
+		{
+			name:      "IDC without email but with startUrl",
+			tokenData: &KiroTokenData{AuthMethod: "idc", StartURL: "https://d-1234567890.awsapps.com/start"},
+			expected:  "kiro-idc-d-1234567890.json",
+		},
+		{
+			name:      "IDC with company name in startUrl",
+			tokenData: &KiroTokenData{AuthMethod: "idc", StartURL: "https://my-company.awsapps.com/start"},
+			expected:  "kiro-idc-my-company.json",
+		},
+		{
+			name:      "IDC without email and without startUrl",
+			tokenData: &KiroTokenData{AuthMethod: "idc"},
+			expected:  "kiro-idc.json",
+		},
+		{
+			name:      "Builder ID with email",
+			tokenData: &KiroTokenData{AuthMethod: "builder-id", Email: "user@gmail.com", StartURL: "https://view.awsapps.com/start"},
+			expected:  "kiro-builder-id-user-gmail-com.json",
+		},
+		{
+			name:      "Builder ID without email",
+			tokenData: &KiroTokenData{AuthMethod: "builder-id", StartURL: "https://view.awsapps.com/start"},
+			expected:  "kiro-builder-id.json",
+		},
+		{
+			name:      "Social auth with email",
+			tokenData: &KiroTokenData{AuthMethod: "google", Email: "user@gmail.com"},
+			expected:  "kiro-google-user-gmail-com.json",
+		},
+		{
+			name:      "Empty auth method",
+			tokenData: &KiroTokenData{},
+			expected:  "kiro-unknown.json",
+		},
+		{
+			name:      "Email with special characters",
+			tokenData: &KiroTokenData{AuthMethod: "idc", Email: "user.name+tag@sub.example.com", StartURL: "https://d-1234567890.awsapps.com/start"},
+			expected:  "kiro-idc-user-name+tag-sub-example-com.json",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := GenerateTokenFileName(tc.tokenData); got != tc.expected {
+				t.Errorf("GenerateTokenFileName() = %q, want %q", got, tc.expected)
+			}
+		})
+	}
+}
--- a/internal/auth/kiro/background_refresh.go
+++ b/internal/auth/kiro/background_refresh.go
@@ -3,6 +3,7 @@ package kiro
 import (
 	"context"
 	"log"
+	"strings"
 	"sync"
 	"time"

@@ -58,7 +59,7 @@ type BackgroundRefresher struct {
 	wg               sync.WaitGroup
 	oauth            *KiroOAuth
 	ssoClient        *SSOOIDCClient
-	callbackMu       sync.RWMutex                                    // 保护回调函数的并发访问
+	callbackMu       sync.RWMutex                                   // 保护回调函数的并发访问
 	onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // 刷新成功回调
 }

@@ -163,7 +164,10 @@ func (r *BackgroundRefresher) refreshSingle(ctx context.Context, token *Token) {
 	var newTokenData *KiroTokenData
 	var err error

-	switch token.AuthMethod {
+	// Normalize auth method to lowercase for case-insensitive matching
+	authMethod := strings.ToLower(token.AuthMethod)
+
+	switch authMethod {
 	case "idc":
 		newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
 			ctx,
--- a/internal/auth/kiro/oauth_web.go
+++ b/internal/auth/kiro/oauth_web.go
@@ -421,7 +421,7 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
 			log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
 		}
 	}
-	
+
 	// Fall back to default location
 	if authDir == "" {
 		home, err := os.UserHomeDir()
@@ -431,24 +431,16 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
 		}
 		authDir = filepath.Join(home, ".cli-proxy-api")
 	}
-	
+
 	// Create directory if not exists
 	if err := os.MkdirAll(authDir, 0700); err != nil {
 		log.Errorf("OAuth Web: failed to create auth directory: %v", err)
 		return
 	}
-	
-	// Generate filename based on auth method
-	// Format: kiro-{authMethod}.json or kiro-{authMethod}-{email}.json
-	fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
-	if tokenData.Email != "" {
-		// Sanitize email for filename (replace @ and . with -)
-		sanitizedEmail := tokenData.Email
-		sanitizedEmail = strings.ReplaceAll(sanitizedEmail, "@", "-")
-		sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
-		fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
-	}
-	
+
+	// Generate filename using the unified function
+	fileName := GenerateTokenFileName(tokenData)
+
 	authFilePath := filepath.Join(authDir, fileName)
 	
 	// Convert to storage format and save
@@ -811,13 +803,8 @@ func (h *OAuthWebHandler) handleImportToken(c *gin.Context) {
 	// Save token to file
 	h.saveTokenToFile(tokenData)

-	// Generate filename for response
-	fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
-	if tokenData.Email != "" {
-		sanitizedEmail := strings.ReplaceAll(tokenData.Email, "@", "-")
-		sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
-		fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
-	}
+	// Generate filename for response using the unified function
+	fileName := GenerateTokenFileName(tokenData)

 	log.Infof("OAuth Web: token imported successfully")
 	c.JSON(http.StatusOK, gin.H{
--- a/internal/auth/kiro/refresh_manager.go
+++ b/internal/auth/kiro/refresh_manager.go
@@ -6,6 +6,7 @@ import (
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )

@@ -49,6 +50,14 @@ func (m *RefreshManager) Initialize(baseDir string, cfg *config.Config) error {
 		return nil
 	}

+	resolvedBaseDir, err := util.ResolveAuthDir(baseDir)
+	if err != nil {
+		log.Warnf("refresh manager: failed to resolve auth directory %s: %v", baseDir, err)
+	}
+	if resolvedBaseDir != "" {
+		baseDir = resolvedBaseDir
+	}
+
 	// 创建 token 存储库
 	repo := NewFileTokenRepository(baseDir)

--- a/internal/auth/kiro/token_repository.go
+++ b/internal/auth/kiro/token_repository.go
@@ -187,8 +187,9 @@ func (r *FileTokenRepository) readTokenFile(path string) (*Token, error) {
 		return nil, nil
 	}

-	// 检查 auth_method
+	// Check auth_method (case-insensitive comparison to handle "IdC", "IDC", "idc", etc.)
 	authMethod, _ := metadata["auth_method"].(string)
+	authMethod = strings.ToLower(authMethod)
 	if authMethod != "idc" && authMethod != "builder-id" {
 		return nil, nil // 只处理 IDC 和 Builder ID token
 	}
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -3,7 +3,6 @@ package cache
 import (
 	"crypto/sha256"
 	"encoding/hex"
-	"fmt"
 	"strings"
 	"sync"
 	"time"
@@ -25,18 +24,18 @@ const (
 	// MinValidSignatureLen is the minimum length for a signature to be considered valid
 	MinValidSignatureLen = 50

-	// SessionCleanupInterval controls how often stale sessions are purged
-	SessionCleanupInterval = 10 * time.Minute
+	// CacheCleanupInterval controls how often stale entries are purged
+	CacheCleanupInterval = 10 * time.Minute
 )

-// signatureCache stores signatures by sessionId -> textHash -> SignatureEntry
+// signatureCache stores signatures by model group -> textHash -> SignatureEntry
 var signatureCache sync.Map

-// sessionCleanupOnce ensures the background cleanup goroutine starts only once
-var sessionCleanupOnce sync.Once
+// cacheCleanupOnce ensures the background cleanup goroutine starts only once
+var cacheCleanupOnce sync.Once

-// sessionCache is the inner map type
-type sessionCache struct {
+// groupCache is one model group's bucket: a mutex-guarded map from text hash to SignatureEntry.
+type groupCache struct {
 	mu      sync.RWMutex
 	entries map[string]SignatureEntry
 }
@@ -47,36 +46,36 @@ func hashText(text string) string {
 	return hex.EncodeToString(h[:])[:SignatureTextHashLen]
 }

-// getOrCreateSession gets or creates a session cache
-func getOrCreateSession(sessionID string) *sessionCache {
+// getOrCreateGroupCache gets or creates a cache bucket for a model group
+func getOrCreateGroupCache(groupKey string) *groupCache {
 	// Start background cleanup on first access
-	sessionCleanupOnce.Do(startSessionCleanup)
+	cacheCleanupOnce.Do(startCacheCleanup)

-	if val, ok := signatureCache.Load(sessionID); ok {
-		return val.(*sessionCache)
+	if val, ok := signatureCache.Load(groupKey); ok {
+		return val.(*groupCache)
 	}
-	sc := &sessionCache{entries: make(map[string]SignatureEntry)}
-	actual, _ := signatureCache.LoadOrStore(sessionID, sc)
-	return actual.(*sessionCache)
+	sc := &groupCache{entries: make(map[string]SignatureEntry)}
+	actual, _ := signatureCache.LoadOrStore(groupKey, sc)
+	return actual.(*groupCache)
 }

-// startSessionCleanup launches a background goroutine that periodically
-// removes sessions where all entries have expired.
-func startSessionCleanup() {
+// startCacheCleanup launches a background goroutine that periodically
+// removes caches where all entries have expired.
+func startCacheCleanup() {
 	go func() {
-		ticker := time.NewTicker(SessionCleanupInterval)
+		ticker := time.NewTicker(CacheCleanupInterval)
 		defer ticker.Stop()
 		for range ticker.C {
-			purgeExpiredSessions()
+			purgeExpiredCaches()
 		}
 	}()
 }

-// purgeExpiredSessions removes sessions with no valid (non-expired) entries.
-func purgeExpiredSessions() {
+// purgeExpiredCaches drops expired entries from every cache bucket and deletes buckets left empty.
+func purgeExpiredCaches() {
 	now := time.Now()
 	signatureCache.Range(func(key, value any) bool {
-		sc := value.(*sessionCache)
+		sc := value.(*groupCache)
 		sc.mu.Lock()
 		// Remove expired entries
 		for k, entry := range sc.entries {
@@ -86,7 +85,7 @@ func purgeExpiredSessions() {
 		}
 		isEmpty := len(sc.entries) == 0
 		sc.mu.Unlock()
-		// Remove session if empty
+		// Remove cache bucket if empty
 		if isEmpty {
 			signatureCache.Delete(key)
 		}
@@ -94,7 +93,7 @@ func purgeExpiredSessions() {
 	})
 }

-// CacheSignature stores a thinking signature for a given session and text.
+// CacheSignature stores a thinking signature for a given model group and text.
 // Used for Claude models that require signed thinking blocks in multi-turn conversations.
 func CacheSignature(modelName, text, signature string) {
 	if text == "" || signature == "" {
@@ -104,9 +103,9 @@ func CacheSignature(modelName, text, signature string) {
 		return
 	}

-	text = fmt.Sprintf("%s#%s", GetModelGroup(modelName), text)
+	groupKey := GetModelGroup(modelName)
 	textHash := hashText(text)
-	sc := getOrCreateSession(textHash)
+	sc := getOrCreateGroupCache(groupKey)
 	sc.mu.Lock()
 	defer sc.mu.Unlock()

@@ -116,26 +115,25 @@ func CacheSignature(modelName, text, signature string) {
 	}
 }

-// GetCachedSignature retrieves a cached signature for a given session and text.
+// GetCachedSignature retrieves a cached signature for a given model group and text.
 // Returns empty string if not found or expired.
 func GetCachedSignature(modelName, text string) string {
-	family := GetModelGroup(modelName)
+	groupKey := GetModelGroup(modelName)

 	if text == "" {
-		if family == "gemini" {
+		if groupKey == "gemini" {
 			return "skip_thought_signature_validator"
 		}
 		return ""
 	}
-	text = fmt.Sprintf("%s#%s", GetModelGroup(modelName), text)
-	val, ok := signatureCache.Load(hashText(text))
+	val, ok := signatureCache.Load(groupKey)
 	if !ok {
-		if family == "gemini" {
+		if groupKey == "gemini" {
 			return "skip_thought_signature_validator"
 		}
 		return ""
 	}
-	sc := val.(*sessionCache)
+	sc := val.(*groupCache)

 	textHash := hashText(text)

@@ -145,7 +143,7 @@ func GetCachedSignature(modelName, text string) string {
 	entry, exists := sc.entries[textHash]
 	if !exists {
 		sc.mu.Unlock()
-		if family == "gemini" {
+		if groupKey == "gemini" {
 			return "skip_thought_signature_validator"
 		}
 		return ""
@@ -153,7 +151,7 @@ func GetCachedSignature(modelName, text string) string {
 	if now.Sub(entry.Timestamp) > SignatureCacheTTL {
 		delete(sc.entries, textHash)
 		sc.mu.Unlock()
-		if family == "gemini" {
+		if groupKey == "gemini" {
 			return "skip_thought_signature_validator"
 		}
 		return ""
@@ -167,22 +165,17 @@ func GetCachedSignature(modelName, text string) string {
 	return entry.Signature
 }

-// ClearSignatureCache clears signature cache for a specific session or all sessions.
-func ClearSignatureCache(sessionID string) {
-	if sessionID != "" {
-		signatureCache.Range(func(key, _ any) bool {
-			kStr, ok := key.(string)
-			if ok && strings.HasSuffix(kStr, "#"+sessionID) {
-				signatureCache.Delete(key)
-			}
-			return true
-		})
-	} else {
+// ClearSignatureCache clears every group when modelName is empty; otherwise it deletes only the group GetModelGroup(modelName) resolves to.
+func ClearSignatureCache(modelName string) {
+	if modelName == "" {
 		signatureCache.Range(func(key, _ any) bool {
 			signatureCache.Delete(key)
 			return true
 		})
+		return
 	}
+	groupKey := GetModelGroup(modelName)
+	signatureCache.Delete(groupKey)
 }

 // HasValidSignature checks if a signature is valid (non-empty and long enough)
--- a/internal/cache/signature_cache_test.go
+++ b/internal/cache/signature_cache_test.go
@@ -5,6 +5,8 @@ import (
 	"time"
 )

+const testModelName = "claude-sonnet-4-5"
+
 func TestCacheSignature_BasicStorageAndRetrieval(t *testing.T) {
 	ClearSignatureCache("")

@@ -12,30 +14,31 @@ func TestCacheSignature_BasicStorageAndRetrieval(t *testing.T) {
 	signature := "abc123validSignature1234567890123456789012345678901234567890"

 	// Store signature
-	CacheSignature("test-model", text, signature)
+	CacheSignature(testModelName, text, signature)

 	// Retrieve signature
-	retrieved := GetCachedSignature("test-model", text)
+	retrieved := GetCachedSignature(testModelName, text)
 	if retrieved != signature {
 		t.Errorf("Expected signature '%s', got '%s'", signature, retrieved)
 	}
 }

-func TestCacheSignature_DifferentSessions(t *testing.T) {
+func TestCacheSignature_DifferentModelGroups(t *testing.T) {
 	ClearSignatureCache("")

-	text := "Same text in different sessions"
+	text := "Same text across models"
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature("test-model", text, sig1)
-	CacheSignature("test-model", text, sig2)
+	geminiModel := "gemini-3-pro-preview"
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(geminiModel, text, sig2)

-	if GetCachedSignature("test-model", text) != sig1 {
-		t.Error("Session-a signature mismatch")
+	if GetCachedSignature(testModelName, text) != sig1 {
+		t.Error("Claude signature mismatch")
 	}
-	if GetCachedSignature("test-model", text) != sig2 {
-		t.Error("Session-b signature mismatch")
+	if GetCachedSignature(geminiModel, text) != sig2 {
+		t.Error("Gemini signature mismatch")
 	}
 }

@@ -43,13 +46,13 @@ func TestCacheSignature_NotFound(t *testing.T) {
 	ClearSignatureCache("")

 	// Non-existent session
-	if got := GetCachedSignature("test-model", "some text"); got != "" {
+	if got := GetCachedSignature(testModelName, "some text"); got != "" {
 		t.Errorf("Expected empty string for nonexistent session, got '%s'", got)
 	}

 	// Existing session but different text
-	CacheSignature("test-model", "text-a", "sigA12345678901234567890123456789012345678901234567890")
-	if got := GetCachedSignature("test-model", "text-b"); got != "" {
+	CacheSignature(testModelName, "text-a", "sigA12345678901234567890123456789012345678901234567890")
+	if got := GetCachedSignature(testModelName, "text-b"); got != "" {
 		t.Errorf("Expected empty string for different text, got '%s'", got)
 	}
 }
@@ -58,12 +61,11 @@ func TestCacheSignature_EmptyInputs(t *testing.T) {
 	ClearSignatureCache("")

 	// All empty/invalid inputs should be no-ops
-	CacheSignature("test-model", "text", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("test-model", "", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("test-model", "text", "")
-	CacheSignature("test-model", "text", "short") // Too short
+	CacheSignature(testModelName, "", "sig12345678901234567890123456789012345678901234567890")
+	CacheSignature(testModelName, "text", "")
+	CacheSignature(testModelName, "text", "short") // Too short

-	if got := GetCachedSignature("test-model", "text"); got != "" {
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
 		t.Errorf("Expected empty after invalid cache attempts, got '%s'", got)
 	}
 }
@@ -74,27 +76,24 @@ func TestCacheSignature_ShortSignatureRejected(t *testing.T) {
 	text := "Some text"
 	shortSig := "abc123" // Less than 50 chars

-	CacheSignature("test-model", text, shortSig)
+	CacheSignature(testModelName, text, shortSig)

-	if got := GetCachedSignature("test-model", text); got != "" {
+	if got := GetCachedSignature(testModelName, text); got != "" {
 		t.Errorf("Short signature should be rejected, got '%s'", got)
 	}
 }

-func TestClearSignatureCache_SpecificSession(t *testing.T) {
+func TestClearSignatureCache_ModelGroup(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("test-model", "text", sig)
-	CacheSignature("test-model", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("session-1")

-	if got := GetCachedSignature("test-model", "text"); got != "" {
-		t.Error("session-1 should be cleared")
-	}
-	if got := GetCachedSignature("test-model", "text"); got != sig {
-		t.Error("session-2 should still exist")
+	if got := GetCachedSignature(testModelName, "text"); got != sig {
+		t.Error("signature should remain when clearing unknown session")
 	}
 }

@@ -102,35 +101,37 @@ func TestClearSignatureCache_AllSessions(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("test-model", "text", sig)
-	CacheSignature("test-model", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("")

-	if got := GetCachedSignature("test-model", "text"); got != "" {
-		t.Error("session-1 should be cleared")
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
+		t.Error("text should be cleared")
 	}
-	if got := GetCachedSignature("test-model", "text"); got != "" {
-		t.Error("session-2 should be cleared")
+	if got := GetCachedSignature(testModelName, "text-2"); got != "" {
+		t.Error("text-2 should be cleared")
 	}
 }

 func TestHasValidSignature(t *testing.T) {
 	tests := []struct {
 		name      string
+		modelName string
 		signature string
 		expected  bool
 	}{
-		{"valid long signature", "abc123validSignature1234567890123456789012345678901234567890", true},
-		{"exactly 50 chars", "12345678901234567890123456789012345678901234567890", true},
-		{"49 chars - invalid", "1234567890123456789012345678901234567890123456789", false},
-		{"empty string", "", false},
-		{"short signature", "abc", false},
+		{"valid long signature", testModelName, "abc123validSignature1234567890123456789012345678901234567890", true},
+		{"exactly 50 chars", testModelName, "12345678901234567890123456789012345678901234567890", true},
+		{"49 chars - invalid", testModelName, "1234567890123456789012345678901234567890123456789", false},
+		{"empty string", testModelName, "", false},
+		{"short signature", testModelName, "abc", false},
+		{"gemini sentinel", "gemini-3-pro-preview", "skip_thought_signature_validator", true},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := HasValidSignature("claude-sonnet-4-5-thinking", tt.signature)
+			result := HasValidSignature(tt.modelName, tt.signature)
 			if result != tt.expected {
 				t.Errorf("HasValidSignature(%q) = %v, expected %v", tt.signature, result, tt.expected)
 			}
@@ -147,13 +148,13 @@ func TestCacheSignature_TextHashCollisionResistance(t *testing.T) {
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature("test-model", text1, sig1)
-	CacheSignature("test-model", text2, sig2)
+	CacheSignature(testModelName, text1, sig1)
+	CacheSignature(testModelName, text2, sig2)

-	if GetCachedSignature("test-model", text1) != sig1 {
+	if GetCachedSignature(testModelName, text1) != sig1 {
 		t.Error("text1 signature mismatch")
 	}
-	if GetCachedSignature("test-model", text2) != sig2 {
+	if GetCachedSignature(testModelName, text2) != sig2 {
 		t.Error("text2 signature mismatch")
 	}
 }
@@ -164,9 +165,9 @@ func TestCacheSignature_UnicodeText(t *testing.T) {
 	text := "한글 텍스트와 이모지 🎉 그리고 特殊文字"
 	sig := "unicodeSig123456789012345678901234567890123456789012345"

-	CacheSignature("test-model", text, sig)
+	CacheSignature(testModelName, text, sig)

-	if got := GetCachedSignature("test-model", text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Unicode text signature retrieval failed, got '%s'", got)
 	}
 }
@@ -178,10 +179,10 @@ func TestCacheSignature_Overwrite(t *testing.T) {
 	sig1 := "firstSignature12345678901234567890123456789012345678901"
 	sig2 := "secondSignature1234567890123456789012345678901234567890"

-	CacheSignature("test-model", text, sig1)
-	CacheSignature("test-model", text, sig2) // Overwrite
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(testModelName, text, sig2) // Overwrite

-	if got := GetCachedSignature("test-model", text); got != sig2 {
+	if got := GetCachedSignature(testModelName, text); got != sig2 {
 		t.Errorf("Expected overwritten signature '%s', got '%s'", sig2, got)
 	}
 }
@@ -196,10 +197,10 @@ func TestCacheSignature_ExpirationLogic(t *testing.T) {
 	text := "text"
 	sig := "validSig1234567890123456789012345678901234567890123456"

-	CacheSignature("test-model", text, sig)
+	CacheSignature(testModelName, text, sig)

 	// Fresh entry should be retrievable
-	if got := GetCachedSignature("test-model", text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Fresh entry should be retrievable, got '%s'", got)
 	}

--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -1,848 +1,69 @@
-// Package registry provides model definitions for various AI service providers.
-// This file contains static model definitions that can be used by clients
-// when registering their supported models.
+// Package registry provides model definitions and lookup helpers for various AI providers.
+// Static model metadata is stored in model_definitions_static_data.go.
 package registry

-// GetClaudeModels returns the standard Claude model definitions
-func GetClaudeModels() []*ModelInfo {
-	return []*ModelInfo{
+import (
+	"sort"
+	"strings"
+)

-		{
-			ID:                  "claude-haiku-4-5-20251001",
-			Object:              "model",
-			Created:             1759276800, // 2025-10-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Haiku",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			// Thinking: not supported for Haiku models
-		},
-		{
-			ID:                  "claude-sonnet-4-5-20250929",
-			Object:              "model",
-			Created:             1759104000, // 2025-09-29
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-5-20251101",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus",
-			Description:         "Premium model combining maximum intelligence with practical performance",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-1-20250805",
-			Object:              "model",
-			Created:             1722945600, // 2025-08-05
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.1 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-sonnet-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-3-7-sonnet-20250219",
-			Object:              "model",
-			Created:             1708300800, // 2025-02-19
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.7 Sonnet",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-3-5-haiku-20241022",
-			Object:              "model",
-			Created:             1729555200, // 2024-10-22
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.5 Haiku",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			// Thinking: not supported for Haiku models
-		},
-	}
-}
-
-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Gemini 3 Flash Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-	}
-}
-
-func GetGeminiVertexModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		// Imagen image generation models - use :predict action
-		{
-			ID:                         "imagen-4.0-generate-001",
-			Object:                     "model",
-			Created:                    1750000000,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/imagen-4.0-generate-001",
-			Version:                    "4.0",
-			DisplayName:                "Imagen 4.0 Generate",
-			Description:                "Imagen 4.0 image generation model",
-			SupportedGenerationMethods: []string{"predict"},
-		},
-		{
-			ID:                         "imagen-4.0-ultra-generate-001",
-			Object:                     "model",
-			Created:                    1750000000,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/imagen-4.0-ultra-generate-001",
-			Version:                    "4.0",
-			DisplayName:                "Imagen 4.0 Ultra Generate",
-			Description:                "Imagen 4.0 Ultra high-quality image generation model",
-			SupportedGenerationMethods: []string{"predict"},
-		},
-		{
-			ID:                         "imagen-3.0-generate-002",
-			Object:                     "model",
-			Created:                    1740000000,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/imagen-3.0-generate-002",
-			Version:                    "3.0",
-			DisplayName:                "Imagen 3.0 Generate",
-			Description:                "Imagen 3.0 image generation model",
-			SupportedGenerationMethods: []string{"predict"},
-		},
-		{
-			ID:                         "imagen-3.0-fast-generate-001",
-			Object:                     "model",
-			Created:                    1740000000,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/imagen-3.0-fast-generate-001",
-			Version:                    "3.0",
-			DisplayName:                "Imagen 3.0 Fast Generate",
-			Description:                "Imagen 3.0 fast image generation model",
-			SupportedGenerationMethods: []string{"predict"},
-		},
-		{
-			ID:                         "imagen-4.0-fast-generate-001",
-			Object:                     "model",
-			Created:                    1750000000,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/imagen-4.0-fast-generate-001",
-			Version:                    "4.0",
-			DisplayName:                "Imagen 4.0 Fast Generate",
-			Description:                "Imagen 4.0 fast image generation model",
-			SupportedGenerationMethods: []string{"predict"},
-		},
-	}
-}
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-	}
-}
-
-// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
-func GetAIStudioModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-pro-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-pro-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Pro Latest",
-			Description:                "Latest release of Gemini Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash Latest",
-			Description:                "Latest release of Gemini Flash",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-lite-latest",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-lite-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash-Lite Latest",
-			Description:                "Latest release of Gemini Flash-Lite",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image-preview",
-			Object:                     "model",
-			Created:                    1756166400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image-preview",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image Preview",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-		{
-			ID:                         "gemini-2.5-flash-image",
-			Object:                     "model",
-			Created:                    1759363200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-	}
-}
-
-// GetOpenAIModels returns the standard OpenAI model definitions
-func GetOpenAIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "gpt-5",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex-mini",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Mini",
-			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max",
-			Description:         "Stable version of GPT 5.1 Codex Max",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2",
-			Description:         "Stable version of GPT 5.2",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2-codex",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2 Codex",
-			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-	}
-}
-
-// GetQwenModels returns the standard Qwen model definitions
-func GetQwenModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "qwen3-coder-plus",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Plus",
-			Description:         "Advanced code generation and understanding model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 8192,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "qwen3-coder-flash",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Flash",
-			Description:         "Fast code generation model",
-			ContextLength:       8192,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "vision-model",
-			Object:              "model",
-			Created:             1758672000,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Vision Model",
-			Description:         "Vision model model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-	}
-}
-
-// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
-// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
-// Uses level-based configuration so standard normalization flows apply before conversion.
-var iFlowThinkingSupport = &ThinkingSupport{
-	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
-}
-
-// GetIFlowModels returns supported models for iFlow OAuth accounts.
-func GetIFlowModels() []*ModelInfo {
-	entries := []struct {
-		ID          string
-		DisplayName string
-		Description string
-		Created     int64
-		Thinking    *ThinkingSupport
-	}{
-		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
-		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
-		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
-		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
-		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
-		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
-		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
-		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
-		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
-		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
-		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
-		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
-		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
-		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
-		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
-		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
-		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
-	}
-	models := make([]*ModelInfo, 0, len(entries))
-	for _, entry := range entries {
-		models = append(models, &ModelInfo{
-			ID:          entry.ID,
-			Object:      "model",
-			Created:     entry.Created,
-			OwnedBy:     "iflow",
-			Type:        "iflow",
-			DisplayName: entry.DisplayName,
-			Description: entry.Description,
-			Thinking:    entry.Thinking,
+// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
+// The channel name is matched case-insensitively after trimming whitespace; it returns nil when the channel is unknown.
+//
+// Supported channels:
+//   - claude
+//   - gemini
+//   - vertex
+//   - gemini-cli
+//   - aistudio
+//   - codex
+//   - qwen
+//   - iflow
+//   - antigravity (returns static overrides only, sorted case-insensitively by ID; nil when no overrides are configured)
+func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
+	key := strings.ToLower(strings.TrimSpace(channel))
+	switch key {
+	case "claude":
+		return GetClaudeModels()
+	case "gemini":
+		return GetGeminiModels()
+	case "vertex":
+		return GetGeminiVertexModels()
+	case "gemini-cli":
+		return GetGeminiCLIModels()
+	case "aistudio":
+		return GetAIStudioModels()
+	case "codex":
+		return GetOpenAIModels()
+	case "qwen":
+		return GetQwenModels()
+	case "iflow":
+		return GetIFlowModels()
+	case "antigravity":
+		cfg := GetAntigravityModelConfig()
+		if len(cfg) == 0 {
+			return nil
+		}
+		models := make([]*ModelInfo, 0, len(cfg))
+		for modelID, entry := range cfg {
+			if modelID == "" || entry == nil {
+				continue
+			}
+			models = append(models, &ModelInfo{
+				ID:                  modelID,
+				Object:              "model",
+				OwnedBy:             "antigravity",
+				Type:                "antigravity",
+				Thinking:            entry.Thinking,
+				MaxCompletionTokens: entry.MaxCompletionTokens,
+			})
+		}
+		sort.Slice(models, func(i, j int) bool {
+			return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
 		})
-	}
-	return models
-}
-
-// AntigravityModelConfig captures static antigravity model overrides, including
-// Thinking budget limits and provider max completion tokens.
-type AntigravityModelConfig struct {
-	Thinking            *ThinkingSupport
-	MaxCompletionTokens int
-}
-
-// GetAntigravityModelConfig returns static configuration for antigravity models.
-// Keys use upstream model names returned by the Antigravity models endpoint.
-func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
-	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":           {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
-		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
-		"gpt-oss-120b-medium":        {},
-		"tab_flash_lite_preview":     {},
+		return models
+	default:
+		return nil
 	}
 }

--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -0,0 +1,846 @@
+// Package registry provides model definitions for various AI service providers.
+// This file stores the static model metadata catalog.
+package registry
+
+// GetClaudeModels returns the static Claude model catalog; every call builds a fresh slice of new entries.
+func GetClaudeModels() []*ModelInfo {
+	return []*ModelInfo{
+
+		{
+			ID:                  "claude-haiku-4-5-20251001",
+			Object:              "model",
+			Created:             1759276800, // 2025-10-01 00:00:00 UTC
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Haiku",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			// Thinking is left nil: not supported for Haiku models.
+		},
+		{
+			ID:                  "claude-sonnet-4-5-20250929",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29 00:00:00 UTC
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-5-20251101",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01 00:00:00 UTC
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-1-20250805",
+			Object:              "model",
+			Created:             1754352000, // 2025-08-05 00:00:00 UTC (was 1722945600, a 2024 timestamp)
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.1 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14 00:00:00 UTC (was 1715644800, i.e. 2024-05-14)
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-sonnet-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14 00:00:00 UTC (was 1715644800, i.e. 2024-05-14)
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-7-sonnet-20250219",
+			Object:              "model",
+			Created:             1739923200, // 2025-02-19 00:00:00 UTC (was 1708300800, i.e. 2024-02-19)
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.7 Sonnet",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-5-haiku-20241022",
+			Object:              "model",
+			Created:             1729555200, // 2024-10-22 00:00:00 UTC
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.5 Haiku",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+			// Thinking is left nil: not supported for Haiku models.
+		},
+	}
+}
+
+// GetGeminiModels returns the static Gemini model catalog; every call builds a fresh slice of new entries.
+func GetGeminiModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Gemini 3 Flash Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+	}
+}
+
+func GetGeminiVertexModels() []*ModelInfo { // static catalog: six Gemini entries followed by five Imagen entries; presumably the Vertex provider's list
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		// Imagen image generation models - use the :predict action; they set no token limits and no Thinking support.
+		{
+			ID:                         "imagen-4.0-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // 2025-06-15 UTC (round number, not a midnight boundary)
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Generate",
+			Description:                "Imagen 4.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-ultra-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // 2025-06-15 UTC (round number, not a midnight boundary)
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-ultra-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Ultra Generate",
+			Description:                "Imagen 4.0 Ultra high-quality image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-generate-002",
+			Object:                     "model",
+			Created:                    1740000000, // 2025-02-19 UTC (round number, not a midnight boundary)
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-generate-002",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Generate",
+			Description:                "Imagen 3.0 image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-3.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1740000000, // 2025-02-19 UTC (round number, not a midnight boundary)
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-3.0-fast-generate-001",
+			Version:                    "3.0",
+			DisplayName:                "Imagen 3.0 Fast Generate",
+			Description:                "Imagen 3.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+		{
+			ID:                         "imagen-4.0-fast-generate-001",
+			Object:                     "model",
+			Created:                    1750000000, // 2025-06-15 UTC (round number, not a midnight boundary)
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/imagen-4.0-fast-generate-001",
+			Version:                    "4.0",
+			DisplayName:                "Imagen 4.0 Fast Generate",
+			Description:                "Imagen 4.0 fast image generation model",
+			SupportedGenerationMethods: []string{"predict"},
+		},
+	}
+}
+
+// GetGeminiCLIModels returns the static Gemini model catalog used for Gemini CLI (five text models, no image models).
+func GetGeminiCLIModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+	}
+}
+
+// GetAIStudioModels returns the static Gemini model catalog for AI Studio integrations, including the "-latest" aliases.
+func GetAIStudioModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // 2025-01-18 UTC; NOTE(review): earlier than the 2.5 entries above, confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600, // 2025-12-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-pro-latest",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-pro-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Pro Latest",
+			Description:                "Latest release of Gemini Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-latest",
+			Object:                     "model",
+			Created:                    1750118400, // 2025-06-17 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash Latest",
+			Description:                "Latest release of Gemini Flash",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-flash-lite-latest",
+			Object:                     "model",
+			Created:                    1753142400, // 2025-07-22 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-flash-lite-latest",
+			Version:                    "2.5",
+			DisplayName:                "Gemini Flash-Lite Latest",
+			Description:                "Latest release of Gemini Flash-Lite",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		// {
+		// 	ID:                         "gemini-2.5-flash-image-preview",
+		// 	Object:                     "model",
+		// 	Created:                    1756166400,
+		// 	OwnedBy:                    "google",
+		// 	Type:                       "gemini",
+		// 	Name:                       "models/gemini-2.5-flash-image-preview",
+		// 	Version:                    "2.5",
+		// 	DisplayName:                "Gemini 2.5 Flash Image Preview",
+		// 	Description:                "State-of-the-art image generation and editing model.",
+		// 	InputTokenLimit:            1048576,
+		// 	OutputTokenLimit:           8192,
+		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		// 	// image models don't support thinkingConfig; leave Thinking nil
+		// },
+		{
+			ID:                         "gemini-2.5-flash-image",
+			Object:                     "model",
+			Created:                    1759363200, // 2025-10-02 00:00:00 UTC
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-image",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Image",
+			Description:                "State-of-the-art image generation and editing model.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           8192,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
+		},
+	}
+}
+
+// GetOpenAIModels returns the standard OpenAI model definitions
+func GetOpenAIModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                  "gpt-5",
+			Object:              "model",
+			Created:             1754524800,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-08-07",
+			DisplayName:         "GPT 5",
+			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5-codex",
+			Object:              "model",
+			Created:             1757894400,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-09-15",
+			DisplayName:         "GPT 5 Codex",
+			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5-codex-mini",
+			Object:              "model",
+			Created:             1762473600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5-2025-11-07",
+			DisplayName:         "GPT 5 Codex Mini",
+			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5",
+			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5.1 Codex",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex-mini",
+			Object:              "model",
+			Created:             1762905600,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-2025-11-12",
+			DisplayName:         "GPT 5.1 Codex Mini",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             1763424000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.1-max",
+			DisplayName:         "GPT 5.1 Codex Max",
+			Description:         "Stable version of GPT 5.1 Codex Max",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
+		{
+			ID:                  "gpt-5.2",
+			Object:              "model",
+			Created:             1765440000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.2",
+			DisplayName:         "GPT 5.2",
+			Description:         "Stable version of GPT 5.2",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
+		},
+		{
+			ID:                  "gpt-5.2-codex",
+			Object:              "model",
+			Created:             1765440000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.2",
+			DisplayName:         "GPT 5.2 Codex",
+			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
+	}
+}
+
+// GetQwenModels returns the standard Qwen model definitions.
+//
+// The result is rebuilt from literals on every call, so each caller receives
+// its own slice and its own ModelInfo values.
+func GetQwenModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                  "qwen3-coder-plus",
+			Object:              "model",
+			Created:             1753228800,
+			OwnedBy:             "qwen",
+			Type:                "qwen",
+			Version:             "3.0",
+			DisplayName:         "Qwen3 Coder Plus",
+			Description:         "Advanced code generation and understanding model",
+			ContextLength:       32768,
+			MaxCompletionTokens: 8192,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+		{
+			ID:                  "qwen3-coder-flash",
+			Object:              "model",
+			Created:             1753228800,
+			OwnedBy:             "qwen",
+			Type:                "qwen",
+			Version:             "3.0",
+			DisplayName:         "Qwen3 Coder Flash",
+			Description:         "Fast code generation model",
+			ContextLength:       8192,
+			MaxCompletionTokens: 2048,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+		{
+			ID:          "vision-model",
+			Object:      "model",
+			Created:     1758672000,
+			OwnedBy:     "qwen",
+			Type:        "qwen",
+			Version:     "3.0",
+			DisplayName: "Qwen3 Vision Model",
+			// Previously read "Vision model model"; the duplicated word was a typo.
+			Description:         "Vision model",
+			ContextLength:       32768,
+			MaxCompletionTokens: 2048,
+			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
+		},
+	}
+}
+
+// iFlowThinkingSupport is the single ThinkingSupport value referenced by every
+// iFlow model that can toggle thinking through
+// chat_template_kwargs.enable_thinking (a boolean switch).
+// It is expressed as a list of levels so the standard level normalization runs
+// before the value is converted to that toggle.
+var iFlowThinkingSupport = &ThinkingSupport{
+	Levels: []string{
+		"none",
+		"auto",
+		"minimal",
+		"low",
+		"medium",
+		"high",
+		"xhigh",
+	},
+}
+
+// GetIFlowModels lists the models available to iFlow OAuth accounts.
+//
+// Each catalog row below is expanded into a ModelInfo with Object "model" and
+// with OwnedBy and Type both set to "iflow". Rows that enable thinking all
+// point at the shared iFlowThinkingSupport value; the rest leave it nil.
+func GetIFlowModels() []*ModelInfo {
+	type catalogRow struct {
+		id       string
+		name     string
+		desc     string
+		created  int64
+		thinking *ThinkingSupport
+	}
+
+	catalog := []catalogRow{
+		{id: "tstars2.0", name: "TStars-2.0", desc: "iFlow TStars-2.0 multimodal assistant", created: 1746489600},
+		{id: "qwen3-coder-plus", name: "Qwen3-Coder-Plus", desc: "Qwen3 Coder Plus code generation", created: 1753228800},
+		{id: "qwen3-max", name: "Qwen3-Max", desc: "Qwen3 flagship model", created: 1758672000},
+		{id: "qwen3-vl-plus", name: "Qwen3-VL-Plus", desc: "Qwen3 multimodal vision-language", created: 1758672000},
+		{id: "qwen3-max-preview", name: "Qwen3-Max-Preview", desc: "Qwen3 Max preview build", created: 1757030400},
+		{id: "kimi-k2-0905", name: "Kimi-K2-Instruct-0905", desc: "Moonshot Kimi K2 instruct 0905", created: 1757030400},
+		{id: "glm-4.6", name: "GLM-4.6", desc: "Zhipu GLM 4.6 general model", created: 1759190400, thinking: iFlowThinkingSupport},
+		{id: "glm-4.7", name: "GLM-4.7", desc: "Zhipu GLM 4.7 general model", created: 1766448000, thinking: iFlowThinkingSupport},
+		{id: "kimi-k2", name: "Kimi-K2", desc: "Moonshot Kimi K2 general model", created: 1752192000},
+		{id: "kimi-k2-thinking", name: "Kimi-K2-Thinking", desc: "Moonshot Kimi K2 thinking model", created: 1762387200},
+		{id: "deepseek-v3.2-chat", name: "DeepSeek-V3.2", desc: "DeepSeek V3.2 Chat", created: 1764576000},
+		{id: "deepseek-v3.2-reasoner", name: "DeepSeek-V3.2", desc: "DeepSeek V3.2 Reasoner", created: 1764576000},
+		{id: "deepseek-v3.2", name: "DeepSeek-V3.2-Exp", desc: "DeepSeek V3.2 experimental", created: 1759104000},
+		{id: "deepseek-v3.1", name: "DeepSeek-V3.1-Terminus", desc: "DeepSeek V3.1 Terminus", created: 1756339200},
+		{id: "deepseek-r1", name: "DeepSeek-R1", desc: "DeepSeek reasoning model R1", created: 1737331200},
+		{id: "deepseek-v3", name: "DeepSeek-V3-671B", desc: "DeepSeek V3 671B", created: 1734307200},
+		{id: "qwen3-32b", name: "Qwen3-32B", desc: "Qwen3 32B", created: 1747094400},
+		{id: "qwen3-235b-a22b-thinking-2507", name: "Qwen3-235B-A22B-Thinking", desc: "Qwen3 235B A22B Thinking (2507)", created: 1753401600},
+		{id: "qwen3-235b-a22b-instruct", name: "Qwen3-235B-A22B-Instruct", desc: "Qwen3 235B A22B Instruct", created: 1753401600},
+		{id: "qwen3-235b", name: "Qwen3-235B-A22B", desc: "Qwen3 235B A22B", created: 1753401600},
+		{id: "minimax-m2", name: "MiniMax-M2", desc: "MiniMax M2", created: 1758672000, thinking: iFlowThinkingSupport},
+		{id: "minimax-m2.1", name: "MiniMax-M2.1", desc: "MiniMax M2.1", created: 1766448000, thinking: iFlowThinkingSupport},
+		{id: "iflow-rome-30ba3b", name: "iFlow-ROME", desc: "iFlow Rome 30BA3B model", created: 1736899200},
+	}
+
+	// One output slot per catalog row, filled in catalog order.
+	result := make([]*ModelInfo, len(catalog))
+	for i := range catalog {
+		row := catalog[i]
+		result[i] = &ModelInfo{
+			ID:          row.id,
+			Object:      "model",
+			Created:     row.created,
+			OwnedBy:     "iflow",
+			Type:        "iflow",
+			DisplayName: row.name,
+			Description: row.desc,
+			Thinking:    row.thinking,
+		}
+	}
+	return result
+}
+
+// AntigravityModelConfig captures static antigravity model overrides, including
+// Thinking budget limits and provider max completion tokens.
+//
+// Both fields are optional: Thinking is nil for models that carry no thinking
+// override, and MaxCompletionTokens stays at its zero value when no completion
+// limit is configured for the model.
+type AntigravityModelConfig struct {
+	Thinking            *ThinkingSupport
+	MaxCompletionTokens int
+}
+
+// GetAntigravityModelConfig builds the static per-model configuration for
+// antigravity models.
+//
+// The map is keyed by the upstream model names returned by the Antigravity
+// models endpoint. A new map is assembled on every call, and every entry owns
+// its own AntigravityModelConfig and ThinkingSupport values.
+func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
+	configs := make(map[string]*AntigravityModelConfig, 10)
+
+	// Currently commented out:
+	// "rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+
+	// Gemini 2.5 flash variants share the same budget range.
+	for _, name := range []string{"gemini-2.5-flash", "gemini-2.5-flash-lite"} {
+		configs[name] = &AntigravityModelConfig{
+			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		}
+	}
+
+	// Gemini 3 pro variants expose only the "low" and "high" levels.
+	for _, name := range []string{"gemini-3-pro-high", "gemini-3-pro-image"} {
+		configs[name] = &AntigravityModelConfig{
+			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		}
+	}
+
+	configs["gemini-3-flash"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+	}
+
+	// Claude thinking models also carry a completion token limit.
+	for _, name := range []string{"claude-sonnet-4-5-thinking", "claude-opus-4-5-thinking"} {
+		configs[name] = &AntigravityModelConfig{
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true},
+			MaxCompletionTokens: 64000,
+		}
+	}
+
+	configs["claude-sonnet-4-5"] = &AntigravityModelConfig{MaxCompletionTokens: 64000}
+
+	// Known models that need no overrides still get an empty entry.
+	configs["gpt-oss-120b-medium"] = &AntigravityModelConfig{}
+	configs["tab_flash_lite_preview"] = &AntigravityModelConfig{}
+
+	return configs
+}
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -1042,10 +1042,10 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 			"owned_by": model.OwnedBy,
 		}
 		if model.Created > 0 {
-			result["created"] = model.Created
+			result["created_at"] = model.Created
 		}
 		if model.Type != "" {
-			result["type"] = model.Type
+			result["type"] = "model"
 		}
 		if model.DisplayName != "" {
 			result["display_name"] = model.DisplayName
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -398,7 +398,8 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 		return nil, translatedPayload{}, err
 	}
 	payload = fixGeminiImageAspectRatio(baseModel, payload)
-	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -142,92 +142,114 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 		return resp, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return resp, err
-		}
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error

-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return resp, errDo
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return resp, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return resp, errDo
+				}
+				lastStatus = 0
+				lastBody = nil
+				lastErr = errDo
+				if idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				err = errDo
+				return resp, err
 			}
-			err = errDo
-			return resp, err
+
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			bodyBytes, errRead := io.ReadAll(httpResp.Body)
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("antigravity executor: close response body error: %v", errClose)
+			}
+			if errRead != nil {
+				recordAPIResponseError(ctx, e.cfg, errRead)
+				err = errRead
+				return resp, err
+			}
+			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return resp, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return resp, err
+			}
+
+			reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
+			var param any
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
+			reporter.ensurePublished(ctx)
+			return resp, nil
 		}

-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		bodyBytes, errRead := io.ReadAll(httpResp.Body)
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity executor: close response body error: %v", errClose)
-		}
-		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
-			err = errRead
-			return resp, err
-		}
-		appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return resp, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
-		var param any
-		converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(converted)}
-		reporter.ensurePublished(ctx)
-		return resp, nil
+		return resp, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return resp, err
 }

@@ -261,155 +283,177 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 		return resp, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return resp, err
-		}
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error

-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return resp, errDo
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return resp, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			err = errDo
-			return resp, err
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity executor: close response body error: %v", errClose)
-			}
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
-					err = errRead
-					return resp, err
-				}
-				if errCtx := ctx.Err(); errCtx != nil {
-					err = errCtx
-					return resp, err
+
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return resp, errDo
 				}
 				lastStatus = 0
 				lastBody = nil
-				lastErr = errRead
+				lastErr = errDo
 				if idx+1 < len(baseURLs) {
-					log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
-				err = errRead
+				err = errDo
 				return resp, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				bodyBytes, errRead := io.ReadAll(httpResp.Body)
+				if errClose := httpResp.Body.Close(); errClose != nil {
+					log.Errorf("antigravity executor: close response body error: %v", errClose)
+				}
+				if errRead != nil {
+					recordAPIResponseError(ctx, e.cfg, errRead)
+					if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
+						err = errRead
+						return resp, err
+					}
+					if errCtx := ctx.Err(); errCtx != nil {
+						err = errCtx
+						return resp, err
+					}
+					lastStatus = 0
+					lastBody = nil
+					lastErr = errRead
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					err = errRead
+					return resp, err
+				}
+				appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return resp, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return resp, err
 			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+
+			out := make(chan cliproxyexecutor.StreamChunk)
+			go func(resp *http.Response) {
+				defer close(out)
+				defer func() {
+					if errClose := resp.Body.Close(); errClose != nil {
+						log.Errorf("antigravity executor: close response body error: %v", errClose)
+					}
+				}()
+				scanner := bufio.NewScanner(resp.Body)
+				scanner.Buffer(nil, streamScannerBuffer)
+				for scanner.Scan() {
+					line := scanner.Bytes()
+					appendAPIResponseChunk(ctx, e.cfg, line)
+
+					// Filter usage metadata for all models
+					// Only retain usage statistics in the terminal chunk
+					line = FilterSSEUsageMetadata(line)
+
+					payload := jsonPayload(line)
+					if payload == nil {
+						continue
+					}
+
+					if detail, ok := parseAntigravityStreamUsage(payload); ok {
+						reporter.publish(ctx, detail)
+					}
+
+					out <- cliproxyexecutor.StreamChunk{Payload: payload}
+				}
+				if errScan := scanner.Err(); errScan != nil {
+					recordAPIResponseError(ctx, e.cfg, errScan)
+					reporter.publishFailure(ctx)
+					out <- cliproxyexecutor.StreamChunk{Err: errScan}
+				} else {
+					reporter.ensurePublished(ctx)
+				}
+			}(httpResp)
+
+			var buffer bytes.Buffer
+			for chunk := range out {
+				if chunk.Err != nil {
+					return resp, chunk.Err
+				}
+				if len(chunk.Payload) > 0 {
+					_, _ = buffer.Write(chunk.Payload)
+					_, _ = buffer.Write([]byte("\n"))
+				}
+			}
+			resp = cliproxyexecutor.Response{Payload: e.convertStreamToNonStream(buffer.Bytes())}
+
+			reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
+			var param any
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
+			reporter.ensurePublished(ctx)
+
+			return resp, nil
+		}
+
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return resp, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		out := make(chan cliproxyexecutor.StreamChunk)
-		go func(resp *http.Response) {
-			defer close(out)
-			defer func() {
-				if errClose := resp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity executor: close response body error: %v", errClose)
-				}
-			}()
-			scanner := bufio.NewScanner(resp.Body)
-			scanner.Buffer(nil, streamScannerBuffer)
-			for scanner.Scan() {
-				line := scanner.Bytes()
-				appendAPIResponseChunk(ctx, e.cfg, line)
-
-				// Filter usage metadata for all models
-				// Only retain usage statistics in the terminal chunk
-				line = FilterSSEUsageMetadata(line)
-
-				payload := jsonPayload(line)
-				if payload == nil {
-					continue
-				}
-
-				if detail, ok := parseAntigravityStreamUsage(payload); ok {
-					reporter.publish(ctx, detail)
-				}
-
-				out <- cliproxyexecutor.StreamChunk{Payload: payload}
-			}
-			if errScan := scanner.Err(); errScan != nil {
-				recordAPIResponseError(ctx, e.cfg, errScan)
-				reporter.publishFailure(ctx)
-				out <- cliproxyexecutor.StreamChunk{Err: errScan}
-			} else {
-				reporter.ensurePublished(ctx)
-			}
-		}(httpResp)
-
-		var buffer bytes.Buffer
-		for chunk := range out {
-			if chunk.Err != nil {
-				return resp, chunk.Err
-			}
-			if len(chunk.Payload) > 0 {
-				_, _ = buffer.Write(chunk.Payload)
-				_, _ = buffer.Write([]byte("\n"))
-			}
-		}
-		resp = cliproxyexecutor.Response{Payload: e.convertStreamToNonStream(buffer.Bytes())}
-
-		reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
-		var param any
-		converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(converted)}
-		reporter.ensurePublished(ctx)
-
-		return resp, nil
+		return resp, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return resp, err
 }

@@ -627,144 +671,166 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 		return nil, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return nil, err
-		}
-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return nil, errDo
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error
+
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return nil, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			err = errDo
-			return nil, err
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity executor: close response body error: %v", errClose)
-			}
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
-					err = errRead
-					return nil, err
-				}
-				if errCtx := ctx.Err(); errCtx != nil {
-					err = errCtx
-					return nil, err
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return nil, errDo
 				}
 				lastStatus = 0
 				lastBody = nil
-				lastErr = errRead
+				lastErr = errDo
 				if idx+1 < len(baseURLs) {
-					log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
-				err = errRead
+				err = errDo
 				return nil, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				bodyBytes, errRead := io.ReadAll(httpResp.Body)
+				if errClose := httpResp.Body.Close(); errClose != nil {
+					log.Errorf("antigravity executor: close response body error: %v", errClose)
+				}
+				if errRead != nil {
+					recordAPIResponseError(ctx, e.cfg, errRead)
+					if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
+						err = errRead
+						return nil, err
+					}
+					if errCtx := ctx.Err(); errCtx != nil {
+						err = errCtx
+						return nil, err
+					}
+					lastStatus = 0
+					lastBody = nil
+					lastErr = errRead
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					err = errRead
+					return nil, err
+				}
+				appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return nil, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return nil, err
 			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+
+			out := make(chan cliproxyexecutor.StreamChunk)
+			stream = out
+			go func(resp *http.Response) {
+				defer close(out)
+				defer func() {
+					if errClose := resp.Body.Close(); errClose != nil {
+						log.Errorf("antigravity executor: close response body error: %v", errClose)
+					}
+				}()
+				scanner := bufio.NewScanner(resp.Body)
+				scanner.Buffer(nil, streamScannerBuffer)
+				var param any
+				for scanner.Scan() {
+					line := scanner.Bytes()
+					appendAPIResponseChunk(ctx, e.cfg, line)
+
+					// Filter usage metadata for all models
+					// Only retain usage statistics in the terminal chunk
+					line = FilterSSEUsageMetadata(line)
+
+					payload := jsonPayload(line)
+					if payload == nil {
+						continue
+					}
+
+					if detail, ok := parseAntigravityStreamUsage(payload); ok {
+						reporter.publish(ctx, detail)
+					}
+
+					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
+					for i := range chunks {
+						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+					}
+				}
+				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
+				for i := range tail {
+					out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
+				}
+				if errScan := scanner.Err(); errScan != nil {
+					recordAPIResponseError(ctx, e.cfg, errScan)
+					reporter.publishFailure(ctx)
+					out <- cliproxyexecutor.StreamChunk{Err: errScan}
+				} else {
+					reporter.ensurePublished(ctx)
+				}
+			}(httpResp)
+			return stream, nil
+		}
+
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return nil, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		out := make(chan cliproxyexecutor.StreamChunk)
-		stream = out
-		go func(resp *http.Response) {
-			defer close(out)
-			defer func() {
-				if errClose := resp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity executor: close response body error: %v", errClose)
-				}
-			}()
-			scanner := bufio.NewScanner(resp.Body)
-			scanner.Buffer(nil, streamScannerBuffer)
-			var param any
-			for scanner.Scan() {
-				line := scanner.Bytes()
-				appendAPIResponseChunk(ctx, e.cfg, line)
-
-				// Filter usage metadata for all models
-				// Only retain usage statistics in the terminal chunk
-				line = FilterSSEUsageMetadata(line)
-
-				payload := jsonPayload(line)
-				if payload == nil {
-					continue
-				}
-
-				if detail, ok := parseAntigravityStreamUsage(payload); ok {
-					reporter.publish(ctx, detail)
-				}
-
-				chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
-				for i := range chunks {
-					out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
-				}
-			}
-			tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
-			for i := range tail {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
-			}
-			if errScan := scanner.Err(); errScan != nil {
-				recordAPIResponseError(ctx, e.cfg, errScan)
-				reporter.publishFailure(ctx)
-				out <- cliproxyexecutor.StreamChunk{Err: errScan}
-			} else {
-				reporter.ensurePublished(ctx)
-			}
-		}(httpResp)
-		return stream, nil
+		return nil, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return nil, err
 }

@@ -994,7 +1060,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		now := time.Now().Unix()
 		modelConfig := registry.GetAntigravityModelConfig()
 		models := make([]*registry.ModelInfo, 0, len(result.Map()))
-		for originalName := range result.Map() {
+		for originalName, modelData := range result.Map() {
 			modelID := strings.TrimSpace(originalName)
 			if modelID == "" {
 				continue
@@ -1004,12 +1070,18 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				continue
 			}
 			modelCfg := modelConfig[modelID]
-			modelName := modelID
+
+			// Extract displayName from upstream response, fallback to modelID
+			displayName := modelData.Get("displayName").String()
+			if displayName == "" {
+				displayName = modelID
+			}
+
 			modelInfo := &registry.ModelInfo{
 				ID:          modelID,
-				Name:        modelName,
-				Description: modelID,
-				DisplayName: modelID,
+				Name:        modelID,
+				Description: displayName,
+				DisplayName: displayName,
 				Version:     modelID,
 				Object:      "model",
 				Created:     now,
@@ -1213,7 +1285,17 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		// Use the centralized schema cleaner to handle unsupported keywords,
 		// const->enum conversion, and flattening of types/anyOf.
 		strJSON = util.CleanJSONSchemaForAntigravity(strJSON)
-
+		payload = []byte(strJSON)
+	} else {
+		strJSON := string(payload)
+		paths := make([]string, 0)
+		util.Walk(gjson.Parse(strJSON), "", "parametersJsonSchema", &paths)
+		for _, p := range paths {
+			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+		}
+		// Clean tool schemas for Gemini to remove unsupported JSON Schema keywords
+		// without adding empty-schema placeholders.
+		strJSON = util.CleanJSONSchemaForGemini(strJSON)
 		payload = []byte(strJSON)
 	}

@@ -1230,6 +1312,12 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		}
 	}

+	if strings.Contains(modelName, "claude") {
+		payload, _ = sjson.SetBytes(payload, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+	} else {
+		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.maxOutputTokens")
+	}
+
 	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
 	if errReq != nil {
 		return nil, errReq
@@ -1359,14 +1447,70 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
 	return defaultAntigravityAgent
 }

+// antigravityRetryAttempts returns how many times a request may be sent: the
+// configured retry count plus the initial try, and never fewer than one.
+// cfg.RequestRetry is the base count; when auth.RequestRetryOverride reports a
+// value it takes precedence. Negative counts are treated as zero retries.
+func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
+	var retries int
+	if cfg != nil {
+		retries = cfg.RequestRetry
+	}
+	if auth != nil {
+		if v, ok := auth.RequestRetryOverride(); ok {
+			retries = v
+		}
+	}
+	if retries < 0 || retries+1 < 1 { // retries+1 < 1 only when the sum wraps around
+		return 1
+	}
+	return retries + 1
+}
+
+// antigravityShouldRetryNoCapacity reports whether an upstream error response
+// is the "no capacity" condition: the status is 503 Service Unavailable and
+// the lower-cased body contains the phrase "no capacity available".
+func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
+	if statusCode != http.StatusServiceUnavailable || len(body) == 0 {
+		return false
+	}
+	lowered := strings.ToLower(string(body))
+	return strings.Contains(lowered, "no capacity available")
+}
+
+// antigravityNoCapacityRetryDelay returns the back-off to wait after the given
+// zero-based attempt: 250ms per attempt made so far, capped at 2s. Negative
+// attempts count as attempt 0.
+func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
+	// (7+1)*250ms is exactly the 2s cap, so clamping attempt to [0, 7] before
+	// multiplying gives the same delays while keeping very large attempt
+	// values from overflowing the arithmetic.
+	steps := min(max(attempt, 0), 7) + 1
+	return time.Duration(steps) * 250 * time.Millisecond
+}
+
+// antigravityWait pauses for wait, returning ctx.Err() early if ctx is done first.
+func antigravityWait(ctx context.Context, wait time.Duration) error {
+	if wait > 0 { // non-positive waits return nil without blocking
+		t := time.NewTimer(wait)
+		defer t.Stop()
+		select {
+		case <-t.C: // full wait elapsed
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+	return nil
+}
+
 func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
 	if base := resolveCustomAntigravityBaseURL(auth); base != "" {
 		return []string{base}
 	}
 	return []string{
-		antigravitySandboxBaseURLDaily,
 		antigravityBaseURLDaily,
-		antigravityBaseURLProd,
+		antigravitySandboxBaseURLDaily, // fallback, tried after antigravityBaseURLDaily
+		// antigravityBaseURLProd, // NOTE(review): dropped from the fallback list in this change; reason not stated here — confirm
 	}
 }

@@ -1405,26 +1549,10 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 	template, _ = sjson.Set(template, "request.sessionId", generateStableSessionID(payload))

 	template, _ = sjson.Delete(template, "request.safetySettings")
-	//	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
-
-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
-		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
-			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
-				if funcDecl.Get("parametersJsonSchema").Exists() {
-					template, _ = sjson.SetRaw(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters", key.Int(), funKey.Int()), funcDecl.Get("parametersJsonSchema").Raw)
-					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters.$schema", key.Int(), funKey.Int()))
-					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parametersJsonSchema", key.Int(), funKey.Int()))
-				}
-				return true
-			})
-			return true
-		})
+	if toolConfig := gjson.Get(template, "toolConfig"); toolConfig.Exists() && !gjson.Get(template, "request.toolConfig").Exists() {
+		template, _ = sjson.SetRaw(template, "request.toolConfig", toolConfig.Raw)
+		template, _ = sjson.Delete(template, "toolConfig")
 	}
-
-	if !strings.Contains(modelName, "claude") {
-		template, _ = sjson.Delete(template, "request.generationConfig.maxOutputTokens")
-	}
-
 	return []byte(template)
 }

--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -114,7 +114,8 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// based on client type and configuration.
 	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
@@ -162,7 +163,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
@@ -245,7 +246,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	// based on client type and configuration.
 	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
@@ -293,7 +295,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
@@ -731,6 +733,11 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {

 	if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
 		tools.ForEach(func(index, tool gjson.Result) bool {
+			// Skip built-in tools (web_search, code_execution, etc.) which have
+			// a "type" field and require their name to remain unchanged.
+			if tool.Get("type").Exists() && tool.Get("type").String() != "" {
+				return true
+			}
 			name := tool.Get("name").String()
 			if name == "" || strings.HasPrefix(name, prefix) {
 				return true
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -25,6 +25,18 @@ func TestApplyClaudeToolPrefix(t *testing.T) {
 	}
 }

+// TestApplyClaudeToolPrefix_SkipsBuiltinTools checks typed tools keep their name while untyped ones are prefixed.
+func TestApplyClaudeToolPrefix_SkipsBuiltinTools(t *testing.T) {
+	body := []byte(`{"tools":[{"type":"web_search_20250305","name":"web_search"},{"name":"my_custom_tool","input_schema":{"type":"object"}}]}`)
+	prefixed := applyClaudeToolPrefix(body, "proxy_")
+	if name := gjson.GetBytes(prefixed, "tools.0.name").String(); name != "web_search" {
+		t.Fatalf("built-in tool name should not be prefixed: tools.0.name = %q, want %q", name, "web_search")
+	}
+	if name := gjson.GetBytes(prefixed, "tools.1.name").String(); name != "proxy_my_custom_tool" {
+		t.Fatalf("custom tool should be prefixed: tools.1.name = %q, want %q", name, "proxy_my_custom_tool")
+	}
+}
+
 func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
 	input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
 	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -101,7 +101,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		return resp, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -149,7 +150,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -213,7 +214,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		return nil, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
@@ -263,7 +265,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			return nil, readErr
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -129,7 +129,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -226,7 +227,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 		lastStatus = httpResp.StatusCode
 		lastBody = append([]byte(nil), data...)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		if httpResp.StatusCode == 429 {
 			if idx+1 < len(models) {
 				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -278,7 +279,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	projectID := resolveGeminiProjectID(auth)

@@ -358,7 +360,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
-			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+			logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 			if httpResp.StatusCode == 429 {
 				if idx+1 < len(models) {
 					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -126,7 +126,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := "generateContent"
@@ -187,7 +188,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -228,7 +229,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	baseURL := resolveGeminiBaseURL(auth)
@@ -280,7 +282,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("gemini executor: close response body error: %v", errClose)
 		}
@@ -400,7 +402,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		log.Debugf("request error, error status: %d, error body: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)}
 	}

--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -325,7 +325,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}

 		body = fixGeminiImageAspectRatio(baseModel, body)
-		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+		requestedModel := payloadRequestedModel(opts, req.Model)
+		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}

@@ -388,7 +389,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -438,7 +439,8 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, false)
@@ -501,7 +503,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -541,7 +543,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, true)
@@ -598,7 +601,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -664,7 +667,8 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, true)
@@ -721,7 +725,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -834,7 +838,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
@@ -918,7 +922,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -119,7 +119,8 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "stream", false)

 	path := githubCopilotChatPath
@@ -218,7 +219,8 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
 	if !useResponses {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -98,7 +98,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	}

 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -141,7 +142,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("iflow request error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -201,7 +202,8 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -242,7 +244,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			log.Errorf("iflow executor: close response body error: %v", errClose)
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("iflow streaming error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -791,28 +791,28 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 				_ = httpResp.Body.Close()
 				appendAPIResponseChunk(ctx, e.cfg, respBody)

-				if attempt < maxRetries {
-					log.Warnf("kiro: received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1)
+				log.Warnf("kiro: received 401 error, attempting token refresh")
+				refreshedAuth, refreshErr := e.Refresh(ctx, auth)
+				if refreshErr != nil {
+					log.Errorf("kiro: token refresh failed: %v", refreshErr)
+					return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)}
+				}

-					refreshedAuth, refreshErr := e.Refresh(ctx, auth)
-					if refreshErr != nil {
-						log.Errorf("kiro: token refresh failed: %v", refreshErr)
-						return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)}
+				if refreshedAuth != nil {
+					auth = refreshedAuth
+					// Persist the refreshed auth to file so subsequent requests use it
+					if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+						log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+						// Continue anyway - the token is valid for this request
 					}
-
-					if refreshedAuth != nil {
-						auth = refreshedAuth
-						// Persist the refreshed auth to file so subsequent requests use it
-						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
-							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
-							// Continue anyway - the token is valid for this request
-						}
-						accessToken, profileArn = kiroCredentials(auth)
-						// Rebuild payload with new profile ARN if changed
-						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
-						log.Infof("kiro: token refreshed successfully, retrying request")
+					accessToken, profileArn = kiroCredentials(auth)
+					// Rebuild payload with new profile ARN if changed
+					kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
+					if attempt < maxRetries {
+						log.Infof("kiro: token refreshed successfully, retrying request (attempt %d/%d)", attempt+1, maxRetries+1)
 						continue
 					}
+					log.Infof("kiro: token refreshed successfully, no retries remaining")
 				}

 				log.Warnf("kiro request error, status: 401, body: %s", summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody))
@@ -1199,28 +1199,28 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
 				_ = httpResp.Body.Close()
 				appendAPIResponseChunk(ctx, e.cfg, respBody)

-				if attempt < maxRetries {
-					log.Warnf("kiro: stream received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1)
+				log.Warnf("kiro: stream received 401 error, attempting token refresh")
+				refreshedAuth, refreshErr := e.Refresh(ctx, auth)
+				if refreshErr != nil {
+					log.Errorf("kiro: token refresh failed: %v", refreshErr)
+					return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)}
+				}

-					refreshedAuth, refreshErr := e.Refresh(ctx, auth)
-					if refreshErr != nil {
-						log.Errorf("kiro: token refresh failed: %v", refreshErr)
-						return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)}
+				if refreshedAuth != nil {
+					auth = refreshedAuth
+					// Persist the refreshed auth to file so subsequent requests use it
+					if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+						log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+						// Continue anyway - the token is valid for this request
 					}
-
-					if refreshedAuth != nil {
-						auth = refreshedAuth
-						// Persist the refreshed auth to file so subsequent requests use it
-						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
-							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
-							// Continue anyway - the token is valid for this request
-						}
-						accessToken, profileArn = kiroCredentials(auth)
-						// Rebuild payload with new profile ARN if changed
-						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
-						log.Infof("kiro: token refreshed successfully, retrying stream request")
+					accessToken, profileArn = kiroCredentials(auth)
+					// Rebuild payload with new profile ARN if changed
+					kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
+					if attempt < maxRetries {
+						log.Infof("kiro: token refreshed successfully, retrying stream request (attempt %d/%d)", attempt+1, maxRetries+1)
 						continue
 					}
+					log.Infof("kiro: token refreshed successfully, no retries remaining")
 				}

 				log.Warnf("kiro stream error, status: 401, body: %s", string(respBody))
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -12,7 +12,10 @@ import (

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
 )

 const (
@@ -332,6 +335,12 @@ func summarizeErrorBody(contentType string, body []byte) string {
 		}
 		return "[html body omitted]"
 	}
+
+	// Try to extract error message from JSON response
+	if message := extractJSONErrorMessage(body); message != "" {
+		return message
+	}
+
 	return string(body)
 }

@@ -358,3 +367,25 @@ func extractHTMLTitle(body []byte) string {
 	}
 	return strings.Join(strings.Fields(title), " ")
 }
+
+// extractJSONErrorMessage returns the string found at the "error.message" path of body, or "" when that path is missing or empty.
+func extractJSONErrorMessage(body []byte) string {
+	result := gjson.GetBytes(body, "error.message")
+	if result.Exists() && result.String() != "" {
+		return result.String()
+	}
+	return "" // nothing usable at error.message; the caller decides what to log instead
+}
+
+// logWithRequestID returns a logrus Entry carrying a "request_id" field read from ctx via logging.GetRequestID.
+// When ctx is nil or yields an empty request ID, it returns a field-less Entry wrapping the standard logger.
+func logWithRequestID(ctx context.Context) *log.Entry {
+	if ctx == nil {
+		return log.NewEntry(log.StandardLogger())
+	}
+	requestID := logging.GetRequestID(ctx)
+	if requestID == "" {
+		return log.NewEntry(log.StandardLogger()) // avoid emitting an empty request_id field
+	}
+	return log.WithField("request_id", requestID)
+}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -90,7 +90,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -145,7 +146,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -185,7 +186,8 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -237,7 +239,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("openai compat executor: close response body error: %v", errClose)
 		}
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -5,6 +5,8 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -12,8 +14,9 @@ import (
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
 // and restricts matches to the given protocol when supplied. Defaults are checked
-// against the original payload when provided.
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
+// against the original payload when provided. requestedModel carries the client-visible
+// model name before alias resolution so payload rules can target aliases precisely.
+func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte {
 	if cfg == nil || len(payload) == 0 {
 		return payload
 	}
@@ -22,10 +25,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		return payload
 	}
 	model = strings.TrimSpace(model)
-	if model == "" {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
 		return payload
 	}
-	candidates := payloadModelCandidates(cfg, model, protocol)
+	candidates := payloadModelCandidates(model, requestedModel)
 	out := payload
 	source := original
 	if len(source) == 0 {
@@ -163,65 +167,42 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
 	return false
 }

-func payloadModelCandidates(cfg *config.Config, model, protocol string) []string {
+func payloadModelCandidates(model, requestedModel string) []string { // de-duplicated names to match rules against: model first, then names derived from requestedModel
 	model = strings.TrimSpace(model)
-	if model == "" {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
 		return nil
 	}
-	candidates := []string{model}
-	if cfg == nil {
-		return candidates
-	}
-	aliases := payloadModelAliases(cfg, model, protocol)
-	if len(aliases) == 0 {
-		return candidates
-	}
-	seen := map[string]struct{}{strings.ToLower(model): struct{}{}}
-	for _, alias := range aliases {
-		alias = strings.TrimSpace(alias)
-		if alias == "" {
-			continue
+	candidates := make([]string, 0, 3) // at most three entries: model, requested base name, full requested name
+	seen := make(map[string]struct{}, 3)
+	addCandidate := func(value string) {
+		value = strings.TrimSpace(value)
+		if value == "" {
+			return
 		}
-		key := strings.ToLower(alias)
+		key := strings.ToLower(value) // de-duplication is case-insensitive; the first spelling seen is kept
 		if _, ok := seen[key]; ok {
-			continue
+			return
 		}
 		seen[key] = struct{}{}
-		candidates = append(candidates, alias)
+		candidates = append(candidates, value)
+	}
+	if model != "" {
+		addCandidate(model)
+	}
+	if requestedModel != "" {
+		parsed := thinking.ParseSuffix(requestedModel) // only ModelName and HasSuffix of the parse result are used below
+		base := strings.TrimSpace(parsed.ModelName)
+		if base != "" {
+			addCandidate(base)
+		}
+		if parsed.HasSuffix { // also offer the full requested name, suffix included
+			addCandidate(requestedModel)
+		}
 	}
 	return candidates
 }

-func payloadModelAliases(cfg *config.Config, model, protocol string) []string {
-	if cfg == nil {
-		return nil
-	}
-	model = strings.TrimSpace(model)
-	if model == "" {
-		return nil
-	}
-	channel := strings.ToLower(strings.TrimSpace(protocol))
-	if channel == "" {
-		return nil
-	}
-	entries := cfg.OAuthModelAlias[channel]
-	if len(entries) == 0 {
-		return nil
-	}
-	aliases := make([]string, 0, 2)
-	for _, entry := range entries {
-		if !strings.EqualFold(strings.TrimSpace(entry.Name), model) {
-			continue
-		}
-		alias := strings.TrimSpace(entry.Alias)
-		if alias == "" {
-			continue
-		}
-		aliases = append(aliases, alias)
-	}
-	return aliases
-}
-
 // buildPayloadPath combines an optional root path with a relative parameter path.
 // When root is empty, the parameter path is used as-is. When root is non-empty,
 // the parameter path is treated as relative to root.
@@ -258,6 +239,35 @@ func payloadRawValue(value any) ([]byte, bool) {
 	}
 }

+func payloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string { // trimmed requested-model value from opts.Metadata, else trimmed fallback
+	fallback = strings.TrimSpace(fallback) // trimmed once so every fallback return below is already clean
+	if len(opts.Metadata) == 0 {
+		return fallback
+	}
+	raw, ok := opts.Metadata[cliproxyexecutor.RequestedModelMetadataKey]
+	if !ok || raw == nil {
+		return fallback
+	}
+	switch v := raw.(type) { // only string and []byte values are accepted; any other type yields fallback
+	case string:
+		if strings.TrimSpace(v) == "" {
+			return fallback
+		}
+		return strings.TrimSpace(v)
+	case []byte:
+		if len(v) == 0 {
+			return fallback
+		}
+		trimmed := strings.TrimSpace(string(v))
+		if trimmed == "" { // whitespace-only value is treated the same as a missing one
+			return fallback
+		}
+		return trimmed
+	default:
+		return fallback
+	}
+}
+
 // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters.
 // Examples:
 //
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -91,7 +91,8 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 		return resp, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -132,7 +133,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -184,7 +185,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -220,7 +222,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("qwen executor: close response body error: %v", errClose)
 		}
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -74,13 +74,13 @@ func TestConvertClaudeRequestToAntigravity_RoleMapping(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Valid signature must be at least 50 characters
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
 	thinkingText := "Let me think..."

-	// Pre-cache the signature (simulating a response from the same session)
-	// The session ID is derived from the first user message hash
-	// Since there's no user message in this test, we need to add one
+	// Pre-cache the signature (simulating a previous response for the same thinking text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
@@ -117,6 +117,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlockWithoutSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Unsigned thinking blocks should be removed entirely (not converted to text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
@@ -238,6 +240,8 @@ func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
 	thinkingText := "Let me think..."

@@ -279,6 +283,8 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Case: text block followed by thinking block -> should be reordered to thinking first
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
 	thinkingText := "Planning..."
@@ -487,6 +493,8 @@ func TestConvertClaudeRequestToAntigravity_TrailingUnsignedThinking_Removed(t *t
 }

 func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Last assistant message ends with signed thinking block - should be kept
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
 	thinkingText := "Valid thinking..."
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -139,7 +139,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq

 						if params.CurrentThinkingText.Len() > 0 {
 							cache.CacheSignature(modelName, params.CurrentThinkingText.String(), thoughtSignature.String())
-							// log.Debugf("Cached signature for thinking block (sessionID=%s, textLen=%d)", params.SessionID, params.CurrentThinkingText.Len())
+							// log.Debugf("Cached signature for thinking block (textLen=%d)", params.CurrentThinkingText.Len())
 							params.CurrentThinkingText.Reset()
 						}

--- a/internal/translator/antigravity/claude/antigravity_claude_response_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response_test.go
@@ -12,10 +12,10 @@ import (
 // Signature Caching Tests
 // ============================================================================

-func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
+func TestConvertAntigravityResponseToClaude_ParamsInitialized(t *testing.T) {
 	cache.ClearSignatureCache("")

-	// Request with user message - should derive session ID
+	// Request with user message - should initialize params
 	requestJSON := []byte(`{
 		"messages": [
 			{"role": "user", "content": [{"type": "text", "text": "Hello world"}]}
@@ -37,10 +37,12 @@ func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
 	ctx := context.Background()
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, responseJSON, &param)

-	// Verify session ID was set
 	params := param.(*Params)
-	if params.SessionID == "" {
-		t.Error("SessionID should be derived from request")
+	if !params.HasFirstResponse {
+		t.Error("HasFirstResponse should be set after first chunk")
+	}
+	if params.CurrentThinkingText.Len() == 0 {
+		t.Error("Thinking text should be accumulated")
 	}
 }

@@ -130,12 +132,8 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	// Process thinking chunk
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, thinkingChunk, &param)
 	params := param.(*Params)
-	sessionID := params.SessionID
 	thinkingText := params.CurrentThinkingText.String()

-	if sessionID == "" {
-		t.Fatal("SessionID should be set")
-	}
 	if thinkingText == "" {
 		t.Fatal("Thinking text should be accumulated")
 	}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
@@ -62,40 +62,6 @@ func TestConvertGeminiRequestToAntigravity_AddSkipSentinelToFunctionCall(t *test
 	}
 }

-func TestConvertGeminiRequestToAntigravity_RemoveThinkingBlocks(t *testing.T) {
-	// Thinking blocks should be removed entirely for Gemini
-	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
-	inputJSON := []byte(fmt.Sprintf(`{
-		"model": "gemini-3-pro-preview",
-		"contents": [
-			{
-				"role": "model",
-				"parts": [
-					{"thought": true, "text": "Thinking...", "thoughtSignature": "%s"},
-					{"text": "Here is my response"}
-				]
-			}
-		]
-	}`, validSignature))
-
-	output := ConvertGeminiRequestToAntigravity("gemini-3-pro-preview", inputJSON, false)
-	outputStr := string(output)
-
-	// Check that thinking block is removed
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
-	if len(parts) != 1 {
-		t.Fatalf("Expected 1 part (thinking removed), got %d", len(parts))
-	}
-
-	// Only text part should remain
-	if parts[0].Get("thought").Bool() {
-		t.Error("Thinking block should be removed for Gemini")
-	}
-	if parts[0].Get("text").String() != "Here is my response" {
-		t.Errorf("Expected text 'Here is my response', got '%s'", parts[0].Get("text").String())
-	}
-}
-
 func TestConvertGeminiRequestToAntigravity_ParallelFunctionCalls(t *testing.T) {
 	// Multiple functionCalls should all get skip_thought_signature_validator
 	inputJSON := []byte(`{
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -305,12 +305,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// tools -> request.tools[0].functionDeclarations + request.tools[0].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -349,31 +349,37 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "request.tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "request.tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}

--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -98,9 +98,8 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 		// Temperature setting for controlling response randomness
 		if temp := genConfig.Get("temperature"); temp.Exists() {
 			out, _ = sjson.Set(out, "temperature", temp.Float())
-		}
-		// Top P setting for nucleus sampling
-		if topP := genConfig.Get("topP"); topP.Exists() {
+		} else if topP := genConfig.Get("topP"); topP.Exists() {
+			// Top P setting for nucleus sampling (filtered out if temperature is set)
 			out, _ = sjson.Set(out, "top_p", topP.Float())
 		}
 		// Stop sequences configuration for custom termination conditions
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -110,10 +110,8 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 	// Temperature setting for controlling response randomness
 	if temp := root.Get("temperature"); temp.Exists() {
 		out, _ = sjson.Set(out, "temperature", temp.Float())
-	}
-
-	// Top P setting for nucleus sampling
-	if topP := root.Get("top_p"); topP.Exists() {
+	} else if topP := root.Get("top_p"); topP.Exists() {
+		// Top P setting for nucleus sampling (filtered out if temperature is set)
 		out, _ = sjson.Set(out, "top_p", topP.Float())
 	}

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -283,12 +283,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}

-	// tools -> request.tools[0].functionDeclarations + request.tools[0].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -327,31 +327,37 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "request.tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "request.tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}

--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -289,12 +289,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		}
 	}

-	// tools -> tools[0].functionDeclarations + tools[0].googleSearch passthrough
+	// tools -> tools[].functionDeclarations + tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -333,31 +333,37 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
 		}
 	}

--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -499,6 +499,16 @@ func shortenToolNameIfNeeded(name string) string {
 	return name[:limit]
 }

+func ensureKiroInputSchema(parameters interface{}) interface{} {
+	if parameters != nil {
+		return parameters
+	}
+	return map[string]interface{}{
+		"type":       "object",
+		"properties": map[string]interface{}{},
+	}
+}
+
 // convertClaudeToolsToKiro converts Claude tools to Kiro format
 func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
 	var kiroTools []KiroToolWrapper
@@ -509,7 +519,12 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
 	for _, tool := range tools.Array() {
 		name := tool.Get("name").String()
 		description := tool.Get("description").String()
-		inputSchema := tool.Get("input_schema").Value()
+		inputSchemaResult := tool.Get("input_schema")
+		var inputSchema interface{}
+		if inputSchemaResult.Exists() && inputSchemaResult.Type != gjson.Null {
+			inputSchema = inputSchemaResult.Value()
+		}
+		inputSchema = ensureKiroInputSchema(inputSchema)

 		// Shorten tool name if it exceeds 64 characters (common with MCP tools)
 		originalName := name
--- a/internal/translator/kiro/openai/kiro_openai.go
+++ b/internal/translator/kiro/openai/kiro_openai.go
@@ -314,7 +314,7 @@ func ConvertOpenAIToolsToKiroFormat(tools []map[string]interface{}) []KiroToolWr

 		name := kirocommon.GetString(fn, "name")
 		description := kirocommon.GetString(fn, "description")
-		parameters := fn["parameters"]
+		parameters := ensureKiroInputSchema(fn["parameters"])

 		if name == "" {
 			continue
@@ -368,4 +368,4 @@ func ConvertClaudeToolUseToOpenAI(toolUseID, toolName string, input map[string]i
 // LogStreamEvent logs a streaming event for debugging
 func LogStreamEvent(eventType, data string) {
 	log.Debugf("kiro-openai: stream event type=%s, data_len=%d", eventType, len(data))
-}
+}
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -381,6 +381,16 @@ func shortenToolNameIfNeeded(name string) string {
 	return name[:limit]
 }

+func ensureKiroInputSchema(parameters interface{}) interface{} {
+	if parameters != nil {
+		return parameters
+	}
+	return map[string]interface{}{
+		"type":       "object",
+		"properties": map[string]interface{}{},
+	}
+}
+
 // convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
 func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
 	var kiroTools []KiroToolWrapper
@@ -401,7 +411,12 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {

 		name := fn.Get("name").String()
 		description := fn.Get("description").String()
-		parameters := fn.Get("parameters").Value()
+		parametersResult := fn.Get("parameters")
+		var parameters interface{}
+		if parametersResult.Exists() && parametersResult.Type != gjson.Null {
+			parameters = parametersResult.Value()
+		}
+		parameters = ensureKiroInputSchema(parameters)

 		// Shorten tool name if it exceeds 64 characters (common with MCP tools)
 		originalName := name
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -89,12 +89,14 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream

 	// Handle system message first
 	systemMsgJSON := `{"role":"system","content":[]}`
+	hasSystemContent := false
 	if system := root.Get("system"); system.Exists() {
 		if system.Type == gjson.String {
 			if system.String() != "" {
 				oldSystem := `{"type":"text","text":""}`
 				oldSystem, _ = sjson.Set(oldSystem, "text", system.String())
 				systemMsgJSON, _ = sjson.SetRaw(systemMsgJSON, "content.-1", oldSystem)
+				hasSystemContent = true
 			}
 		} else if system.Type == gjson.JSON {
 			if system.IsArray() {
@@ -102,12 +104,16 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 				for i := 0; i < len(systemResults); i++ {
 					if contentItem, ok := convertClaudeContentPart(systemResults[i]); ok {
 						systemMsgJSON, _ = sjson.SetRaw(systemMsgJSON, "content.-1", contentItem)
+						hasSystemContent = true
 					}
 				}
 			}
 		}
 	}
-	messagesJSON, _ = sjson.SetRaw(messagesJSON, "-1", systemMsgJSON)
+	// Only add system message if it has content
+	if hasSystemContent {
+		messagesJSON, _ = sjson.SetRaw(messagesJSON, "-1", systemMsgJSON)
+	}

 	// Process Anthropic messages
 	if messages := root.Get("messages"); messages.Exists() && messages.IsArray() {
--- a/internal/translator/openai/claude/openai_claude_request_test.go
+++ b/internal/translator/openai/claude/openai_claude_request_test.go
@@ -181,11 +181,11 @@ func TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent(t *testing.T) {
 			result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
 			resultJSON := gjson.ParseBytes(result)

-			// Find the relevant message (skip system message at index 0)
+			// Find the relevant message
 			messages := resultJSON.Get("messages").Array()
-			if len(messages) < 2 {
+			if len(messages) < 1 {
 				if tt.wantHasReasoningContent || tt.wantHasContent {
-					t.Fatalf("Expected at least 2 messages (system + user/assistant), got %d", len(messages))
+					t.Fatalf("Expected at least 1 message, got %d", len(messages))
 				}
 				return
 			}
@@ -272,15 +272,15 @@ func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T)

 	messages := resultJSON.Get("messages").Array()

-	// Should have: system (auto-added) + user + assistant (thinking-only) + user = 4 messages
-	if len(messages) != 4 {
-		t.Fatalf("Expected 4 messages, got %d. Messages: %v", len(messages), resultJSON.Get("messages").Raw)
+	// Should have: user + assistant (thinking-only) + user = 3 messages
+	if len(messages) != 3 {
+		t.Fatalf("Expected 3 messages, got %d. Messages: %v", len(messages), resultJSON.Get("messages").Raw)
 	}

-	// Check the assistant message (index 2) has reasoning_content
-	assistantMsg := messages[2]
+	// Check the assistant message (index 1) has reasoning_content
+	assistantMsg := messages[1]
 	if assistantMsg.Get("role").String() != "assistant" {
-		t.Errorf("Expected message[2] to be assistant, got %s", assistantMsg.Get("role").String())
+		t.Errorf("Expected message[1] to be assistant, got %s", assistantMsg.Get("role").String())
 	}

 	if !assistantMsg.Get("reasoning_content").Exists() {
@@ -292,6 +292,104 @@ func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T)
 	}
 }

+func TestConvertClaudeRequestToOpenAI_SystemMessageScenarios(t *testing.T) {
+	tests := []struct {
+		name        string
+		inputJSON   string
+		wantHasSys  bool
+		wantSysText string
+	}{
+		{
+			name: "No system field",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"messages": [{"role": "user", "content": "hello"}]
+			}`,
+			wantHasSys: false,
+		},
+		{
+			name: "Empty string system field",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"system": "",
+				"messages": [{"role": "user", "content": "hello"}]
+			}`,
+			wantHasSys: false,
+		},
+		{
+			name: "String system field",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"system": "Be helpful",
+				"messages": [{"role": "user", "content": "hello"}]
+			}`,
+			wantHasSys:  true,
+			wantSysText: "Be helpful",
+		},
+		{
+			name: "Array system field with text",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"system": [{"type": "text", "text": "Array system"}],
+				"messages": [{"role": "user", "content": "hello"}]
+			}`,
+			wantHasSys:  true,
+			wantSysText: "Array system",
+		},
+		{
+			name: "Array system field with multiple text blocks",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"system": [
+					{"type": "text", "text": "Block 1"},
+					{"type": "text", "text": "Block 2"}
+				],
+				"messages": [{"role": "user", "content": "hello"}]
+			}`,
+			wantHasSys:  true,
+			wantSysText: "Block 2", // the assertion below compares only the text of the last content block
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
+			resultJSON := gjson.ParseBytes(result)
+			messages := resultJSON.Get("messages").Array()
+
+			hasSys := false
+			var sysMsg gjson.Result
+			if len(messages) > 0 && messages[0].Get("role").String() == "system" {
+				hasSys = true
+				sysMsg = messages[0]
+			}
+
+			if hasSys != tt.wantHasSys {
+				t.Errorf("got hasSystem = %v, want %v", hasSys, tt.wantHasSys)
+			}
+
+			if tt.wantHasSys {
+				// Check content - it could be string or array in OpenAI
+				content := sysMsg.Get("content")
+				var gotText string
+				if content.IsArray() {
+					arr := content.Array()
+					if len(arr) > 0 {
+						// Get the last element's text for validation
+						gotText = arr[len(arr)-1].Get("text").String()
+					}
+				} else {
+					gotText = content.String()
+				}
+
+				if tt.wantSysText != "" && gotText != tt.wantSysText {
+					t.Errorf("got system text = %q, want %q", gotText, tt.wantSysText)
+				}
+			}
+		})
+	}
+}
+
 func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
@@ -318,39 +416,35 @@ func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
 	messages := resultJSON.Get("messages").Array()

 	// OpenAI requires: tool messages MUST immediately follow assistant(tool_calls).
-	// Correct order: system + assistant(tool_calls) + tool(result) + user(before+after)
-	if len(messages) != 4 {
-		t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
+	// Correct order: assistant(tool_calls) + tool(result) + user(before+after)
+	if len(messages) != 3 {
+		t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}

-	if messages[0].Get("role").String() != "system" {
-		t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
-	}
-
-	if messages[1].Get("role").String() != "assistant" || !messages[1].Get("tool_calls").Exists() {
-		t.Fatalf("Expected messages[1] to be assistant tool_calls, got %s: %s", messages[1].Get("role").String(), messages[1].Raw)
+	if messages[0].Get("role").String() != "assistant" || !messages[0].Get("tool_calls").Exists() {
+		t.Fatalf("Expected messages[0] to be assistant tool_calls, got %s: %s", messages[0].Get("role").String(), messages[0].Raw)
 	}

 	// tool message MUST immediately follow assistant(tool_calls) per OpenAI spec
-	if messages[2].Get("role").String() != "tool" {
-		t.Fatalf("Expected messages[2] to be tool (must follow tool_calls), got %s", messages[2].Get("role").String())
+	if messages[1].Get("role").String() != "tool" {
+		t.Fatalf("Expected messages[1] to be tool (must follow tool_calls), got %s", messages[1].Get("role").String())
 	}
-	if got := messages[2].Get("tool_call_id").String(); got != "call_1" {
+	if got := messages[1].Get("tool_call_id").String(); got != "call_1" {
 		t.Fatalf("Expected tool_call_id %q, got %q", "call_1", got)
 	}
-	if got := messages[2].Get("content").String(); got != "tool ok" {
+	if got := messages[1].Get("content").String(); got != "tool ok" {
 		t.Fatalf("Expected tool content %q, got %q", "tool ok", got)
 	}

 	// User message comes after tool message
-	if messages[3].Get("role").String() != "user" {
-		t.Fatalf("Expected messages[3] to be user, got %s", messages[3].Get("role").String())
+	if messages[2].Get("role").String() != "user" {
+		t.Fatalf("Expected messages[2] to be user, got %s", messages[2].Get("role").String())
 	}
 	// User message should contain both "before" and "after" text
-	if got := messages[3].Get("content.0.text").String(); got != "before" {
+	if got := messages[2].Get("content.0.text").String(); got != "before" {
 		t.Fatalf("Expected user text[0] %q, got %q", "before", got)
 	}
-	if got := messages[3].Get("content.1.text").String(); got != "after" {
+	if got := messages[2].Get("content.1.text").String(); got != "after" {
 		t.Fatalf("Expected user text[1] %q, got %q", "after", got)
 	}
 }
@@ -378,16 +472,16 @@ func TestConvertClaudeRequestToOpenAI_ToolResultObjectContent(t *testing.T) {
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()

-	// system + assistant(tool_calls) + tool(result)
-	if len(messages) != 3 {
-		t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
+	// assistant(tool_calls) + tool(result)
+	if len(messages) != 2 {
+		t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}

-	if messages[2].Get("role").String() != "tool" {
-		t.Fatalf("Expected messages[2] to be tool, got %s", messages[2].Get("role").String())
+	if messages[1].Get("role").String() != "tool" {
+		t.Fatalf("Expected messages[1] to be tool, got %s", messages[1].Get("role").String())
 	}

-	toolContent := messages[2].Get("content").String()
+	toolContent := messages[1].Get("content").String()
 	parsed := gjson.Parse(toolContent)
 	if parsed.Get("foo").String() != "bar" {
 		t.Fatalf("Expected tool content JSON foo=bar, got %q", toolContent)
@@ -414,18 +508,14 @@ func TestConvertClaudeRequestToOpenAI_AssistantTextToolUseTextOrder(t *testing.T
 	messages := resultJSON.Get("messages").Array()

 	// New behavior: content + tool_calls unified in single assistant message
-	// Expect: system + assistant(content[pre,post] + tool_calls)
-	if len(messages) != 2 {
-		t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
+	// Expect: assistant(content[pre,post] + tool_calls)
+	if len(messages) != 1 {
+		t.Fatalf("Expected 1 message, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}

-	if messages[0].Get("role").String() != "system" {
-		t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
-	}
-
-	assistantMsg := messages[1]
+	assistantMsg := messages[0]
 	if assistantMsg.Get("role").String() != "assistant" {
-		t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
+		t.Fatalf("Expected messages[0] to be assistant, got %s", assistantMsg.Get("role").String())
 	}

 	// Should have both content and tool_calls in same message
@@ -470,14 +560,14 @@ func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *t
 	messages := resultJSON.Get("messages").Array()

 	// New behavior: all content, thinking, and tool_calls unified in single assistant message
-	// Expect: system + assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
-	if len(messages) != 2 {
-		t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
+	// Expect: assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
+	if len(messages) != 1 {
+		t.Fatalf("Expected 1 message, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}

-	assistantMsg := messages[1]
+	assistantMsg := messages[0]
 	if assistantMsg.Get("role").String() != "assistant" {
-		t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
+		t.Fatalf("Expected messages[0] to be assistant, got %s", assistantMsg.Get("role").String())
 	}

 	// Should have content with both pre and post
--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -12,10 +12,23 @@ import (

 var gjsonPathKeyReplacer = strings.NewReplacer(".", "\\.", "*", "\\*", "?", "\\?")

+const placeholderReasonDescription = "Brief explanation of why you are calling this tool"
+
 // CleanJSONSchemaForAntigravity transforms a JSON schema to be compatible with Antigravity API.
 // It handles unsupported keywords, type flattening, and schema simplification while preserving
 // semantic information as description hints.
 func CleanJSONSchemaForAntigravity(jsonStr string) string {
+	return cleanJSONSchema(jsonStr, true)
+}
+
+// CleanJSONSchemaForGemini transforms a JSON schema to be compatible with Gemini tool calling.
+// It removes unsupported keywords and simplifies schemas, without adding empty-schema placeholders.
+func CleanJSONSchemaForGemini(jsonStr string) string {
+	return cleanJSONSchema(jsonStr, false)
+}
+
+// cleanJSONSchema performs the core cleaning operations on the JSON schema.
+func cleanJSONSchema(jsonStr string, addPlaceholder bool) string {
 	// Phase 1: Convert and add hints
 	jsonStr = convertRefsToHints(jsonStr)
 	jsonStr = convertConstToEnum(jsonStr)
@@ -31,10 +44,94 @@ func CleanJSONSchemaForAntigravity(jsonStr string) string {

 	// Phase 3: Cleanup
 	jsonStr = removeUnsupportedKeywords(jsonStr)
+	if !addPlaceholder {
+		// Gemini schema cleanup: remove nullable/title and placeholder-only fields.
+		jsonStr = removeKeywords(jsonStr, []string{"nullable", "title"})
+		jsonStr = removePlaceholderFields(jsonStr)
+	}
 	jsonStr = cleanupRequiredFields(jsonStr)
-
 	// Phase 4: Add placeholder for empty object schemas (Claude VALIDATED mode requirement)
-	jsonStr = addEmptySchemaPlaceholder(jsonStr)
+	if addPlaceholder {
+		jsonStr = addEmptySchemaPlaceholder(jsonStr)
+	}
+
+	return jsonStr
+}
+
+// removeKeywords deletes the specified keywords from the JSON schema, skipping any occurrence for which isPropertyDefinition returns true on the parent path.
+func removeKeywords(jsonStr string, keywords []string) string {
+	for _, key := range keywords {
+		for _, p := range findPaths(jsonStr, key) {
+			if isPropertyDefinition(trimSuffix(p, "."+key)) {
+				continue
+			}
+			jsonStr, _ = sjson.Delete(jsonStr, p)
+		}
+	}
+	return jsonStr
+}
+
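+// removePlaceholderFields removes placeholder properties and their required entries: any "_" property whose path ends in ".properties._", and a "reason" property only when it is the sole property and its description equals placeholderReasonDescription.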
+func removePlaceholderFields(jsonStr string) string {
+	// Remove "_" placeholder properties.
+	paths := findPaths(jsonStr, "_")
+	sortByDepth(paths)
+	for _, p := range paths {
+		if !strings.HasSuffix(p, ".properties._") {
+			continue
+		}
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+		parentPath := trimSuffix(p, ".properties._")
+		reqPath := joinPath(parentPath, "required")
+		req := gjson.Get(jsonStr, reqPath)
+		if req.IsArray() {
+			var filtered []string
+			for _, r := range req.Array() {
+				if r.String() != "_" {
+					filtered = append(filtered, r.String())
+				}
+			}
+			if len(filtered) == 0 {
+				jsonStr, _ = sjson.Delete(jsonStr, reqPath)
+			} else {
+				jsonStr, _ = sjson.Set(jsonStr, reqPath, filtered)
+			}
+		}
+	}
+
+	// Remove placeholder-only "reason" objects.
+	reasonPaths := findPaths(jsonStr, "reason")
+	sortByDepth(reasonPaths)
+	for _, p := range reasonPaths {
+		if !strings.HasSuffix(p, ".properties.reason") {
+			continue
+		}
+		parentPath := trimSuffix(p, ".properties.reason")
+		props := gjson.Get(jsonStr, joinPath(parentPath, "properties"))
+		if !props.IsObject() || len(props.Map()) != 1 {
+			continue
+		}
+		desc := gjson.Get(jsonStr, p+".description").String()
+		if desc != placeholderReasonDescription {
+			continue
+		}
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+		reqPath := joinPath(parentPath, "required")
+		req := gjson.Get(jsonStr, reqPath)
+		if req.IsArray() {
+			var filtered []string
+			for _, r := range req.Array() {
+				if r.String() != "reason" {
+					filtered = append(filtered, r.String())
+				}
+			}
+			if len(filtered) == 0 {
+				jsonStr, _ = sjson.Delete(jsonStr, reqPath)
+			} else {
+				jsonStr, _ = sjson.Set(jsonStr, reqPath, filtered)
+			}
+		}
+	}

 	return jsonStr
 }
@@ -78,7 +175,7 @@ func convertConstToEnum(jsonStr string) string {
 	return jsonStr
 }

-// convertEnumValuesToStrings ensures all enum values are strings.
+// convertEnumValuesToStrings ensures all enum values are strings and the schema type is set to string.
 // Gemini API requires enum values to be of type string, not numbers or booleans.
 func convertEnumValuesToStrings(jsonStr string) string {
 	for _, p := range findPaths(jsonStr, "enum") {
@@ -88,19 +185,15 @@ func convertEnumValuesToStrings(jsonStr string) string {
 		}

 		var stringVals []string
-		needsConversion := false
 		for _, item := range arr.Array() {
-			// Check if any value is not a string
-			if item.Type != gjson.String {
-				needsConversion = true
-			}
 			stringVals = append(stringVals, item.String())
 		}

-		// Only update if we found non-string values
-		if needsConversion {
-			jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
-		}
+		// Always update enum values to strings and set type to "string"
+		// This ensures compatibility with Antigravity Gemini which only allows enum for STRING type
+		jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
+		parentPath := trimSuffix(p, ".enum")
+		jsonStr, _ = sjson.Set(jsonStr, joinPath(parentPath, "type"), "string")
 	}
 	return jsonStr
 }
@@ -409,7 +502,7 @@ func addEmptySchemaPlaceholder(jsonStr string) string {
 			// Add placeholder "reason" property
 			reasonPath := joinPath(propsPath, "reason")
 			jsonStr, _ = sjson.Set(jsonStr, reasonPath+".type", "string")
-			jsonStr, _ = sjson.Set(jsonStr, reasonPath+".description", "Brief explanation of why you are calling this tool")
+			jsonStr, _ = sjson.Set(jsonStr, reasonPath+".description", placeholderReasonDescription)

 			// Add to required array
 			jsonStr, _ = sjson.Set(jsonStr, reqPath, []string{"reason"})
--- a/internal/watcher/synthesizer/file.go
+++ b/internal/watcher/synthesizer/file.go
@@ -167,6 +167,16 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
 			"virtual_parent_id": primary.ID,
 			"type":              metadata["type"],
 		}
+		if v, ok := metadata["disable_cooling"]; ok {
+			metadataCopy["disable_cooling"] = v
+		} else if v, ok := metadata["disable-cooling"]; ok {
+			metadataCopy["disable_cooling"] = v
+		}
+		if v, ok := metadata["request_retry"]; ok {
+			metadataCopy["request_retry"] = v
+		} else if v, ok := metadata["request-retry"]; ok {
+			metadataCopy["request_retry"] = v
+		}
 		proxy := strings.TrimSpace(primary.ProxyURL)
 		if proxy != "" {
 			metadataCopy["proxy_url"] = proxy
--- a/internal/watcher/synthesizer/file_test.go
+++ b/internal/watcher/synthesizer/file_test.go
@@ -69,10 +69,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {

 	// Create a valid auth file
 	authData := map[string]any{
-		"type":      "claude",
-		"email":     "test@example.com",
-		"proxy_url": "http://proxy.local",
-		"prefix":    "test-prefix",
+		"type":            "claude",
+		"email":           "test@example.com",
+		"proxy_url":       "http://proxy.local",
+		"prefix":          "test-prefix",
+		"disable_cooling": true,
+		"request_retry":   2,
 	}
 	data, _ := json.Marshal(authData)
 	err := os.WriteFile(filepath.Join(tempDir, "claude-auth.json"), data, 0644)
@@ -108,6 +110,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
 	if auths[0].ProxyURL != "http://proxy.local" {
 		t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
 	}
+	if v, ok := auths[0].Metadata["disable_cooling"].(bool); !ok || !v {
+		t.Errorf("expected disable_cooling true, got %v", auths[0].Metadata["disable_cooling"])
+	}
+	if v, ok := auths[0].Metadata["request_retry"].(float64); !ok || int(v) != 2 {
+		t.Errorf("expected request_retry 2, got %v", auths[0].Metadata["request_retry"])
+	}
 	if auths[0].Status != coreauth.StatusActive {
 		t.Errorf("expected status active, got %s", auths[0].Status)
 	}
@@ -336,9 +344,11 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
 		},
 	}
 	metadata := map[string]any{
-		"project_id": "project-a, project-b, project-c",
-		"email":      "test@example.com",
-		"type":       "gemini",
+		"project_id":      "project-a, project-b, project-c",
+		"email":           "test@example.com",
+		"type":            "gemini",
+		"request_retry":   2,
+		"disable_cooling": true,
 	}

 	virtuals := SynthesizeGeminiVirtualAuths(primary, metadata, now)
@@ -376,6 +386,12 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
 		if v.ProxyURL != "http://proxy.local" {
 			t.Errorf("expected proxy_url http://proxy.local, got %s", v.ProxyURL)
 		}
+		if vv, ok := v.Metadata["disable_cooling"].(bool); !ok || !vv {
+			t.Errorf("expected disable_cooling true, got %v", v.Metadata["disable_cooling"])
+		}
+		if vv, ok := v.Metadata["request_retry"].(int); !ok || vv != 2 {
+			t.Errorf("expected request_retry 2, got %v", v.Metadata["request_retry"])
+		}
 		if v.Attributes["runtime_only"] != "true" {
 			t.Error("expected runtime_only=true")
 		}
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -128,8 +128,23 @@ func (h *ClaudeCodeAPIHandler) ClaudeCountTokens(c *gin.Context) {
 // Parameters:
 //   - c: The Gin context for the request.
 func (h *ClaudeCodeAPIHandler) ClaudeModels(c *gin.Context) {
+	// Fetch the model list once so that "data" and the pagination cursors
+	// below are all derived from the same slice.
+	models := h.Models()
+	var firstID, lastID string
+	if n := len(models); n > 0 {
+		// A missing or non-string "id" leaves the cursor as the empty
+		// string, because a failed assertion yields the zero value.
+		firstID, _ = models[0]["id"].(string)
+		lastID, _ = models[n-1]["id"].(string)
+	}
+
+	// has_more is hard-coded to false: no further pages are advertised.
 	c.JSON(http.StatusOK, gin.H{
-		"data": h.Models(),
+		"data":     models,
+		"has_more": false,
+		"first_id": firstID,
+		"last_id":  lastID,
 	})
 }

--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -60,8 +60,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
 			if !strings.HasPrefix(name, "models/") {
 				normalizedModel["name"] = "models/" + name
 			}
-			normalizedModel["displayName"] = name
-			normalizedModel["description"] = name
+			if displayName, _ := normalizedModel["displayName"].(string); displayName == "" {
+				normalizedModel["displayName"] = name
+			}
+			if description, _ := normalizedModel["description"].(string); description == "" {
+				normalizedModel["description"] = name
+			}
 		}
 		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
 			normalizedModel["supportedGenerationMethods"] = defaultMethods
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -386,6 +386,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		return nil, errMsg
 	}
 	reqMeta := requestExecutionMetadata(ctx)
+	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
@@ -424,6 +425,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		return nil, errMsg
 	}
 	reqMeta := requestExecutionMetadata(ctx)
+	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
@@ -465,6 +467,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		return nil, errChan
 	}
 	reqMeta := requestExecutionMetadata(ctx)
+	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
--- a/sdk/auth/antigravity.go
+++ b/sdk/auth/antigravity.go
@@ -2,15 +2,13 @@ package auth

 import (
 	"context"
-	"encoding/json"
 	"fmt"
-	"io"
 	"net"
 	"net/http"
-	"net/url"
 	"strings"
 	"time"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/antigravity"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/browser"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
@@ -19,20 +17,6 @@ import (
 	log "github.com/sirupsen/logrus"
 )

-const (
-	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-	antigravityClientSecret = "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"
-	antigravityCallbackPort = 51121
-)
-
-var antigravityScopes = []string{
-	"https://www.googleapis.com/auth/cloud-platform",
-	"https://www.googleapis.com/auth/userinfo.email",
-	"https://www.googleapis.com/auth/userinfo.profile",
-	"https://www.googleapis.com/auth/cclog",
-	"https://www.googleapis.com/auth/experimentsandconfigs",
-}
-
 // AntigravityAuthenticator implements OAuth login for the antigravity provider.
 type AntigravityAuthenticator struct{}

@@ -60,12 +44,12 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 		opts = &LoginOptions{}
 	}

-	callbackPort := antigravityCallbackPort
+	callbackPort := antigravity.CallbackPort
 	if opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}

-	httpClient := util.SetProxy(&cfg.SDKConfig, &http.Client{})
+	authSvc := antigravity.NewAntigravityAuth(cfg, nil)

 	state, err := misc.GenerateRandomState()
 	if err != nil {
@@ -83,7 +67,7 @@ func (AntigravityAuthenticator) Login(ctx context.Context, cfg *config.Config, o
 	}()

 	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", port)
-	authURL := buildAntigravityAuthURL(redirectURI, state)
+	authURL := authSvc.BuildAuthURL(state, redirectURI)

 	if !opts.NoBrowser {
 		fmt.Println("Opening browser for antigravity authentication")
@@ -164,22 +148,29 @@ waitForCallback:
 		return nil, fmt.Errorf("antigravity: missing authorization code")
 	}

-	tokenResp, errToken := exchangeAntigravityCode(ctx, cbRes.Code, redirectURI, httpClient)
+	tokenResp, errToken := authSvc.ExchangeCodeForTokens(ctx, cbRes.Code, redirectURI)
 	if errToken != nil {
 		return nil, fmt.Errorf("antigravity: token exchange failed: %w", errToken)
 	}

-	email := ""
-	if tokenResp.AccessToken != "" {
-		if info, errInfo := fetchAntigravityUserInfo(ctx, tokenResp.AccessToken, httpClient); errInfo == nil && strings.TrimSpace(info.Email) != "" {
-			email = strings.TrimSpace(info.Email)
-		}
+	accessToken := strings.TrimSpace(tokenResp.AccessToken)
+	if accessToken == "" {
+		return nil, fmt.Errorf("antigravity: token exchange returned empty access token")
+	}
+
+	email, errInfo := authSvc.FetchUserInfo(ctx, accessToken)
+	if errInfo != nil {
+		return nil, fmt.Errorf("antigravity: fetch user info failed: %w", errInfo)
+	}
+	email = strings.TrimSpace(email)
+	if email == "" {
+		return nil, fmt.Errorf("antigravity: empty email returned from user info")
 	}

 	// Fetch project ID via loadCodeAssist (same approach as Gemini CLI)
 	projectID := ""
-	if tokenResp.AccessToken != "" {
-		fetchedProjectID, errProject := fetchAntigravityProjectID(ctx, tokenResp.AccessToken, httpClient)
+	if accessToken != "" {
+		fetchedProjectID, errProject := authSvc.FetchProjectID(ctx, accessToken)
 		if errProject != nil {
 			log.Warnf("antigravity: failed to fetch project ID: %v", errProject)
 		} else {
@@ -204,7 +195,7 @@ waitForCallback:
 		metadata["project_id"] = projectID
 	}

-	fileName := sanitizeAntigravityFileName(email)
+	fileName := antigravity.CredentialFileName(email)
 	label := email
 	if label == "" {
 		label = "antigravity"
@@ -231,7 +222,7 @@ type callbackResult struct {

 func startAntigravityCallbackServer(port int) (*http.Server, int, <-chan callbackResult, error) {
 	if port <= 0 {
-		port = antigravityCallbackPort
+		port = antigravity.CallbackPort
 	}
 	addr := fmt.Sprintf(":%d", port)
 	listener, err := net.Listen("tcp", addr)
@@ -267,309 +258,9 @@ func startAntigravityCallbackServer(port int) (*http.Server, int, <-chan callbac
 	return srv, port, resultCh, nil
 }

-type antigravityTokenResponse struct {
-	AccessToken  string `json:"access_token"`
-	RefreshToken string `json:"refresh_token"`
-	ExpiresIn    int64  `json:"expires_in"`
-	TokenType    string `json:"token_type"`
-}
-
-func exchangeAntigravityCode(ctx context.Context, code, redirectURI string, httpClient *http.Client) (*antigravityTokenResponse, error) {
-	data := url.Values{}
-	data.Set("code", code)
-	data.Set("client_id", antigravityClientID)
-	data.Set("client_secret", antigravityClientSecret)
-	data.Set("redirect_uri", redirectURI)
-	data.Set("grant_type", "authorization_code")
-
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://oauth2.googleapis.com/token", strings.NewReader(data.Encode()))
-	if err != nil {
-		return nil, err
-	}
-	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-
-	resp, errDo := httpClient.Do(req)
-	if errDo != nil {
-		return nil, errDo
-	}
-	defer func() {
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity token exchange: close body error: %v", errClose)
-		}
-	}()
-
-	var token antigravityTokenResponse
-	if errDecode := json.NewDecoder(resp.Body).Decode(&token); errDecode != nil {
-		return nil, errDecode
-	}
-	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-		return nil, fmt.Errorf("oauth token exchange failed: status %d", resp.StatusCode)
-	}
-	return &token, nil
-}
-
-type antigravityUserInfo struct {
-	Email string `json:"email"`
-}
-
-func fetchAntigravityUserInfo(ctx context.Context, accessToken string, httpClient *http.Client) (*antigravityUserInfo, error) {
-	if strings.TrimSpace(accessToken) == "" {
-		return &antigravityUserInfo{}, nil
-	}
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
-	if err != nil {
-		return nil, err
-	}
-	req.Header.Set("Authorization", "Bearer "+accessToken)
-
-	resp, errDo := httpClient.Do(req)
-	if errDo != nil {
-		return nil, errDo
-	}
-	defer func() {
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity userinfo: close body error: %v", errClose)
-		}
-	}()
-
-	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-		return &antigravityUserInfo{}, nil
-	}
-	var info antigravityUserInfo
-	if errDecode := json.NewDecoder(resp.Body).Decode(&info); errDecode != nil {
-		return nil, errDecode
-	}
-	return &info, nil
-}
-
-func buildAntigravityAuthURL(redirectURI, state string) string {
-	params := url.Values{}
-	params.Set("access_type", "offline")
-	params.Set("client_id", antigravityClientID)
-	params.Set("prompt", "consent")
-	params.Set("redirect_uri", redirectURI)
-	params.Set("response_type", "code")
-	params.Set("scope", strings.Join(antigravityScopes, " "))
-	params.Set("state", state)
-	return "https://accounts.google.com/o/oauth2/v2/auth?" + params.Encode()
-}
-
-func sanitizeAntigravityFileName(email string) string {
-	if strings.TrimSpace(email) == "" {
-		return "antigravity.json"
-	}
-	replacer := strings.NewReplacer("@", "_", ".", "_")
-	return fmt.Sprintf("antigravity-%s.json", replacer.Replace(email))
-}
-
-// Antigravity API constants for project discovery
-const (
-	antigravityAPIEndpoint    = "https://cloudcode-pa.googleapis.com"
-	antigravityAPIVersion     = "v1internal"
-	antigravityAPIUserAgent   = "google-api-nodejs-client/9.15.1"
-	antigravityAPIClient      = "google-cloud-sdk vscode_cloudshelleditor/0.1"
-	antigravityClientMetadata = `{"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}`
-)
-
 // FetchAntigravityProjectID exposes project discovery for external callers.
 func FetchAntigravityProjectID(ctx context.Context, accessToken string, httpClient *http.Client) (string, error) {
-	return fetchAntigravityProjectID(ctx, accessToken, httpClient)
-}
-
-// fetchAntigravityProjectID retrieves the project ID for the authenticated user via loadCodeAssist.
-// This uses the same approach as Gemini CLI to get the cloudaicompanionProject.
-func fetchAntigravityProjectID(ctx context.Context, accessToken string, httpClient *http.Client) (string, error) {
-	// Call loadCodeAssist to get the project
-	loadReqBody := map[string]any{
-		"metadata": map[string]string{
-			"ideType":    "ANTIGRAVITY",
-			"platform":   "PLATFORM_UNSPECIFIED",
-			"pluginType": "GEMINI",
-		},
-	}
-
-	rawBody, errMarshal := json.Marshal(loadReqBody)
-	if errMarshal != nil {
-		return "", fmt.Errorf("marshal request body: %w", errMarshal)
-	}
-
-	endpointURL := fmt.Sprintf("%s/%s:loadCodeAssist", antigravityAPIEndpoint, antigravityAPIVersion)
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
-	if err != nil {
-		return "", fmt.Errorf("create request: %w", err)
-	}
-	req.Header.Set("Authorization", "Bearer "+accessToken)
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", antigravityAPIUserAgent)
-	req.Header.Set("X-Goog-Api-Client", antigravityAPIClient)
-	req.Header.Set("Client-Metadata", antigravityClientMetadata)
-
-	resp, errDo := httpClient.Do(req)
-	if errDo != nil {
-		return "", fmt.Errorf("execute request: %w", errDo)
-	}
-	defer func() {
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity loadCodeAssist: close body error: %v", errClose)
-		}
-	}()
-
-	bodyBytes, errRead := io.ReadAll(resp.Body)
-	if errRead != nil {
-		return "", fmt.Errorf("read response: %w", errRead)
-	}
-
-	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-		return "", fmt.Errorf("request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(bodyBytes)))
-	}
-
-	var loadResp map[string]any
-	if errDecode := json.Unmarshal(bodyBytes, &loadResp); errDecode != nil {
-		return "", fmt.Errorf("decode response: %w", errDecode)
-	}
-
-	// Extract projectID from response
-	projectID := ""
-	if id, ok := loadResp["cloudaicompanionProject"].(string); ok {
-		projectID = strings.TrimSpace(id)
-	}
-	if projectID == "" {
-		if projectMap, ok := loadResp["cloudaicompanionProject"].(map[string]any); ok {
-			if id, okID := projectMap["id"].(string); okID {
-				projectID = strings.TrimSpace(id)
-			}
-		}
-	}
-
-	if projectID == "" {
-		tierID := "legacy-tier"
-		if tiers, okTiers := loadResp["allowedTiers"].([]any); okTiers {
-			for _, rawTier := range tiers {
-				tier, okTier := rawTier.(map[string]any)
-				if !okTier {
-					continue
-				}
-				if isDefault, okDefault := tier["isDefault"].(bool); okDefault && isDefault {
-					if id, okID := tier["id"].(string); okID && strings.TrimSpace(id) != "" {
-						tierID = strings.TrimSpace(id)
-						break
-					}
-				}
-			}
-		}
-
-		projectID, err = antigravityOnboardUser(ctx, accessToken, tierID, httpClient)
-		if err != nil {
-			return "", err
-		}
-		return projectID, nil
-	}
-
-	return projectID, nil
-}
-
-// antigravityOnboardUser attempts to fetch the project ID via onboardUser by polling for completion.
-// It returns an empty string when the operation times out or completes without a project ID.
-func antigravityOnboardUser(ctx context.Context, accessToken, tierID string, httpClient *http.Client) (string, error) {
-	if httpClient == nil {
-		httpClient = http.DefaultClient
-	}
-	fmt.Println("Antigravity: onboarding user...", tierID)
-	requestBody := map[string]any{
-		"tierId": tierID,
-		"metadata": map[string]string{
-			"ideType":    "ANTIGRAVITY",
-			"platform":   "PLATFORM_UNSPECIFIED",
-			"pluginType": "GEMINI",
-		},
-	}
-
-	rawBody, errMarshal := json.Marshal(requestBody)
-	if errMarshal != nil {
-		return "", fmt.Errorf("marshal request body: %w", errMarshal)
-	}
-
-	maxAttempts := 5
-	for attempt := 1; attempt <= maxAttempts; attempt++ {
-		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
-
-		reqCtx := ctx
-		var cancel context.CancelFunc
-		if reqCtx == nil {
-			reqCtx = context.Background()
-		}
-		reqCtx, cancel = context.WithTimeout(reqCtx, 30*time.Second)
-
-		endpointURL := fmt.Sprintf("%s/%s:onboardUser", antigravityAPIEndpoint, antigravityAPIVersion)
-		req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
-		if errRequest != nil {
-			cancel()
-			return "", fmt.Errorf("create request: %w", errRequest)
-		}
-		req.Header.Set("Authorization", "Bearer "+accessToken)
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("User-Agent", antigravityAPIUserAgent)
-		req.Header.Set("X-Goog-Api-Client", antigravityAPIClient)
-		req.Header.Set("Client-Metadata", antigravityClientMetadata)
-
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			cancel()
-			return "", fmt.Errorf("execute request: %w", errDo)
-		}
-
-		bodyBytes, errRead := io.ReadAll(resp.Body)
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("close body error: %v", errClose)
-		}
-		cancel()
-
-		if errRead != nil {
-			return "", fmt.Errorf("read response: %w", errRead)
-		}
-
-		if resp.StatusCode == http.StatusOK {
-			var data map[string]any
-			if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
-				return "", fmt.Errorf("decode response: %w", errDecode)
-			}
-
-			if done, okDone := data["done"].(bool); okDone && done {
-				projectID := ""
-				if responseData, okResp := data["response"].(map[string]any); okResp {
-					switch projectValue := responseData["cloudaicompanionProject"].(type) {
-					case map[string]any:
-						if id, okID := projectValue["id"].(string); okID {
-							projectID = strings.TrimSpace(id)
-						}
-					case string:
-						projectID = strings.TrimSpace(projectValue)
-					}
-				}
-
-				if projectID != "" {
-					log.Infof("Successfully fetched project_id: %s", projectID)
-					return projectID, nil
-				}
-
-				return "", fmt.Errorf("no project_id in response")
-			}
-
-			time.Sleep(2 * time.Second)
-			continue
-		}
-
-		responsePreview := strings.TrimSpace(string(bodyBytes))
-		if len(responsePreview) > 500 {
-			responsePreview = responsePreview[:500]
-		}
-
-		responseErr := responsePreview
-		if len(responseErr) > 200 {
-			responseErr = responseErr[:200]
-		}
-		return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr)
-	}
-
-	return "", nil
+	cfg := &config.Config{}
+	authSvc := antigravity.NewAntigravityAuth(cfg, httpClient)
+	return authSvc.FetchProjectID(ctx, accessToken)
 }
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -73,9 +73,7 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
 			return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
 		}
 		if existing, errRead := os.ReadFile(path); errRead == nil {
-			// Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change.
-			// This prevents the token refresh loop caused by timestamp/expired/expires_in changes.
-			if metadataEqualIgnoringTimestamps(existing, raw, auth.Provider) {
+			if jsonEqual(existing, raw) {
 				return path, nil
 			}
 			file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
@@ -308,8 +306,7 @@ func (s *FileTokenStore) baseDirSnapshot() string {
 	return s.baseDir
 }

-// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
-// This function is kept for backward compatibility but can cause refresh loops.
+// jsonEqual compares two JSON blobs by parsing them into Go objects and deep comparing.
 func jsonEqual(a, b []byte) bool {
 	var objA any
 	var objB any
@@ -322,41 +319,6 @@ func jsonEqual(a, b []byte) bool {
 	return deepEqualJSON(objA, objB)
 }

-// metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
-// ignoring fields that change on every refresh but don't affect functionality.
-// This prevents unnecessary file writes that would trigger watcher events and
-// create refresh loops.
-// The provider parameter controls whether access_token is ignored: providers like
-// Google OAuth (gemini, gemini-cli) can re-fetch tokens when needed, while others
-// like iFlow require the refreshed token to be persisted.
-func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
-	var objA, objB map[string]any
-	if err := json.Unmarshal(a, &objA); err != nil {
-		return false
-	}
-	if err := json.Unmarshal(b, &objB); err != nil {
-		return false
-	}
-
-	// Fields to ignore: these change on every refresh but don't affect authentication logic.
-	// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
-	ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
-
-	// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
-	// we ignore access_token to avoid unnecessary file writes.
-	switch provider {
-	case "gemini", "gemini-cli", "antigravity":
-		ignoredFields = append(ignoredFields, "access_token")
-	}
-
-	for _, field := range ignoredFields {
-		delete(objA, field)
-		delete(objB, field)
-	}
-
-	return deepEqualJSON(objA, objB)
-}
-
 func deepEqualJSON(a, b any) bool {
 	switch valA := a.(type) {
 	case map[string]any:
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -61,6 +61,19 @@ func SetQuotaCooldownDisabled(disable bool) {
 	quotaCooldownDisabled.Store(disable)
 }
 
+// quotaCooldownDisabledForAuth reports whether quota cooldown is disabled for
+// auth. A non-nil auth whose DisableCoolingOverride reports a value takes
+// precedence; otherwise the flag set by SetQuotaCooldownDisabled is returned.
+func quotaCooldownDisabledForAuth(auth *Auth) bool {
+	if auth == nil {
+		return quotaCooldownDisabled.Load()
+	}
+	if disabled, overridden := auth.DisableCoolingOverride(); overridden {
+		return disabled
+	}
+	return quotaCooldownDisabled.Load()
+}
+
 // Result captures execution outcome used to adjust auth state.
 type Result struct {
 	// AuthID references the auth that produced this result.
@@ -468,20 +477,16 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	retryTimes, maxWait := m.retrySettings()
-	attempts := retryTimes + 1
-	if attempts < 1 {
-		attempts = 1
-	}
+	_, maxWait := m.retrySettings()

 	var lastErr error
-	for attempt := 0; attempt < attempts; attempt++ {
+	for attempt := 0; ; attempt++ {
 		resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
-		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -503,20 +508,16 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	retryTimes, maxWait := m.retrySettings()
-	attempts := retryTimes + 1
-	if attempts < 1 {
-		attempts = 1
-	}
+	_, maxWait := m.retrySettings()

 	var lastErr error
-	for attempt := 0; attempt < attempts; attempt++ {
+	for attempt := 0; ; attempt++ {
 		resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
-		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -538,20 +539,16 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}

-	retryTimes, maxWait := m.retrySettings()
-	attempts := retryTimes + 1
-	if attempts < 1 {
-		attempts = 1
-	}
+	_, maxWait := m.retrySettings()

 	var lastErr error
-	for attempt := 0; attempt < attempts; attempt++ {
+	for attempt := 0; ; attempt++ {
 		chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
 		if errStream == nil {
 			return chunks, nil
 		}
 		lastErr = errStream
-		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait)
+		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, normalized, req.Model, maxWait)
 		if !shouldRetry {
 			break
 		}
@@ -570,6 +567,7 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
 	routeModel := req.Model
+	opts = ensureRequestedModelMetadata(opts, routeModel)
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
@@ -597,6 +595,9 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req
 		resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
 		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
 		if errExec != nil {
+			if errCtx := execCtx.Err(); errCtx != nil {
+				return cliproxyexecutor.Response{}, errCtx
+			}
 			result.Error = &Error{Message: errExec.Error()}
 			var se cliproxyexecutor.StatusError
 			if errors.As(errExec, &se) && se != nil {
@@ -619,6 +620,7 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string,
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
 	routeModel := req.Model
+	opts = ensureRequestedModelMetadata(opts, routeModel)
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
@@ -646,6 +648,9 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string,
 		resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
 		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
 		if errExec != nil {
+			if errCtx := execCtx.Err(); errCtx != nil {
+				return cliproxyexecutor.Response{}, errCtx
+			}
 			result.Error = &Error{Message: errExec.Error()}
 			var se cliproxyexecutor.StatusError
 			if errors.As(errExec, &se) && se != nil {
@@ -668,6 +673,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
 	routeModel := req.Model
+	opts = ensureRequestedModelMetadata(opts, routeModel)
 	tried := make(map[string]struct{})
 	var lastErr error
 	for {
@@ -694,6 +700,9 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
 		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
 		chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
 		if errStream != nil {
+			if errCtx := execCtx.Err(); errCtx != nil {
+				return nil, errCtx
+			}
 			rerr := &Error{Message: errStream.Error()}
 			var se cliproxyexecutor.StatusError
 			if errors.As(errStream, &se) && se != nil {
@@ -729,167 +738,42 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
 	}
 }

-func (m *Manager) executeWithProvider(ctx context.Context, provider string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	if provider == "" {
-		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"}
+func ensureRequestedModelMetadata(opts cliproxyexecutor.Options, requestedModel string) cliproxyexecutor.Options {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if requestedModel == "" {
+		return opts
 	}
-	routeModel := req.Model
-	tried := make(map[string]struct{})
-	var lastErr error
-	for {
-		auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried)
-		if errPick != nil {
-			if lastErr != nil {
-				return cliproxyexecutor.Response{}, lastErr
-			}
-			return cliproxyexecutor.Response{}, errPick
-		}
-
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-
-		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
-		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
-		if errExec != nil {
-			result.Error = &Error{Message: errExec.Error()}
-			var se cliproxyexecutor.StatusError
-			if errors.As(errExec, &se) && se != nil {
-				result.Error.HTTPStatus = se.StatusCode()
-			}
-			if ra := retryAfterFromError(errExec); ra != nil {
-				result.RetryAfter = ra
-			}
-			m.MarkResult(execCtx, result)
-			lastErr = errExec
-			continue
-		}
-		m.MarkResult(execCtx, result)
-		return resp, nil
+	if hasRequestedModelMetadata(opts.Metadata) {
+		return opts
 	}
+	if len(opts.Metadata) == 0 {
+		opts.Metadata = map[string]any{cliproxyexecutor.RequestedModelMetadataKey: requestedModel}
+		return opts
+	}
+	meta := make(map[string]any, len(opts.Metadata)+1)
+	for k, v := range opts.Metadata {
+		meta[k] = v
+	}
+	meta[cliproxyexecutor.RequestedModelMetadataKey] = requestedModel
+	opts.Metadata = meta
+	return opts
 }

-func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	if provider == "" {
-		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "provider identifier is empty"}
+func hasRequestedModelMetadata(meta map[string]any) bool {
+	if len(meta) == 0 {
+		return false
 	}
-	routeModel := req.Model
-	tried := make(map[string]struct{})
-	var lastErr error
-	for {
-		auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried)
-		if errPick != nil {
-			if lastErr != nil {
-				return cliproxyexecutor.Response{}, lastErr
-			}
-			return cliproxyexecutor.Response{}, errPick
-		}
-
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-
-		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
-		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
-		if errExec != nil {
-			result.Error = &Error{Message: errExec.Error()}
-			var se cliproxyexecutor.StatusError
-			if errors.As(errExec, &se) && se != nil {
-				result.Error.HTTPStatus = se.StatusCode()
-			}
-			if ra := retryAfterFromError(errExec); ra != nil {
-				result.RetryAfter = ra
-			}
-			m.MarkResult(execCtx, result)
-			lastErr = errExec
-			continue
-		}
-		m.MarkResult(execCtx, result)
-		return resp, nil
+	raw, ok := meta[cliproxyexecutor.RequestedModelMetadataKey]
+	if !ok || raw == nil {
+		return false
 	}
-}
-
-func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error) {
-	if provider == "" {
-		return nil, &Error{Code: "provider_not_found", Message: "provider identifier is empty"}
-	}
-	routeModel := req.Model
-	tried := make(map[string]struct{})
-	var lastErr error
-	for {
-		auth, executor, errPick := m.pickNext(ctx, provider, routeModel, opts, tried)
-		if errPick != nil {
-			if lastErr != nil {
-				return nil, lastErr
-			}
-			return nil, errPick
-		}
-
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-
-		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
-		if errStream != nil {
-			rerr := &Error{Message: errStream.Error()}
-			var se cliproxyexecutor.StatusError
-			if errors.As(errStream, &se) && se != nil {
-				rerr.HTTPStatus = se.StatusCode()
-			}
-			result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr}
-			result.RetryAfter = retryAfterFromError(errStream)
-			m.MarkResult(execCtx, result)
-			lastErr = errStream
-			continue
-		}
-		out := make(chan cliproxyexecutor.StreamChunk)
-		go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
-			defer close(out)
-			var failed bool
-			for chunk := range streamChunks {
-				if chunk.Err != nil && !failed {
-					failed = true
-					rerr := &Error{Message: chunk.Err.Error()}
-					var se cliproxyexecutor.StatusError
-					if errors.As(chunk.Err, &se) && se != nil {
-						rerr.HTTPStatus = se.StatusCode()
-					}
-					m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
-				}
-				out <- chunk
-			}
-			if !failed {
-				m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
-			}
-		}(execCtx, auth.Clone(), provider, chunks)
-		return out, nil
+	switch v := raw.(type) {
+	case string:
+		return strings.TrimSpace(v) != ""
+	case []byte:
+		return strings.TrimSpace(string(v)) != ""
+	default:
+		return false
 	}
 }

@@ -1140,35 +1024,6 @@ func (m *Manager) normalizeProviders(providers []string) []string {
 	return result
 }

-// rotateProviders returns a rotated view of the providers list starting from the
-// current offset for the model, and atomically increments the offset for the next call.
-// This ensures concurrent requests get different starting providers.
-func (m *Manager) rotateProviders(model string, providers []string) []string {
-	if len(providers) == 0 {
-		return nil
-	}
-
-	// Atomic read-and-increment: get current offset and advance cursor in one lock
-	m.mu.Lock()
-	offset := m.providerOffsets[model]
-	m.providerOffsets[model] = (offset + 1) % len(providers)
-	m.mu.Unlock()
-
-	if len(providers) > 0 {
-		offset %= len(providers)
-	}
-	if offset < 0 {
-		offset = 0
-	}
-	if offset == 0 {
-		return providers
-	}
-	rotated := make([]string, 0, len(providers))
-	rotated = append(rotated, providers[offset:]...)
-	rotated = append(rotated, providers[:offset]...)
-	return rotated
-}
-
 func (m *Manager) retrySettings() (int, time.Duration) {
 	if m == nil {
 		return 0, 0
@@ -1176,11 +1031,15 @@ func (m *Manager) retrySettings() (int, time.Duration) {
 	return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
 }

-func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) {
+func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) {
 	if m == nil || len(providers) == 0 {
 		return 0, false
 	}
 	now := time.Now()
+	defaultRetry := int(m.requestRetry.Load())
+	if defaultRetry < 0 {
+		defaultRetry = 0
+	}
 	providerSet := make(map[string]struct{}, len(providers))
 	for i := range providers {
 		key := strings.TrimSpace(strings.ToLower(providers[i]))
@@ -1203,6 +1062,16 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
 		if _, ok := providerSet[providerKey]; !ok {
 			continue
 		}
+		effectiveRetry := defaultRetry
+		if override, ok := auth.RequestRetryOverride(); ok {
+			effectiveRetry = override
+		}
+		if effectiveRetry < 0 {
+			effectiveRetry = 0
+		}
+		if attempt >= effectiveRetry {
+			continue
+		}
 		blocked, reason, next := isAuthBlockedForModel(auth, model, now)
 		if !blocked || next.IsZero() || reason == blockReasonDisabled {
 			continue
@@ -1219,8 +1088,8 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
 	return minWait, found
 }

-func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
-	if err == nil || attempt >= maxAttempts-1 {
+func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
+	if err == nil {
 		return 0, false
 	}
 	if maxWait <= 0 {
@@ -1229,7 +1098,7 @@ func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, pro
 	if status := statusCodeFromError(err); status == http.StatusOK {
 		return 0, false
 	}
-	wait, found := m.closestCooldownWait(providers, model)
+	wait, found := m.closestCooldownWait(providers, model, attempt)
 	if !found || wait > maxWait {
 		return 0, false
 	}
@@ -1250,42 +1119,6 @@ func waitForCooldown(ctx context.Context, wait time.Duration) error {
 	}
 }

-func (m *Manager) executeProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (cliproxyexecutor.Response, error)) (cliproxyexecutor.Response, error) {
-	if len(providers) == 0 {
-		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
-	}
-	var lastErr error
-	for _, provider := range providers {
-		resp, errExec := fn(ctx, provider)
-		if errExec == nil {
-			return resp, nil
-		}
-		lastErr = errExec
-	}
-	if lastErr != nil {
-		return cliproxyexecutor.Response{}, lastErr
-	}
-	return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"}
-}
-
-func (m *Manager) executeStreamProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (<-chan cliproxyexecutor.StreamChunk, error)) (<-chan cliproxyexecutor.StreamChunk, error) {
-	if len(providers) == 0 {
-		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
-	}
-	var lastErr error
-	for _, provider := range providers {
-		chunks, errExec := fn(ctx, provider)
-		if errExec == nil {
-			return chunks, nil
-		}
-		lastErr = errExec
-	}
-	if lastErr != nil {
-		return nil, lastErr
-	}
-	return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
-}
-
 // MarkResult records an execution result and notifies hooks.
 func (m *Manager) MarkResult(ctx context.Context, result Result) {
 	if result.AuthID == "" {
@@ -1354,7 +1187,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 					if result.RetryAfter != nil {
 						next = now.Add(*result.RetryAfter)
 					} else {
-						cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
+						cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
 						if cooldown > 0 {
 							next = now.Add(cooldown)
 						}
@@ -1371,8 +1204,12 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
 					shouldSuspendModel = true
 					setModelQuota = true
 				case 408, 500, 502, 503, 504:
-					next := now.Add(1 * time.Minute)
-					state.NextRetryAfter = next
+					if quotaCooldownDisabledForAuth(auth) {
+						state.NextRetryAfter = time.Time{}
+					} else {
+						next := now.Add(1 * time.Minute)
+						state.NextRetryAfter = next
+					}
 				default:
 					state.NextRetryAfter = time.Time{}
 				}
@@ -1613,7 +1450,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
 		if retryAfter != nil {
 			next = now.Add(*retryAfter)
 		} else {
-			cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
+			cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
 			if cooldown > 0 {
 				next = now.Add(cooldown)
 			}
@@ -1623,7 +1460,11 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
 		auth.NextRetryAfter = next
 	case 408, 500, 502, 503, 504:
 		auth.StatusMessage = "transient upstream error"
-		auth.NextRetryAfter = now.Add(1 * time.Minute)
+		if quotaCooldownDisabledForAuth(auth) {
+			auth.NextRetryAfter = time.Time{}
+		} else {
+			auth.NextRetryAfter = now.Add(1 * time.Minute)
+		}
 	default:
 		if auth.StatusMessage == "" {
 			auth.StatusMessage = "request failed"
@@ -1632,11 +1473,11 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
 }

 // nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
-func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
+func nextQuotaCooldown(prevLevel int, disableCooling bool) (time.Duration, int) {
 	if prevLevel < 0 {
 		prevLevel = 0
 	}
-	if quotaCooldownDisabled.Load() {
+	if disableCooling {
 		return 0, prevLevel
 	}
 	cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
@@ -1812,6 +1653,9 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
 	if m.store == nil || auth == nil {
 		return nil
 	}
+	if shouldSkipPersist(ctx) {
+		return nil
+	}
 	if auth.Attributes != nil {
 		if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
 			return nil
--- a/sdk/cliproxy/auth/conductor_overrides_test.go
+++ b/sdk/cliproxy/auth/conductor_overrides_test.go
@@ -0,0 +1,97 @@
+package auth
+
+import (
+	"context"
+	"testing"
+	"time"
+)
+
+// TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride verifies that an auth's
+// request_retry metadata value decides whether shouldRetryAfterError allows a cooldown retry.
+func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
+	const model = "test-model"
+	mgr := NewManager(nil, nil, nil)
+	mgr.SetRetryConfig(3, 30*time.Second)
+	entry := &Auth{
+		ID:       "auth-1",
+		Provider: "claude",
+		Metadata: map[string]any{"request_retry": float64(0)},
+		ModelStates: map[string]*ModelState{
+			model: {
+				Unavailable:    true,
+				Status:         StatusError,
+				NextRetryAfter: time.Now().Add(5 * time.Second),
+			},
+		},
+	}
+	if _, errRegister := mgr.Register(context.Background(), entry); errRegister != nil {
+		t.Fatalf("register auth: %v", errRegister)
+	}
+
+	_, maxWait := mgr.retrySettings()
+	// check runs shouldRetryAfterError for a fresh HTTP 500 failure on the given attempt number.
+	check := func(attempt int) (time.Duration, bool) {
+		failure := &Error{HTTPStatus: 500, Message: "boom"}
+		return mgr.shouldRetryAfterError(failure, attempt, []string{"claude"}, model, maxWait)
+	}
+
+	if wait, retry := check(0); retry {
+		t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait)
+	}
+
+	entry.Metadata["request_retry"] = float64(1)
+	if _, errUpdate := mgr.Update(context.Background(), entry); errUpdate != nil {
+		t.Fatalf("update auth: %v", errUpdate)
+	}
+
+	wait, retry := check(0)
+	if !retry {
+		t.Fatalf("expected shouldRetry=true for request_retry=1, got false")
+	}
+	if wait <= 0 {
+		t.Fatalf("expected wait > 0, got %v", wait)
+	}
+	if _, retry = check(1); retry {
+		t.Fatalf("expected shouldRetry=false on attempt=1 for request_retry=1, got true")
+	}
+}
+
+// TestManager_MarkResult_RespectsAuthDisableCoolingOverride verifies that a failed result leaves
+// NextRetryAfter unset for an auth with disable_cooling=true while quotaCooldownDisabled is false.
+func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
+	const model = "test-model"
+	original := quotaCooldownDisabled.Load()
+	quotaCooldownDisabled.Store(false)
+	t.Cleanup(func() { quotaCooldownDisabled.Store(original) }) // restore the package-level flag
+
+	mgr := NewManager(nil, nil, nil)
+	entry := &Auth{
+		ID:       "auth-1",
+		Provider: "claude",
+		Metadata: map[string]any{"disable_cooling": true},
+	}
+	if _, errRegister := mgr.Register(context.Background(), entry); errRegister != nil {
+		t.Fatalf("register auth: %v", errRegister)
+	}
+
+	failure := Result{
+		AuthID:   "auth-1",
+		Provider: "claude",
+		Model:    model,
+		Success:  false,
+		Error:    &Error{HTTPStatus: 500, Message: "boom"},
+	}
+	mgr.MarkResult(context.Background(), failure)
+
+	stored, found := mgr.GetByID("auth-1")
+	if !found || stored == nil {
+		t.Fatalf("expected auth to be present")
+	}
+	state := stored.ModelStates[model]
+	if state == nil {
+		t.Fatalf("expected model state to be present")
+	}
+	if !state.NextRetryAfter.IsZero() {
+		t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
+	}
+}
--- a/sdk/cliproxy/auth/persist_policy.go
+++ b/sdk/cliproxy/auth/persist_policy.go
@@ -0,0 +1,24 @@
+package auth
+
+import "context"
+
+type skipPersistContextKey struct{} // private context key type under which WithSkipPersist stores its flag
+
+// WithSkipPersist derives a context that turns off persistence for Manager Update/Register calls.
+// It is meant for code reacting to file watcher events: the file on disk is already the source of
+// truth there, so persisting again would create a write-back loop. A nil ctx is treated as Background.
+func WithSkipPersist(ctx context.Context) context.Context {
+	if ctx != nil {
+		return context.WithValue(ctx, skipPersistContextKey{}, true)
+	}
+	return context.WithValue(context.Background(), skipPersistContextKey{}, true)
+}
+
+// shouldSkipPersist reports whether ctx carries a true skipPersistContextKey value; a nil ctx is false.
+func shouldSkipPersist(ctx context.Context) bool {
+	if ctx == nil {
+		return false
+	}
+	skip, isBool := ctx.Value(skipPersistContextKey{}).(bool)
+	return isBool && skip
+}
--- a/sdk/cliproxy/auth/persist_policy_test.go
+++ b/sdk/cliproxy/auth/persist_policy_test.go
@@ -0,0 +1,62 @@
+package auth
+
+import (
+	"context"
+	"sync/atomic"
+	"testing"
+)
+
+// countingStore is a store stub handed to NewManager that records only how often Save is called.
+type countingStore struct {
+	saveCount atomic.Int32
+}
+
+func (s *countingStore) Save(context.Context, *Auth) (string, error) {
+	s.saveCount.Add(1)
+	return "", nil
+}
+
+func (s *countingStore) List(context.Context) ([]*Auth, error) { return nil, nil }
+func (s *countingStore) Delete(context.Context, string) error { return nil }
+
+// TestWithSkipPersist_DisablesUpdatePersistence checks that Update saves once with a plain
+// context and performs no further Save when the context comes from WithSkipPersist.
+func TestWithSkipPersist_DisablesUpdatePersistence(t *testing.T) {
+	counter := &countingStore{}
+	manager := NewManager(counter, nil, nil)
+	entry := &Auth{
+		ID:       "auth-1",
+		Provider: "antigravity",
+		Metadata: map[string]any{"type": "antigravity"},
+	}
+	if _, err := manager.Update(context.Background(), entry); err != nil {
+		t.Fatalf("Update returned error: %v", err)
+	}
+	if saves := counter.saveCount.Load(); saves != 1 {
+		t.Fatalf("expected 1 Save call, got %d", saves)
+	}
+
+	if _, err := manager.Update(WithSkipPersist(context.Background()), entry); err != nil {
+		t.Fatalf("Update(skipPersist) returned error: %v", err)
+	}
+	if saves := counter.saveCount.Load(); saves != 1 {
+		t.Fatalf("expected Save call count to remain 1, got %d", saves)
+	}
+}
+
+// TestWithSkipPersist_DisablesRegisterPersistence checks that Register performs no Save at all
+// when it is given a context derived from WithSkipPersist.
+func TestWithSkipPersist_DisablesRegisterPersistence(t *testing.T) {
+	counter := &countingStore{}
+	manager := NewManager(counter, nil, nil)
+	entry := &Auth{ID: "auth-1", Provider: "antigravity", Metadata: map[string]any{"type": "antigravity"}}
+
+	skipCtx := WithSkipPersist(context.Background())
+	if _, err := manager.Register(skipCtx, entry); err != nil {
+		t.Fatalf("Register(skipPersist) returned error: %v", err)
+	}
+
+	if saves := counter.saveCount.Load(); saves != 0 {
+		t.Fatalf("expected 0 Save calls, got %d", saves)
+	}
+}
--- a/sdk/cliproxy/auth/types.go
+++ b/sdk/cliproxy/auth/types.go
@@ -194,6 +194,108 @@ func (a *Auth) ProxyInfo() string {
 	return "via proxy"
 }

+// DisableCoolingOverride reports the disable_cooling override stored in this auth's metadata.
+// Metadata key "disable_cooling" is consulted first, then the legacy "disable-cooling"; a key
+// whose value parseBoolAny cannot convert is skipped.
+// The second result is false when the receiver or its Metadata is nil, or when neither key
+// yields a usable value.
+func (a *Auth) DisableCoolingOverride() (bool, bool) {
+	if a == nil || a.Metadata == nil {
+		return false, false
+	}
+	for _, key := range [...]string{"disable_cooling", "disable-cooling"} {
+		if raw, present := a.Metadata[key]; present {
+			if disabled, valid := parseBoolAny(raw); valid {
+				return disabled, true
+			}
+		}
+	}
+	return false, false
+}
+
+// RequestRetryOverride reports the request_retry override stored in this auth's metadata.
+// Metadata key "request_retry" is consulted first, then the legacy "request-retry"; a key whose
+// value parseIntAny cannot convert is skipped. Negative values are clamped to 0. The second
+// result is false when the receiver or its Metadata is nil, or when neither key yields a value.
+func (a *Auth) RequestRetryOverride() (int, bool) {
+	if a == nil || a.Metadata == nil {
+		return 0, false
+	}
+	for _, key := range [...]string{"request_retry", "request-retry"} {
+		raw, present := a.Metadata[key]
+		if !present {
+			continue
+		}
+		retries, valid := parseIntAny(raw)
+		if !valid {
+			continue
+		}
+		if retries < 0 {
+			retries = 0
+		}
+		return retries, true
+	}
+	return 0, false
+}
+
+// parseBoolAny converts a loosely typed metadata value to a bool. The second result reports
+// whether the conversion succeeded; when it is false the first result is false as well.
+// A bool is returned unchanged. A string is trimmed and passed to strconv.ParseBool, so blank
+// or unparsable text fails. A float64 is true when non-zero. A json.Number must convert with
+// Int64 and is then true when non-zero. Any other type, including a nil value, fails.
+func parseBoolAny(val any) (bool, bool) {
+	switch typed := val.(type) {
+	case bool:
+		return typed, true
+	case float64:
+		return typed != 0, true
+	case json.Number:
+		// Int64 rejects non-integer literals such as "1.5"; those fall through to failure.
+		if n, err := typed.Int64(); err == nil {
+			return n != 0, true
+		}
+	case string:
+		// ParseBool understands 1, t, T, TRUE, true, True and the matching false spellings.
+		if text := strings.TrimSpace(typed); text != "" {
+			if flag, err := strconv.ParseBool(text); err == nil {
+				return flag, true
+			}
+		}
+	}
+	return false, false
+}
+
+// parseIntAny converts a loosely typed metadata value to an int; the second result reports success.
+// int, int32, int64, float64 (truncated toward zero), json.Number and trimmed decimal strings are
+// accepted. A value that does not fit in an int (NaN, ±Inf, out-of-range numbers) is rejected,
+// since Go leaves such float conversions implementation-defined and wraps narrowing integer ones.
+func parseIntAny(val any) (int, bool) {
+	switch typed := val.(type) {
+	case int:
+		return typed, true
+	case int32:
+		return int(typed), true
+	case int64:
+		if n := int(typed); int64(n) == typed {
+			return n, true
+		}
+	case float64:
+		const limit = float64(1 << (strconv.IntSize - 1)) // first magnitude past the int range
+		if typed >= -limit && typed < limit { // NaN makes both comparisons false
+			return int(typed), true
+		}
+	case json.Number:
+		if n, err := typed.Int64(); err == nil && int64(int(n)) == n {
+			return int(n), true
+		}
+	case string:
+		if n, err := strconv.Atoi(strings.TrimSpace(typed)); err == nil {
+			return n, true
+		}
+	}
+	return 0, false
+}
+
 func (a *Auth) AccountInfo() (string, string) {
 	if a == nil {
 		return "", ""
--- a/sdk/cliproxy/executor/types.go
+++ b/sdk/cliproxy/executor/types.go
@@ -7,6 +7,9 @@ import (
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 )

+// RequestedModelMetadataKey is the Options.Metadata key under which the client-requested model name is stored.
+const RequestedModelMetadataKey = "requested_model"
+
 // Request encapsulates the translated payload that will be sent to a provider executor.
 type Request struct {
 	// Model is the upstream model identifier after translation.
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -135,6 +135,7 @@ func (s *Service) ensureAuthUpdateQueue(ctx context.Context) {
 }

 func (s *Service) consumeAuthUpdates(ctx context.Context) {
+	ctx = coreauth.WithSkipPersist(ctx)
 	for {
 		select {
 		case <-ctx.Done():
Author	SHA1	Message	Date
Luis Pater	8f522eed43	Merge pull request #138 from router-for-me/plus v6.7.26	2026-01-27 20:40:12 +08:00
Luis Pater	3dc001a9d2	Merge branch 'main' into plus	2026-01-27 20:39:59 +08:00
Luis Pater	ee54ee8825	Merge pull request #137 from geen02/fix/idc-auth-method-case-sensitivity fix: case-insensitive auth_method comparison for IDC tokens	2026-01-27 20:38:03 +08:00
Luis Pater	2395b7a180	Merge pull request #135 from gogoing1024/main 支持多个idc登录凭证保存	2026-01-27 20:36:56 +08:00
Luis Pater	7583193c2a	Merge pull request #1257 from router-for-me/model feat(api): add management model definitions endpoint	2026-01-27 20:32:04 +08:00
hkfires	7cc3bd4ba0	chore(deps): mark golang.org/x/text as indirect	2026-01-27 19:19:52 +08:00
hkfires	88a0f095e8	chore(registry): disable gemini 2.5 flash image preview model	2026-01-27 18:33:13 +08:00
hkfires	c65f64dce0	chore(registry): comment out rev19-uic3-1p model config	2026-01-27 18:33:13 +08:00
hkfires	d18cd217e1	feat(api): add management model definitions endpoint	2026-01-27 18:33:12 +08:00
Luis Pater	ba4a1ab433	Merge pull request #1261 from Darley-Wey/fix/gemini_scheme fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error	2026-01-27 17:02:25 +08:00
Darley	decddb521e	fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error (Relates to #1260 )	2026-01-27 11:14:08 +03:30
jyy	de6b1ada5d	fix: case-insensitive auth_method comparison for IDC tokens The background refresher was skipping token files with auth_method values like 'IdC' or 'IDC' because the comparison was case-sensitive and only matched lowercase 'idc'. This fix normalizes the auth_method to lowercase before comparison in: - token_repository.go: readTokenFile() when filtering tokens to refresh - background_refresh.go: refreshSingle() when selecting refresh method Fixes the issue where 'IdC' != 'idc' caused tokens to be skipped entirely.	2026-01-27 13:39:38 +09:00
gogoing1024	e08f48c7a1	Merge branch 'router-for-me:main' into main	2026-01-27 09:23:36 +08:00
Luis Pater	851712a49e	Merge pull request #132 from ClubWeGo/codex/resolve-issue-#131 Resolve Issue #131	2026-01-26 23:36:16 +08:00
Luis Pater	9e34323a40	Merge branch 'router-for-me:main' into main	2026-01-26 23:35:07 +08:00
Luis Pater	70897247b2	feat(auth): add support for request_retry and disable_cooling overrides Implement `request_retry` and `disable_cooling` metadata overrides for authentication management. Update retry and cooling logic accordingly across `Manager`, Antigravity executor, and file synthesizer. Add tests to validate new behaviors.	2026-01-26 21:59:08 +08:00
Luis Pater	9c341f5aa5	feat(auth): add skip persistence context key for file watcher events Introduce `WithSkipPersist` to disable persistence during Manager Update/Register calls, preventing write-back loops caused by redundant file writes. Add corresponding tests and integrate with existing file store and conductor logic.	2026-01-26 18:20:19 +08:00
yuechenglong.5	f74a688fb9	refactor(auth): extract token filename generation into unified function Add ExtractIDCIdentifier and GenerateTokenFileName functions to centralize token filename generation logic. This improves code maintainability by: - Extracting IDC identifier from startUrl for unique token file naming - Supporting priority-based filename generation (email > startUrl > authMethod) - Removing duplicate filename generation code from oauth_web.go - Adding comprehensive unit tests for the new functions	2026-01-26 13:54:32 +08:00
Darley	e3e741d0be	Default Claude tool input schema	2026-01-26 09:15:38 +08:00
Darley	7c7c5fd967	Fix Kiro tool schema defaults	2026-01-26 08:27:53 +08:00
Luis Pater	fe8c7a62aa	Merge branch 'router-for-me:main' into main	2026-01-26 06:23:41 +08:00
Luis Pater	2af4a8dc12	refactor(runtime): implement retry logic for Antigravity executor with improved error handling and capacity management	2026-01-26 06:22:46 +08:00
Luis Pater	0f53b952b2	Merge pull request #1225 from router-for-me/log Add request_id to error logs and extract error messages	2026-01-25 22:08:46 +08:00
Luis Pater	7b2ae7377a	chore(auth): add `net/url` import to `auth_files.go` for URL handling	2026-01-25 21:53:20 +08:00
Luis Pater	c2ab288c7d	Merge pull request #130 from router-for-me/plus v6.7.22	2026-01-25 21:51:20 +08:00
Luis Pater	dbb433fcf8	Merge branch 'main' into plus	2026-01-25 21:51:02 +08:00
Luis Pater	2abf00b5a6	Merge pull request #126 from jellyfish-p/main feat(kiro): 添加用于令牌额度查询的api-call兼容	2026-01-25 21:49:07 +08:00
Luis Pater	275839e5c9	Merge pull request #124 from gogoing1024/main fix(kiro): always attempt token refresh on 401 before checking retry …	2026-01-25 21:48:03 +08:00
hkfires	f30ffd5f5e	feat(executor): add request_id to error logs Extract error.message from JSON error responses when summarizing error bodies for debug logs	2026-01-25 21:31:46 +08:00
Luis Pater	bc9a24d705	docs(readme): reposition CPA-XXX Panel section for improved visibility	2026-01-25 18:58:32 +08:00
Luis Pater	2c879f13ef	Merge pull request #1216 from ferretgeek/add-cpa-xxx-panel docs: 新增 CPA-XXX 社区面板项目	2026-01-25 18:57:32 +08:00
Gemini	07b4a08979	docs: translate CPA-XXX description to English	2026-01-25 18:00:28 +08:00
jellyfish-p	497339f055	feat(kiro): 添加用于令牌额度查询的api-call兼容	2026-01-25 11:36:52 +08:00
Gemini	7f612bb069	docs: add CPA-XXX panel to community list Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>	2026-01-25 10:45:51 +08:00
hkfires	5743b78694	test(claude): update expectations for system message handling	2026-01-25 08:31:29 +08:00
Luis Pater	2e6a2b655c	Merge pull request #1132 from XYenon/fix/gemini-models-displayname-override fix(gemini): preserve displayName and description in models list	2026-01-25 03:40:04 +08:00
Luis Pater	cb47ac21bf	Merge pull request #1179 from mallendeo/main fix(claude): skip built-in tools in OAuth tool prefix	2026-01-25 03:31:58 +08:00
Luis Pater	a1394b4596	Merge pull request #1183 from Darley-Wey/fix/api-align fix(api): enhance ClaudeModels response to align with api.anthropic.com	2026-01-25 03:30:14 +08:00
Luis Pater	9e97948f03	Merge pull request #1185 from router-for-me/auth Refactor authentication handling for Antigravity, Claude, Codex, and Gemini	2026-01-25 03:28:53 +08:00
yuechenglong.5	8f780e7280	fix(kiro): always attempt token refresh on 401 before checking retry count Refactor 401 error handling in both executeWithRetry and executeStreamWithRetry to always attempt token refresh regardless of remaining retry attempts. Previously, token refresh was only attempted when retries remained, which could leave valid refreshed tokens unused. Also add auth directory resolution in RefreshManager.Initialize to properly resolve the base directory path before creating the token repository.	2026-01-24 20:02:09 +08:00
Darley	46c6fb1e7a	fix(api): enhance ClaudeModels response to align with api.anthropic.com	2026-01-24 04:41:08 +03:30
hkfires	9f9fec5d4c	fix(auth): improve antigravity token exchange errors	2026-01-24 09:04:15 +08:00
hkfires	e95be10485	fix(auth): validate antigravity token userinfo email	2026-01-24 08:33:52 +08:00
hkfires	f3d58fa0ce	fix(auth): correct antigravity oauth redirect and expiry	2026-01-24 08:33:52 +08:00
hkfires	8c0eaa1f71	refactor(auth): export Gemini constants and use in handler	2026-01-24 08:33:52 +08:00
hkfires	405df58f72	refactor(auth): export Codex constants and slim down handler	2026-01-24 08:33:52 +08:00
hkfires	e7f13aa008	refactor(api): slim down RequestAnthropicToken to use internal/auth	2026-01-24 08:33:51 +08:00
hkfires	7cb6a9b89a	refactor(auth): export Claude OAuth constants for reuse	2026-01-24 08:33:51 +08:00
hkfires	9aa5344c29	refactor(api): slim down RequestAntigravityToken to use internal/auth	2026-01-24 08:33:51 +08:00
hkfires	8ba0ebbd2a	refactor(sdk): slim down Antigravity authenticator to use internal/auth	2026-01-24 08:33:51 +08:00
hkfires	c65407ab9f	refactor(auth): extract Antigravity OAuth constants to internal/auth	2026-01-24 08:33:51 +08:00
hkfires	9e59685212	refactor(auth): implement Antigravity AuthService in internal/auth	2026-01-24 08:33:51 +08:00
hkfires	4a4dfaa910	refactor(auth): replace sanitizeAntigravityFileName with antigravity.CredentialFileName	2026-01-24 08:33:51 +08:00
Luis Pater	0d6ecb0191	Fixed: #1077 refactor(translator): improve tools handling by separating functionDeclarations and googleSearch nodes	2026-01-24 05:51:11 +08:00
Mauricio Allende	f16461bfe7	fix(claude): skip built-in tools in OAuth tool prefix	2026-01-23 21:29:39 +00:00
Luis Pater	9fccc86b71	fix(executor): include requested model in payload configuration	2026-01-24 05:06:02 +08:00
Luis Pater	74683560a7	chore(deps): update go.mod to add golang.org/x/sync and golang.org/x/text	2026-01-24 05:04:09 +08:00
Luis Pater	1e4f9dd438	Merge pull request #123 from router-for-me/plus v6.7.20	2026-01-24 05:02:41 +08:00
Luis Pater	b9ff916494	Merge branch 'main' into plus	2026-01-24 05:02:32 +08:00
Luis Pater	9bf4a0cad2	Merge pull request #120 from Xm798/fix/kiro-auth-method-case fix(auth): normalize Kiro authMethod to lowercase on token import	2026-01-24 04:58:50 +08:00
Luis Pater	c32e2a8196	fix(auth): handle context cancellation in executor methods	2026-01-24 04:56:55 +08:00
Luis Pater	873d41582f	Merge pull request #1125 from NightHammer1000/dev Filter out Top_P when Temp is set on Claude	2026-01-24 02:03:33 +08:00
Luis Pater	6fb7d85558	Merge pull request #1137 from augustVino/fix/remove_empty_systemmsg fix(translator): ensure system message is only added if it contains c…	2026-01-24 02:02:18 +08:00
hkfires	d5e3e32d58	fix(auth): normalize plan type filenames to lowercase	2026-01-23 20:13:09 +08:00
Chén Mù	f353a54555	Merge pull request #1171 from router-for-me/auth refactor(auth): remove unused provider execution helpers	2026-01-23 19:43:42 +08:00
Chén Mù	1d6e2e751d	Merge pull request #1140 from sxjeru/main fix(auth): handle quota cooldown in retry logic for transient errors	2026-01-23 19:43:17 +08:00
hkfires	cc50b63422	refactor(auth): remove unused provider execution helpers	2026-01-23 19:12:55 +08:00
Luis Pater	15ae83a15b	Merge pull request #1169 from router-for-me/payload feat(executor): apply payload rules using requested model	2026-01-23 18:41:31 +08:00
hkfires	81b369aed9	fix(auth): include requested model in executor metadata	2026-01-23 18:30:08 +08:00
hkfires	ecc850bfb7	feat(executor): apply payload rules using requested model	2026-01-23 16:38:41 +08:00
Chén Mù	19b4ef33e0	Merge pull request #1102 from aldinokemal/main feat(management): add PATCH endpoint to enable/disable auth files	2026-01-23 09:05:24 +08:00
hkfires	7ca045d8b9	fix(executor): adjust model-specific request payload	2026-01-22 20:28:08 +08:00
Cyrus	25b9df478c	fix(auth): normalize authMethod to lowercase on Kiro token import - Add strings.ToLower() normalization in LoadKiroIDEToken() - Add same normalization in LoadKiroTokenFromPath() - Fixes issue where Kiro IDE exports "IdC" but code expects "idc"	2026-01-22 19:54:48 +08:00
hkfires	abfca6aab2	refactor(util): reorder gemini schema cleaner helpers	2026-01-22 18:38:48 +08:00
Chén Mù	3c71c075db	Merge pull request #1131 from sowar1987/fix/gemini-malformed-function-call Fix Gemini tool calling for Antigravity (malformed_function_call)	2026-01-22 18:07:03 +08:00
sowar1987	9c2992bfb2	test: align signature cache tests with cache behavior Co-Authored-By: Warp <agent@warp.dev>	2026-01-22 17:12:47 +08:00
sowar1987	269a1c5452	refactor: reuse placeholder reason description Co-Authored-By: Warp <agent@warp.dev>	2026-01-22 17:12:47 +08:00
sowar1987	22ce65ac72	test: update signature cache tests Revert gemini translator changes for scheme A Co-Authored-By: Warp <agent@warp.dev>	2026-01-22 17:12:47 +08:00
sowar1987	a2f8f59192	Fix Gemini function-calling INVALID_ARGUMENT by relaxing Gemini tool validation and cleaning schema	2026-01-22 17:11:07 +08:00
XYenon	8c7c446f33	fix(gemini): preserve displayName and description in models list Previously GeminiModels handler unconditionally overwrote displayName and description with the model name, losing the original values defined in model definitions (e.g., 'Gemini 3 Pro Preview'). Now only set these fields as fallback when they are missing or empty.	2026-01-22 15:19:27 +08:00
sxjeru	30a59168d7	fix(auth): handle quota cooldown in retry logic for transient errors	2026-01-21 21:48:23 +08:00
hkfires	c8884f5e25	refactor(translator): enhance signature handling in Claude and Gemini requests, streamline cache usage and remove unnecessary tests	2026-01-21 20:21:49 +08:00
Luis Pater	d9c6317c84	refactor(cache, translator): refine signature caching logic and tests, replace session-based logic with model group handling	2026-01-21 18:30:05 +08:00
Vino	d29ec95526	fix(translator): ensure system message is only added if it contains content	2026-01-21 16:45:50 +08:00
N1GHT	09970dc7af	Accept Geminis Review Suggestion	2026-01-20 17:51:36 +01:00
N1GHT	d81abd401c	Returned the Code Comment I trashed	2026-01-20 17:36:27 +01:00
N1GHT	a6cba25bc1	Small fix to filter out Top_P when Temperature is set on Claude to make requests go through	2026-01-20 17:34:26 +01:00
Aldino Kemal	2f6004d74a	perf(management): optimize auth lookup in PatchAuthFileStatus Use GetByID() for O(1) map lookup first, falling back to iteration only for FileName matching. Consistent with pattern in disableAuth().	2026-01-19 20:05:37 +07:00
Aldino Kemal	a1634909e8	feat(management): add PATCH endpoint to enable/disable auth files Add new PATCH /v0/management/auth-files/status endpoint that allows toggling the disabled state of auth files without deleting them. This enables users to temporarily disable credentials from the management UI.	2026-01-19 19:50:36 +07:00