mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-23 20:12:40 +00:00
Compare commits
42 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3631fab7e2 | ||
|
|
b3d292a5f9 | ||
|
|
d7d54fa2cc | ||
|
|
31649325f0 | ||
|
|
a709e5a12d | ||
|
|
f0ac77197b | ||
|
|
da0bbf2a3f | ||
|
|
295f34d7f0 | ||
|
|
c41ce77eea | ||
|
|
4eb1e6093f | ||
|
|
189a066807 | ||
|
|
d0bada7a43 | ||
|
|
9dc0e6d08b | ||
|
|
8510fc313e | ||
|
|
9e5b1d24e8 | ||
|
|
a7dae6ad52 | ||
|
|
e93e05ae25 | ||
|
|
c8c27325dc | ||
|
|
c3b6f3918c | ||
|
|
bbb55a8ab4 | ||
|
|
8f522eed43 | ||
|
|
3dc001a9d2 | ||
|
|
ee54ee8825 | ||
|
|
2395b7a180 | ||
|
|
7583193c2a | ||
|
|
7cc3bd4ba0 | ||
|
|
88a0f095e8 | ||
|
|
c65f64dce0 | ||
|
|
d18cd217e1 | ||
|
|
ba4a1ab433 | ||
|
|
decddb521e | ||
|
|
de6b1ada5d | ||
|
|
e08f48c7a1 | ||
|
|
851712a49e | ||
|
|
9e34323a40 | ||
|
|
70897247b2 | ||
|
|
9c341f5aa5 | ||
|
|
f74a688fb9 | ||
|
|
e3e741d0be | ||
|
|
7c7c5fd967 | ||
|
|
f7bfa8a05c | ||
|
|
c8620d1633 |
110
.github/workflows/docker-image.yml
vendored
110
.github/workflows/docker-image.yml
vendored
@@ -10,13 +10,11 @@ env:
|
|||||||
DOCKERHUB_REPO: eceasy/cli-proxy-api-plus
|
DOCKERHUB_REPO: eceasy/cli-proxy-api-plus
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
docker:
|
docker_amd64:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Set up QEMU
|
|
||||||
uses: docker/setup-qemu-action@v3
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
- name: Login to DockerHub
|
- name: Login to DockerHub
|
||||||
@@ -29,19 +27,113 @@ jobs:
|
|||||||
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||||
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||||
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||||
- name: Build and push
|
- name: Build and push (amd64)
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
platforms: |
|
platforms: linux/amd64
|
||||||
linux/amd64
|
|
||||||
linux/arm64
|
|
||||||
push: true
|
push: true
|
||||||
build-args: |
|
build-args: |
|
||||||
VERSION=${{ env.VERSION }}
|
VERSION=${{ env.VERSION }}
|
||||||
COMMIT=${{ env.COMMIT }}
|
COMMIT=${{ env.COMMIT }}
|
||||||
BUILD_DATE=${{ env.BUILD_DATE }}
|
BUILD_DATE=${{ env.BUILD_DATE }}
|
||||||
tags: |
|
tags: |
|
||||||
${{ env.DOCKERHUB_REPO }}:latest
|
${{ env.DOCKERHUB_REPO }}:latest-amd64
|
||||||
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}
|
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-amd64
|
||||||
|
|
||||||
|
docker_arm64:
|
||||||
|
runs-on: ubuntu-24.04-arm
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Login to DockerHub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
|
- name: Generate Build Metadata
|
||||||
|
run: |
|
||||||
|
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||||
|
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||||
|
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||||
|
- name: Build and push (arm64)
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
platforms: linux/arm64
|
||||||
|
push: true
|
||||||
|
build-args: |
|
||||||
|
VERSION=${{ env.VERSION }}
|
||||||
|
COMMIT=${{ env.COMMIT }}
|
||||||
|
BUILD_DATE=${{ env.BUILD_DATE }}
|
||||||
|
tags: |
|
||||||
|
${{ env.DOCKERHUB_REPO }}:latest-arm64
|
||||||
|
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-arm64
|
||||||
|
|
||||||
|
docker_manifest:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs:
|
||||||
|
- docker_amd64
|
||||||
|
- docker_arm64
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Login to DockerHub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
|
- name: Generate Build Metadata
|
||||||
|
run: |
|
||||||
|
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
|
||||||
|
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
|
||||||
|
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
|
||||||
|
- name: Create and push multi-arch manifests
|
||||||
|
run: |
|
||||||
|
docker buildx imagetools create \
|
||||||
|
--tag "${DOCKERHUB_REPO}:latest" \
|
||||||
|
"${DOCKERHUB_REPO}:latest-amd64" \
|
||||||
|
"${DOCKERHUB_REPO}:latest-arm64"
|
||||||
|
docker buildx imagetools create \
|
||||||
|
--tag "${DOCKERHUB_REPO}:${VERSION}" \
|
||||||
|
"${DOCKERHUB_REPO}:${VERSION}-amd64" \
|
||||||
|
"${DOCKERHUB_REPO}:${VERSION}-arm64"
|
||||||
|
- name: Cleanup temporary tags
|
||||||
|
continue-on-error: true
|
||||||
|
env:
|
||||||
|
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
namespace="${DOCKERHUB_REPO%%/*}"
|
||||||
|
repo_name="${DOCKERHUB_REPO#*/}"
|
||||||
|
|
||||||
|
token="$(
|
||||||
|
curl -fsSL \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
|
||||||
|
'https://hub.docker.com/v2/users/login/' \
|
||||||
|
| python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])'
|
||||||
|
)"
|
||||||
|
|
||||||
|
delete_tag() {
|
||||||
|
local tag="$1"
|
||||||
|
local url="https://hub.docker.com/v2/repositories/${namespace}/${repo_name}/tags/${tag}/"
|
||||||
|
local http_code
|
||||||
|
http_code="$(curl -sS -o /dev/null -w "%{http_code}" -X DELETE -H "Authorization: JWT ${token}" "${url}" || true)"
|
||||||
|
if [ "${http_code}" = "204" ] || [ "${http_code}" = "404" ]; then
|
||||||
|
echo "Docker Hub tag removed (or missing): ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
echo "Docker Hub tag delete failed: ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
delete_tag "latest-amd64"
|
||||||
|
delete_tag "latest-arm64"
|
||||||
|
delete_tag "${VERSION}-amd64"
|
||||||
|
delete_tag "${VERSION}-arm64"
|
||||||
|
|||||||
33
internal/api/handlers/management/model_definitions.go
Normal file
33
internal/api/handlers/management/model_definitions.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
package management
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetStaticModelDefinitions returns static model metadata for a given channel.
|
||||||
|
// Channel is provided via path param (:channel) or query param (?channel=...).
|
||||||
|
func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
|
||||||
|
channel := strings.TrimSpace(c.Param("channel"))
|
||||||
|
if channel == "" {
|
||||||
|
channel = strings.TrimSpace(c.Query("channel"))
|
||||||
|
}
|
||||||
|
if channel == "" {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
models := registry.GetStaticModelDefinitionsByChannel(channel)
|
||||||
|
if models == nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": channel})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, gin.H{
|
||||||
|
"channel": strings.ToLower(strings.TrimSpace(channel)),
|
||||||
|
"models": models,
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||||
@@ -103,6 +104,7 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
|
|||||||
Headers: headers,
|
Headers: headers,
|
||||||
Body: body,
|
Body: body,
|
||||||
RequestID: logging.GetGinRequestID(c),
|
RequestID: logging.GetGinRequestID(c),
|
||||||
|
Timestamp: time.Now(),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
||||||
@@ -20,22 +21,24 @@ type RequestInfo struct {
|
|||||||
Headers map[string][]string // Headers contains the request headers.
|
Headers map[string][]string // Headers contains the request headers.
|
||||||
Body []byte // Body is the raw request body.
|
Body []byte // Body is the raw request body.
|
||||||
RequestID string // RequestID is the unique identifier for the request.
|
RequestID string // RequestID is the unique identifier for the request.
|
||||||
|
Timestamp time.Time // Timestamp is when the request was received.
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
|
// ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
|
||||||
// It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
|
// It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
|
||||||
type ResponseWriterWrapper struct {
|
type ResponseWriterWrapper struct {
|
||||||
gin.ResponseWriter
|
gin.ResponseWriter
|
||||||
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
|
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
|
||||||
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
|
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
|
||||||
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
|
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
|
||||||
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
|
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
|
||||||
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
|
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
|
||||||
logger logging.RequestLogger // logger is the instance of the request logger service.
|
logger logging.RequestLogger // logger is the instance of the request logger service.
|
||||||
requestInfo *RequestInfo // requestInfo holds the details of the original request.
|
requestInfo *RequestInfo // requestInfo holds the details of the original request.
|
||||||
statusCode int // statusCode stores the HTTP status code of the response.
|
statusCode int // statusCode stores the HTTP status code of the response.
|
||||||
headers map[string][]string // headers stores the response headers.
|
headers map[string][]string // headers stores the response headers.
|
||||||
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
|
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
|
||||||
|
firstChunkTimestamp time.Time // firstChunkTimestamp captures TTFB for streaming responses.
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
|
// NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
|
||||||
@@ -73,6 +76,10 @@ func (w *ResponseWriterWrapper) Write(data []byte) (int, error) {
|
|||||||
|
|
||||||
// THEN: Handle logging based on response type
|
// THEN: Handle logging based on response type
|
||||||
if w.isStreaming && w.chunkChannel != nil {
|
if w.isStreaming && w.chunkChannel != nil {
|
||||||
|
// Capture TTFB on first chunk (synchronous, before async channel send)
|
||||||
|
if w.firstChunkTimestamp.IsZero() {
|
||||||
|
w.firstChunkTimestamp = time.Now()
|
||||||
|
}
|
||||||
// For streaming responses: Send to async logging channel (non-blocking)
|
// For streaming responses: Send to async logging channel (non-blocking)
|
||||||
select {
|
select {
|
||||||
case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
|
case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
|
||||||
@@ -117,6 +124,10 @@ func (w *ResponseWriterWrapper) WriteString(data string) (int, error) {
|
|||||||
|
|
||||||
// THEN: Capture for logging
|
// THEN: Capture for logging
|
||||||
if w.isStreaming && w.chunkChannel != nil {
|
if w.isStreaming && w.chunkChannel != nil {
|
||||||
|
// Capture TTFB on first chunk (synchronous, before async channel send)
|
||||||
|
if w.firstChunkTimestamp.IsZero() {
|
||||||
|
w.firstChunkTimestamp = time.Now()
|
||||||
|
}
|
||||||
select {
|
select {
|
||||||
case w.chunkChannel <- []byte(data):
|
case w.chunkChannel <- []byte(data):
|
||||||
default:
|
default:
|
||||||
@@ -280,6 +291,8 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
|
|||||||
w.streamDone = nil
|
w.streamDone = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
w.streamWriter.SetFirstChunkTimestamp(w.firstChunkTimestamp)
|
||||||
|
|
||||||
// Write API Request and Response to the streaming log before closing
|
// Write API Request and Response to the streaming log before closing
|
||||||
apiRequest := w.extractAPIRequest(c)
|
apiRequest := w.extractAPIRequest(c)
|
||||||
if len(apiRequest) > 0 {
|
if len(apiRequest) > 0 {
|
||||||
@@ -297,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
|
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
|
func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
|
||||||
@@ -337,7 +350,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
|
|||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
|
func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
|
||||||
|
ts, isExist := c.Get("API_RESPONSE_TIMESTAMP")
|
||||||
|
if !isExist {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
if t, ok := ts.(time.Time); ok {
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
|
||||||
if w.requestInfo == nil {
|
if w.requestInfo == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -348,7 +372,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
|||||||
}
|
}
|
||||||
|
|
||||||
if loggerWithOptions, ok := w.logger.(interface {
|
if loggerWithOptions, ok := w.logger.(interface {
|
||||||
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
|
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
|
||||||
}); ok {
|
}); ok {
|
||||||
return loggerWithOptions.LogRequestWithOptions(
|
return loggerWithOptions.LogRequestWithOptions(
|
||||||
w.requestInfo.URL,
|
w.requestInfo.URL,
|
||||||
@@ -363,6 +387,8 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
|||||||
apiResponseErrors,
|
apiResponseErrors,
|
||||||
forceLog,
|
forceLog,
|
||||||
w.requestInfo.RequestID,
|
w.requestInfo.RequestID,
|
||||||
|
w.requestInfo.Timestamp,
|
||||||
|
apiResponseTimestamp,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -378,5 +404,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
|
|||||||
apiResponseBody,
|
apiResponseBody,
|
||||||
apiResponseErrors,
|
apiResponseErrors,
|
||||||
w.requestInfo.RequestID,
|
w.requestInfo.RequestID,
|
||||||
|
w.requestInfo.Timestamp,
|
||||||
|
apiResponseTimestamp,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
@@ -633,6 +634,7 @@ func (s *Server) registerManagementRoutes() {
|
|||||||
|
|
||||||
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
|
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
|
||||||
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
|
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
|
||||||
|
mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
|
||||||
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
|
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
|
||||||
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
|
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
|
||||||
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
|
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
|
||||||
@@ -1017,14 +1019,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
|
|||||||
s.mgmt.SetAuthManager(s.handlers.AuthManager)
|
s.mgmt.SetAuthManager(s.handlers.AuthManager)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Notify Amp module of config changes (for model mapping hot-reload)
|
// Notify Amp module only when Amp config has changed.
|
||||||
if s.ampModule != nil {
|
ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
|
||||||
log.Debugf("triggering amp module config update")
|
if ampConfigChanged {
|
||||||
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
|
if s.ampModule != nil {
|
||||||
log.Errorf("failed to update Amp module config: %v", err)
|
log.Debugf("triggering amp module config update")
|
||||||
|
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
|
||||||
|
log.Errorf("failed to update Amp module config: %v", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Warnf("amp module is nil, skipping config update")
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
log.Warnf("amp module is nil, skipping config update")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count client sources from configuration and auth store.
|
// Count client sources from configuration and auth store.
|
||||||
|
|||||||
@@ -360,7 +360,7 @@ func SanitizeEmailForFilename(email string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result := email
|
result := email
|
||||||
|
|
||||||
// First, handle URL-encoded path traversal attempts (%2F, %2E, %5C, etc.)
|
// First, handle URL-encoded path traversal attempts (%2F, %2E, %5C, etc.)
|
||||||
// This prevents encoded characters from bypassing the sanitization.
|
// This prevents encoded characters from bypassing the sanitization.
|
||||||
// Note: We replace % last to catch any remaining encodings including double-encoding (%252F)
|
// Note: We replace % last to catch any remaining encodings including double-encoding (%252F)
|
||||||
@@ -378,7 +378,7 @@ func SanitizeEmailForFilename(email string) string {
|
|||||||
for _, char := range []string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|", " ", "\x00"} {
|
for _, char := range []string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|", " ", "\x00"} {
|
||||||
result = strings.ReplaceAll(result, char, "_")
|
result = strings.ReplaceAll(result, char, "_")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prevent path traversal: replace leading dots in each path component
|
// Prevent path traversal: replace leading dots in each path component
|
||||||
// This handles cases like "../../../etc/passwd" → "_.._.._.._etc_passwd"
|
// This handles cases like "../../../etc/passwd" → "_.._.._.._etc_passwd"
|
||||||
parts := strings.Split(result, "_")
|
parts := strings.Split(result, "_")
|
||||||
@@ -389,6 +389,65 @@ func SanitizeEmailForFilename(email string) string {
|
|||||||
parts[i] = part
|
parts[i] = part
|
||||||
}
|
}
|
||||||
result = strings.Join(parts, "_")
|
result = strings.Join(parts, "_")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ExtractIDCIdentifier extracts a unique identifier from IDC startUrl.
|
||||||
|
// Examples:
|
||||||
|
// - "https://d-1234567890.awsapps.com/start" -> "d-1234567890"
|
||||||
|
// - "https://my-company.awsapps.com/start" -> "my-company"
|
||||||
|
// - "https://acme-corp.awsapps.com/start" -> "acme-corp"
|
||||||
|
func ExtractIDCIdentifier(startURL string) string {
|
||||||
|
if startURL == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove protocol prefix
|
||||||
|
url := strings.TrimPrefix(startURL, "https://")
|
||||||
|
url = strings.TrimPrefix(url, "http://")
|
||||||
|
|
||||||
|
// Extract subdomain (first part before the first dot)
|
||||||
|
// Format: {identifier}.awsapps.com/start
|
||||||
|
parts := strings.Split(url, ".")
|
||||||
|
if len(parts) > 0 && parts[0] != "" {
|
||||||
|
identifier := parts[0]
|
||||||
|
// Sanitize for filename safety
|
||||||
|
identifier = strings.ReplaceAll(identifier, "/", "_")
|
||||||
|
identifier = strings.ReplaceAll(identifier, "\\", "_")
|
||||||
|
identifier = strings.ReplaceAll(identifier, ":", "_")
|
||||||
|
return identifier
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GenerateTokenFileName generates a unique filename for token storage.
|
||||||
|
// Priority: email > startUrl identifier (for IDC) > authMethod only
|
||||||
|
// Format: kiro-{authMethod}-{identifier}.json
|
||||||
|
func GenerateTokenFileName(tokenData *KiroTokenData) string {
|
||||||
|
authMethod := tokenData.AuthMethod
|
||||||
|
if authMethod == "" {
|
||||||
|
authMethod = "unknown"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 1: Use email if available
|
||||||
|
if tokenData.Email != "" {
|
||||||
|
// Sanitize email for filename (replace @ and . with -)
|
||||||
|
sanitizedEmail := tokenData.Email
|
||||||
|
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, "@", "-")
|
||||||
|
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
||||||
|
return fmt.Sprintf("kiro-%s-%s.json", authMethod, sanitizedEmail)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 2: For IDC, use startUrl identifier
|
||||||
|
if authMethod == "idc" && tokenData.StartURL != "" {
|
||||||
|
identifier := ExtractIDCIdentifier(tokenData.StartURL)
|
||||||
|
if identifier != "" {
|
||||||
|
return fmt.Sprintf("kiro-%s-%s.json", authMethod, identifier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 3: Fallback to authMethod only
|
||||||
|
return fmt.Sprintf("kiro-%s.json", authMethod)
|
||||||
|
}
|
||||||
|
|||||||
@@ -151,11 +151,161 @@ func TestSanitizeEmailForFilename(t *testing.T) {
|
|||||||
// createTestJWT creates a test JWT token with the given claims
|
// createTestJWT creates a test JWT token with the given claims
|
||||||
func createTestJWT(claims map[string]any) string {
|
func createTestJWT(claims map[string]any) string {
|
||||||
header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256","typ":"JWT"}`))
|
header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256","typ":"JWT"}`))
|
||||||
|
|
||||||
payloadBytes, _ := json.Marshal(claims)
|
payloadBytes, _ := json.Marshal(claims)
|
||||||
payload := base64.RawURLEncoding.EncodeToString(payloadBytes)
|
payload := base64.RawURLEncoding.EncodeToString(payloadBytes)
|
||||||
|
|
||||||
signature := base64.RawURLEncoding.EncodeToString([]byte("fake-signature"))
|
signature := base64.RawURLEncoding.EncodeToString([]byte("fake-signature"))
|
||||||
|
|
||||||
return header + "." + payload + "." + signature
|
return header + "." + payload + "." + signature
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractIDCIdentifier(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
startURL string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Empty URL",
|
||||||
|
startURL: "",
|
||||||
|
expected: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Standard IDC URL with d- prefix",
|
||||||
|
startURL: "https://d-1234567890.awsapps.com/start",
|
||||||
|
expected: "d-1234567890",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC URL with company name",
|
||||||
|
startURL: "https://my-company.awsapps.com/start",
|
||||||
|
expected: "my-company",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC URL with simple name",
|
||||||
|
startURL: "https://acme-corp.awsapps.com/start",
|
||||||
|
expected: "acme-corp",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC URL without https",
|
||||||
|
startURL: "http://d-9876543210.awsapps.com/start",
|
||||||
|
expected: "d-9876543210",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC URL with subdomain only",
|
||||||
|
startURL: "https://test.awsapps.com/start",
|
||||||
|
expected: "test",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Builder ID URL",
|
||||||
|
startURL: "https://view.awsapps.com/start",
|
||||||
|
expected: "view",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := ExtractIDCIdentifier(tt.startURL)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("ExtractIDCIdentifier() = %q, want %q", result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGenerateTokenFileName(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
tokenData *KiroTokenData
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "IDC with email",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "idc",
|
||||||
|
Email: "user@example.com",
|
||||||
|
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-idc-user-example-com.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC without email but with startUrl",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "idc",
|
||||||
|
Email: "",
|
||||||
|
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-idc-d-1234567890.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC with company name in startUrl",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "idc",
|
||||||
|
Email: "",
|
||||||
|
StartURL: "https://my-company.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-idc-my-company.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "IDC without email and without startUrl",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "idc",
|
||||||
|
Email: "",
|
||||||
|
StartURL: "",
|
||||||
|
},
|
||||||
|
expected: "kiro-idc.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Builder ID with email",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "builder-id",
|
||||||
|
Email: "user@gmail.com",
|
||||||
|
StartURL: "https://view.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-builder-id-user-gmail-com.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Builder ID without email",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "builder-id",
|
||||||
|
Email: "",
|
||||||
|
StartURL: "https://view.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-builder-id.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Social auth with email",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "google",
|
||||||
|
Email: "user@gmail.com",
|
||||||
|
},
|
||||||
|
expected: "kiro-google-user-gmail-com.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Empty auth method",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "",
|
||||||
|
Email: "",
|
||||||
|
},
|
||||||
|
expected: "kiro-unknown.json",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Email with special characters",
|
||||||
|
tokenData: &KiroTokenData{
|
||||||
|
AuthMethod: "idc",
|
||||||
|
Email: "user.name+tag@sub.example.com",
|
||||||
|
StartURL: "https://d-1234567890.awsapps.com/start",
|
||||||
|
},
|
||||||
|
expected: "kiro-idc-user-name+tag-sub-example-com.json",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := GenerateTokenFileName(tt.tokenData)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("GenerateTokenFileName() = %q, want %q", result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package kiro
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"log"
|
"log"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -58,7 +59,7 @@ type BackgroundRefresher struct {
|
|||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
oauth *KiroOAuth
|
oauth *KiroOAuth
|
||||||
ssoClient *SSOOIDCClient
|
ssoClient *SSOOIDCClient
|
||||||
callbackMu sync.RWMutex // 保护回调函数的并发访问
|
callbackMu sync.RWMutex // 保护回调函数的并发访问
|
||||||
onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // 刷新成功回调
|
onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // 刷新成功回调
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,7 +164,10 @@ func (r *BackgroundRefresher) refreshSingle(ctx context.Context, token *Token) {
|
|||||||
var newTokenData *KiroTokenData
|
var newTokenData *KiroTokenData
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
switch token.AuthMethod {
|
// Normalize auth method to lowercase for case-insensitive matching
|
||||||
|
authMethod := strings.ToLower(token.AuthMethod)
|
||||||
|
|
||||||
|
switch authMethod {
|
||||||
case "idc":
|
case "idc":
|
||||||
newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
|
newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
|
||||||
ctx,
|
ctx,
|
||||||
|
|||||||
@@ -421,7 +421,7 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
|
|||||||
log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
|
log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to default location
|
// Fall back to default location
|
||||||
if authDir == "" {
|
if authDir == "" {
|
||||||
home, err := os.UserHomeDir()
|
home, err := os.UserHomeDir()
|
||||||
@@ -431,24 +431,16 @@ func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
|
|||||||
}
|
}
|
||||||
authDir = filepath.Join(home, ".cli-proxy-api")
|
authDir = filepath.Join(home, ".cli-proxy-api")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create directory if not exists
|
// Create directory if not exists
|
||||||
if err := os.MkdirAll(authDir, 0700); err != nil {
|
if err := os.MkdirAll(authDir, 0700); err != nil {
|
||||||
log.Errorf("OAuth Web: failed to create auth directory: %v", err)
|
log.Errorf("OAuth Web: failed to create auth directory: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate filename based on auth method
|
// Generate filename using the unified function
|
||||||
// Format: kiro-{authMethod}.json or kiro-{authMethod}-{email}.json
|
fileName := GenerateTokenFileName(tokenData)
|
||||||
fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
|
|
||||||
if tokenData.Email != "" {
|
|
||||||
// Sanitize email for filename (replace @ and . with -)
|
|
||||||
sanitizedEmail := tokenData.Email
|
|
||||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, "@", "-")
|
|
||||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
|
||||||
fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
|
|
||||||
}
|
|
||||||
|
|
||||||
authFilePath := filepath.Join(authDir, fileName)
|
authFilePath := filepath.Join(authDir, fileName)
|
||||||
|
|
||||||
// Convert to storage format and save
|
// Convert to storage format and save
|
||||||
@@ -811,13 +803,8 @@ func (h *OAuthWebHandler) handleImportToken(c *gin.Context) {
|
|||||||
// Save token to file
|
// Save token to file
|
||||||
h.saveTokenToFile(tokenData)
|
h.saveTokenToFile(tokenData)
|
||||||
|
|
||||||
// Generate filename for response
|
// Generate filename for response using the unified function
|
||||||
fileName := fmt.Sprintf("kiro-%s.json", tokenData.AuthMethod)
|
fileName := GenerateTokenFileName(tokenData)
|
||||||
if tokenData.Email != "" {
|
|
||||||
sanitizedEmail := strings.ReplaceAll(tokenData.Email, "@", "-")
|
|
||||||
sanitizedEmail = strings.ReplaceAll(sanitizedEmail, ".", "-")
|
|
||||||
fileName = fmt.Sprintf("kiro-%s-%s.json", tokenData.AuthMethod, sanitizedEmail)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Infof("OAuth Web: token imported successfully")
|
log.Infof("OAuth Web: token imported successfully")
|
||||||
c.JSON(http.StatusOK, gin.H{
|
c.JSON(http.StatusOK, gin.H{
|
||||||
|
|||||||
@@ -187,8 +187,9 @@ func (r *FileTokenRepository) readTokenFile(path string) (*Token, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// 检查 auth_method
|
// 检查 auth_method (case-insensitive comparison to handle "IdC", "IDC", "idc", etc.)
|
||||||
authMethod, _ := metadata["auth_method"].(string)
|
authMethod, _ := metadata["auth_method"].(string)
|
||||||
|
authMethod = strings.ToLower(authMethod)
|
||||||
if authMethod != "idc" && authMethod != "builder-id" {
|
if authMethod != "idc" && authMethod != "builder-id" {
|
||||||
return nil, nil // 只处理 IDC 和 Builder ID token
|
return nil, nil // 只处理 IDC 和 Builder ID token
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -986,6 +986,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
|
|||||||
removeLegacyGenerativeLanguageKeys(original.Content[0])
|
removeLegacyGenerativeLanguageKeys(original.Content[0])
|
||||||
|
|
||||||
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
|
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
|
||||||
|
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-model-alias")
|
||||||
|
|
||||||
// Merge generated into original in-place, preserving comments/order of existing nodes.
|
// Merge generated into original in-place, preserving comments/order of existing nodes.
|
||||||
mergeMappingPreserve(original.Content[0], generated.Content[0])
|
mergeMappingPreserve(original.Content[0], generated.Content[0])
|
||||||
@@ -1476,6 +1477,16 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
|
|||||||
}
|
}
|
||||||
srcIdx := findMapKeyIndex(srcRoot, key)
|
srcIdx := findMapKeyIndex(srcRoot, key)
|
||||||
if srcIdx < 0 {
|
if srcIdx < 0 {
|
||||||
|
// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
|
||||||
|
//
|
||||||
|
// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
|
||||||
|
// oauth-model-alias key is missing, migration will add the default antigravity aliases.
|
||||||
|
// When users delete the last channel from oauth-model-alias via the management API,
|
||||||
|
// we want that deletion to persist across hot reloads and restarts.
|
||||||
|
if key == "oauth-model-alias" {
|
||||||
|
dstRoot.Content[dstIdx+1] = &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||||
|
return
|
||||||
|
}
|
||||||
removeMapKey(dstRoot, key)
|
removeMapKey(dstRoot, key)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,10 +44,12 @@ type RequestLogger interface {
|
|||||||
// - apiRequest: The API request data
|
// - apiRequest: The API request data
|
||||||
// - apiResponse: The API response data
|
// - apiResponse: The API response data
|
||||||
// - requestID: Optional request ID for log file naming
|
// - requestID: Optional request ID for log file naming
|
||||||
|
// - requestTimestamp: When the request was received
|
||||||
|
// - apiResponseTimestamp: When the API response was received
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - error: An error if logging fails, nil otherwise
|
// - error: An error if logging fails, nil otherwise
|
||||||
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error
|
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
|
||||||
|
|
||||||
// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
|
// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
|
||||||
//
|
//
|
||||||
@@ -109,6 +111,12 @@ type StreamingLogWriter interface {
|
|||||||
// - error: An error if writing fails, nil otherwise
|
// - error: An error if writing fails, nil otherwise
|
||||||
WriteAPIResponse(apiResponse []byte) error
|
WriteAPIResponse(apiResponse []byte) error
|
||||||
|
|
||||||
|
// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - timestamp: The time when first response chunk was received
|
||||||
|
SetFirstChunkTimestamp(timestamp time.Time)
|
||||||
|
|
||||||
// Close finalizes the log file and cleans up resources.
|
// Close finalizes the log file and cleans up resources.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
@@ -180,20 +188,22 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
|
|||||||
// - apiRequest: The API request data
|
// - apiRequest: The API request data
|
||||||
// - apiResponse: The API response data
|
// - apiResponse: The API response data
|
||||||
// - requestID: Optional request ID for log file naming
|
// - requestID: Optional request ID for log file naming
|
||||||
|
// - requestTimestamp: When the request was received
|
||||||
|
// - apiResponseTimestamp: When the API response was received
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - error: An error if logging fails, nil otherwise
|
// - error: An error if logging fails, nil otherwise
|
||||||
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
|
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
|
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
|
||||||
}
|
}
|
||||||
|
|
||||||
// LogRequestWithOptions logs a request with optional forced logging behavior.
|
// LogRequestWithOptions logs a request with optional forced logging behavior.
|
||||||
// The force flag allows writing error logs even when regular request logging is disabled.
|
// The force flag allows writing error logs even when regular request logging is disabled.
|
||||||
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
|
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||||
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
|
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
|
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
|
||||||
if !l.enabled && !force {
|
if !l.enabled && !force {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -247,6 +257,8 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
|
|||||||
responseHeaders,
|
responseHeaders,
|
||||||
responseToWrite,
|
responseToWrite,
|
||||||
decompressErr,
|
decompressErr,
|
||||||
|
requestTimestamp,
|
||||||
|
apiResponseTimestamp,
|
||||||
)
|
)
|
||||||
if errClose := logFile.Close(); errClose != nil {
|
if errClose := logFile.Close(); errClose != nil {
|
||||||
log.WithError(errClose).Warn("failed to close request log file")
|
log.WithError(errClose).Warn("failed to close request log file")
|
||||||
@@ -499,17 +511,22 @@ func (l *FileRequestLogger) writeNonStreamingLog(
|
|||||||
responseHeaders map[string][]string,
|
responseHeaders map[string][]string,
|
||||||
response []byte,
|
response []byte,
|
||||||
decompressErr error,
|
decompressErr error,
|
||||||
|
requestTimestamp time.Time,
|
||||||
|
apiResponseTimestamp time.Time,
|
||||||
) error {
|
) error {
|
||||||
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, time.Now()); errWrite != nil {
|
if requestTimestamp.IsZero() {
|
||||||
|
requestTimestamp = time.Now()
|
||||||
|
}
|
||||||
|
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest); errWrite != nil {
|
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
|
if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse); errWrite != nil {
|
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
|
return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
|
||||||
@@ -583,7 +600,7 @@ func writeRequestInfoWithBody(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte) error {
|
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
|
||||||
if len(payload) == 0 {
|
if len(payload) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -601,6 +618,11 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
|
|||||||
if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
|
if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
|
if !timestamp.IsZero() {
|
||||||
|
if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
|
||||||
|
return errWrite
|
||||||
|
}
|
||||||
|
}
|
||||||
if _, errWrite := w.Write(payload); errWrite != nil {
|
if _, errWrite := w.Write(payload); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
@@ -974,6 +996,9 @@ type FileStreamingLogWriter struct {
|
|||||||
|
|
||||||
// apiResponse stores the upstream API response data.
|
// apiResponse stores the upstream API response data.
|
||||||
apiResponse []byte
|
apiResponse []byte
|
||||||
|
|
||||||
|
// apiResponseTimestamp captures when the API response was received.
|
||||||
|
apiResponseTimestamp time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteChunkAsync writes a response chunk asynchronously (non-blocking).
|
// WriteChunkAsync writes a response chunk asynchronously (non-blocking).
|
||||||
@@ -1053,6 +1078,12 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
|
||||||
|
if !timestamp.IsZero() {
|
||||||
|
w.apiResponseTimestamp = timestamp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Close finalizes the log file and cleans up resources.
|
// Close finalizes the log file and cleans up resources.
|
||||||
// It writes all buffered data to the file in the correct order:
|
// It writes all buffered data to the file in the correct order:
|
||||||
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
|
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
|
||||||
@@ -1140,10 +1171,10 @@ func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
|
|||||||
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
|
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest); errWrite != nil {
|
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse); errWrite != nil {
|
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse, w.apiResponseTimestamp); errWrite != nil {
|
||||||
return errWrite
|
return errWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1220,6 +1251,8 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}
|
||||||
|
|
||||||
// Close is a no-op implementation that does nothing and always returns nil.
|
// Close is a no-op implementation that does nothing and always returns nil.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
|
|||||||
@@ -1,848 +1,69 @@
|
|||||||
// Package registry provides model definitions for various AI service providers.
|
// Package registry provides model definitions and lookup helpers for various AI providers.
|
||||||
// This file contains static model definitions that can be used by clients
|
// Static model metadata is stored in model_definitions_static_data.go.
|
||||||
// when registering their supported models.
|
|
||||||
package registry
|
package registry
|
||||||
|
|
||||||
// GetClaudeModels returns the standard Claude model definitions
|
import (
|
||||||
func GetClaudeModels() []*ModelInfo {
|
"sort"
|
||||||
return []*ModelInfo{
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
{
|
// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
|
||||||
ID: "claude-haiku-4-5-20251001",
|
// It returns nil when the channel is unknown.
|
||||||
Object: "model",
|
//
|
||||||
Created: 1759276800, // 2025-10-01
|
// Supported channels:
|
||||||
OwnedBy: "anthropic",
|
// - claude
|
||||||
Type: "claude",
|
// - gemini
|
||||||
DisplayName: "Claude 4.5 Haiku",
|
// - vertex
|
||||||
ContextLength: 200000,
|
// - gemini-cli
|
||||||
MaxCompletionTokens: 64000,
|
// - aistudio
|
||||||
// Thinking: not supported for Haiku models
|
// - codex
|
||||||
},
|
// - qwen
|
||||||
{
|
// - iflow
|
||||||
ID: "claude-sonnet-4-5-20250929",
|
// - antigravity (returns static overrides only)
|
||||||
Object: "model",
|
func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
|
||||||
Created: 1759104000, // 2025-09-29
|
key := strings.ToLower(strings.TrimSpace(channel))
|
||||||
OwnedBy: "anthropic",
|
switch key {
|
||||||
Type: "claude",
|
case "claude":
|
||||||
DisplayName: "Claude 4.5 Sonnet",
|
return GetClaudeModels()
|
||||||
ContextLength: 200000,
|
case "gemini":
|
||||||
MaxCompletionTokens: 64000,
|
return GetGeminiModels()
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
case "vertex":
|
||||||
},
|
return GetGeminiVertexModels()
|
||||||
{
|
case "gemini-cli":
|
||||||
ID: "claude-opus-4-5-20251101",
|
return GetGeminiCLIModels()
|
||||||
Object: "model",
|
case "aistudio":
|
||||||
Created: 1761955200, // 2025-11-01
|
return GetAIStudioModels()
|
||||||
OwnedBy: "anthropic",
|
case "codex":
|
||||||
Type: "claude",
|
return GetOpenAIModels()
|
||||||
DisplayName: "Claude 4.5 Opus",
|
case "qwen":
|
||||||
Description: "Premium model combining maximum intelligence with practical performance",
|
return GetQwenModels()
|
||||||
ContextLength: 200000,
|
case "iflow":
|
||||||
MaxCompletionTokens: 64000,
|
return GetIFlowModels()
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
|
case "antigravity":
|
||||||
},
|
cfg := GetAntigravityModelConfig()
|
||||||
{
|
if len(cfg) == 0 {
|
||||||
ID: "claude-opus-4-1-20250805",
|
return nil
|
||||||
Object: "model",
|
}
|
||||||
Created: 1722945600, // 2025-08-05
|
models := make([]*ModelInfo, 0, len(cfg))
|
||||||
OwnedBy: "anthropic",
|
for modelID, entry := range cfg {
|
||||||
Type: "claude",
|
if modelID == "" || entry == nil {
|
||||||
DisplayName: "Claude 4.1 Opus",
|
continue
|
||||||
ContextLength: 200000,
|
}
|
||||||
MaxCompletionTokens: 32000,
|
models = append(models, &ModelInfo{
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
ID: modelID,
|
||||||
},
|
Object: "model",
|
||||||
{
|
OwnedBy: "antigravity",
|
||||||
ID: "claude-opus-4-20250514",
|
Type: "antigravity",
|
||||||
Object: "model",
|
Thinking: entry.Thinking,
|
||||||
Created: 1715644800, // 2025-05-14
|
MaxCompletionTokens: entry.MaxCompletionTokens,
|
||||||
OwnedBy: "anthropic",
|
})
|
||||||
Type: "claude",
|
}
|
||||||
DisplayName: "Claude 4 Opus",
|
sort.Slice(models, func(i, j int) bool {
|
||||||
ContextLength: 200000,
|
return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
|
||||||
MaxCompletionTokens: 32000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-sonnet-4-20250514",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1715644800, // 2025-05-14
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4 Sonnet",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-3-7-sonnet-20250219",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1708300800, // 2025-02-19
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 3.7 Sonnet",
|
|
||||||
ContextLength: 128000,
|
|
||||||
MaxCompletionTokens: 8192,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-3-5-haiku-20241022",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1729555200, // 2024-10-22
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 3.5 Haiku",
|
|
||||||
ContextLength: 128000,
|
|
||||||
MaxCompletionTokens: 8192,
|
|
||||||
// Thinking: not supported for Haiku models
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetGeminiModels returns the standard Gemini model definitions
|
|
||||||
func GetGeminiModels() []*ModelInfo {
|
|
||||||
return []*ModelInfo{
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-pro",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750118400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-pro",
|
|
||||||
Version: "2.5",
|
|
||||||
DisplayName: "Gemini 2.5 Pro",
|
|
||||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-flash",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750118400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-flash",
|
|
||||||
Version: "001",
|
|
||||||
DisplayName: "Gemini 2.5 Flash",
|
|
||||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-flash-lite",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1753142400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-flash-lite",
|
|
||||||
Version: "2.5",
|
|
||||||
DisplayName: "Gemini 2.5 Flash Lite",
|
|
||||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-pro-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1737158400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-pro-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Pro Preview",
|
|
||||||
Description: "Gemini 3 Pro Preview",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-flash-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1765929600,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-flash-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Flash Preview",
|
|
||||||
Description: "Gemini 3 Flash Preview",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-pro-image-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1737158400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-pro-image-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Pro Image Preview",
|
|
||||||
Description: "Gemini 3 Pro Image Preview",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetGeminiVertexModels() []*ModelInfo {
|
|
||||||
return []*ModelInfo{
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-pro",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750118400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-pro",
|
|
||||||
Version: "2.5",
|
|
||||||
DisplayName: "Gemini 2.5 Pro",
|
|
||||||
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-flash",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750118400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-flash",
|
|
||||||
Version: "001",
|
|
||||||
DisplayName: "Gemini 2.5 Flash",
|
|
||||||
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-2.5-flash-lite",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1753142400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-2.5-flash-lite",
|
|
||||||
Version: "2.5",
|
|
||||||
DisplayName: "Gemini 2.5 Flash Lite",
|
|
||||||
Description: "Our smallest and most cost effective model, built for at scale usage.",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-pro-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1737158400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-pro-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Pro Preview",
|
|
||||||
Description: "Gemini 3 Pro Preview",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-flash-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1765929600,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-flash-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Flash Preview",
|
|
||||||
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gemini-3-pro-image-preview",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1737158400,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/gemini-3-pro-image-preview",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Gemini 3 Pro Image Preview",
|
|
||||||
Description: "Gemini 3 Pro Image Preview",
|
|
||||||
InputTokenLimit: 1048576,
|
|
||||||
OutputTokenLimit: 65536,
|
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
|
||||||
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
|
|
||||||
},
|
|
||||||
// Imagen image generation models - use :predict action
|
|
||||||
{
|
|
||||||
ID: "imagen-4.0-generate-001",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750000000,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/imagen-4.0-generate-001",
|
|
||||||
Version: "4.0",
|
|
||||||
DisplayName: "Imagen 4.0 Generate",
|
|
||||||
Description: "Imagen 4.0 image generation model",
|
|
||||||
SupportedGenerationMethods: []string{"predict"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "imagen-4.0-ultra-generate-001",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750000000,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/imagen-4.0-ultra-generate-001",
|
|
||||||
Version: "4.0",
|
|
||||||
DisplayName: "Imagen 4.0 Ultra Generate",
|
|
||||||
Description: "Imagen 4.0 Ultra high-quality image generation model",
|
|
||||||
SupportedGenerationMethods: []string{"predict"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "imagen-3.0-generate-002",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1740000000,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/imagen-3.0-generate-002",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Imagen 3.0 Generate",
|
|
||||||
Description: "Imagen 3.0 image generation model",
|
|
||||||
SupportedGenerationMethods: []string{"predict"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "imagen-3.0-fast-generate-001",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1740000000,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/imagen-3.0-fast-generate-001",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Imagen 3.0 Fast Generate",
|
|
||||||
Description: "Imagen 3.0 fast image generation model",
|
|
||||||
SupportedGenerationMethods: []string{"predict"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "imagen-4.0-fast-generate-001",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1750000000,
|
|
||||||
OwnedBy: "google",
|
|
||||||
Type: "gemini",
|
|
||||||
Name: "models/imagen-4.0-fast-generate-001",
|
|
||||||
Version: "4.0",
|
|
||||||
DisplayName: "Imagen 4.0 Fast Generate",
|
|
||||||
Description: "Imagen 4.0 fast image generation model",
|
|
||||||
SupportedGenerationMethods: []string{"predict"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetGeminiCLIModels returns the standard Gemini model definitions exposed to
// Gemini CLI clients. Each entry carries the upstream resource name, token
// limits, the supported generation methods, and — where the model supports it —
// a ThinkingSupport describing the allowed thinking-budget range and level names.
func GetGeminiCLIModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// Pro does not accept a zero thinking budget (ZeroAllowed: false).
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// Flash allows disabling thinking entirely (ZeroAllowed: true).
			Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// Gemini 3 additionally exposes named thinking levels.
			Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
	}
}
|
|
||||||
|
|
||||||
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations.
// The catalog mirrors GetGeminiCLIModels for the 2.5 family and additionally
// exposes the "-latest" aliases and the Flash image models.
// NOTE(review): the gemini-3 preview entries here omit ThinkingSupport.Levels,
// unlike the same models in other catalogs in this file — confirm intentional.
func GetAIStudioModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Gemini 3 Pro Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		// "-latest" aliases tracking the most recent release of each family.
		{
			ID:                         "gemini-pro-latest",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-pro-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Pro Latest",
			Description:                "Latest release of Gemini Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-flash-latest",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-flash-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Flash Latest",
			Description:                "Latest release of Gemini Flash",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-flash-lite-latest",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-flash-lite-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Flash-Lite Latest",
			Description:                "Latest release of Gemini Flash-Lite",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// NOTE(review): Min here is 512, not 0 as in gemini-2.5-flash-lite — confirm intentional.
			Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-image-preview",
			Object:                     "model",
			Created:                    1756166400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-image-preview",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Image Preview",
			Description:                "State-of-the-art image generation and editing model.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           8192,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// image models don't support thinkingConfig; leave Thinking nil
		},
		{
			ID:                         "gemini-2.5-flash-image",
			Object:                     "model",
			Created:                    1759363200,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-image",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Image",
			Description:                "State-of-the-art image generation and editing model.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           8192,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// image models don't support thinkingConfig; leave Thinking nil
		},
	}
}
|
|
||||||
|
|
||||||
// GetOpenAIModels returns the standard OpenAI model definitions
|
|
||||||
func GetOpenAIModels() []*ModelInfo {
|
|
||||||
return []*ModelInfo{
|
|
||||||
{
|
|
||||||
ID: "gpt-5",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1754524800,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-08-07",
|
|
||||||
DisplayName: "GPT 5",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-codex",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1757894400,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-09-15",
|
|
||||||
DisplayName: "GPT 5 Codex",
|
|
||||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-mini",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762473600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-11-07",
|
|
||||||
DisplayName: "GPT 5 Codex Mini",
|
|
||||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-mini",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex Mini",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-max",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1763424000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-max",
|
|
||||||
DisplayName: "GPT 5.1 Codex Max",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Max",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.2",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1765440000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.2",
|
|
||||||
DisplayName: "GPT 5.2",
|
|
||||||
Description: "Stable version of GPT 5.2",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.2-codex",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1765440000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.2",
|
|
||||||
DisplayName: "GPT 5.2 Codex",
|
|
||||||
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetQwenModels returns the standard Qwen model definitions
|
|
||||||
func GetQwenModels() []*ModelInfo {
|
|
||||||
return []*ModelInfo{
|
|
||||||
{
|
|
||||||
ID: "qwen3-coder-plus",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1753228800,
|
|
||||||
OwnedBy: "qwen",
|
|
||||||
Type: "qwen",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Qwen3 Coder Plus",
|
|
||||||
Description: "Advanced code generation and understanding model",
|
|
||||||
ContextLength: 32768,
|
|
||||||
MaxCompletionTokens: 8192,
|
|
||||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "qwen3-coder-flash",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1753228800,
|
|
||||||
OwnedBy: "qwen",
|
|
||||||
Type: "qwen",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Qwen3 Coder Flash",
|
|
||||||
Description: "Fast code generation model",
|
|
||||||
ContextLength: 8192,
|
|
||||||
MaxCompletionTokens: 2048,
|
|
||||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "vision-model",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1758672000,
|
|
||||||
OwnedBy: "qwen",
|
|
||||||
Type: "qwen",
|
|
||||||
Version: "3.0",
|
|
||||||
DisplayName: "Qwen3 Vision Model",
|
|
||||||
Description: "Vision model model",
|
|
||||||
ContextLength: 32768,
|
|
||||||
MaxCompletionTokens: 2048,
|
|
||||||
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
// Uses level-based configuration so standard normalization flows apply before conversion.
// Only Levels is populated; the numeric budget fields are left at their zero values.
var iFlowThinkingSupport = &ThinkingSupport{
	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
}
|
|
||||||
|
|
||||||
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
|
||||||
func GetIFlowModels() []*ModelInfo {
|
|
||||||
entries := []struct {
|
|
||||||
ID string
|
|
||||||
DisplayName string
|
|
||||||
Description string
|
|
||||||
Created int64
|
|
||||||
Thinking *ThinkingSupport
|
|
||||||
}{
|
|
||||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
|
||||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
|
||||||
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
|
|
||||||
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
|
|
||||||
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
|
|
||||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
|
||||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
|
|
||||||
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
|
||||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
|
||||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
|
|
||||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
|
|
||||||
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
|
|
||||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
|
||||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
|
||||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
|
||||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
|
||||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
|
||||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
|
||||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
|
||||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
|
||||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
|
||||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
|
||||||
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
|
|
||||||
}
|
|
||||||
models := make([]*ModelInfo, 0, len(entries))
|
|
||||||
for _, entry := range entries {
|
|
||||||
models = append(models, &ModelInfo{
|
|
||||||
ID: entry.ID,
|
|
||||||
Object: "model",
|
|
||||||
Created: entry.Created,
|
|
||||||
OwnedBy: "iflow",
|
|
||||||
Type: "iflow",
|
|
||||||
DisplayName: entry.DisplayName,
|
|
||||||
Description: entry.Description,
|
|
||||||
Thinking: entry.Thinking,
|
|
||||||
})
|
})
|
||||||
}
|
return models
|
||||||
return models
|
default:
|
||||||
}
|
return nil
|
||||||
|
|
||||||
// AntigravityModelConfig captures static antigravity model overrides, including
// Thinking budget limits and provider max completion tokens.
type AntigravityModelConfig struct {
	// Thinking describes the supported thinking-budget range and/or levels.
	// A nil value means no thinking override for the model.
	Thinking *ThinkingSupport
	// MaxCompletionTokens is the provider's completion-token cap.
	// A zero value presumably means "no override" — confirm against consumers.
	MaxCompletionTokens int
}
|
|
||||||
|
|
||||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
|
||||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
|
||||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
|
||||||
return map[string]*AntigravityModelConfig{
|
|
||||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
|
||||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
|
||||||
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
|
||||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
|
||||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
|
||||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
|
||||||
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
|
||||||
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
|
||||||
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
|
||||||
"gpt-oss-120b-medium": {},
|
|
||||||
"tab_flash_lite_preview": {},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
846
internal/registry/model_definitions_static_data.go
Normal file
846
internal/registry/model_definitions_static_data.go
Normal file
@@ -0,0 +1,846 @@
|
|||||||
|
// Package registry provides model definitions for various AI service providers.
|
||||||
|
// This file stores the static model metadata catalog.
|
||||||
|
package registry
|
||||||
|
|
||||||
|
// GetClaudeModels returns the standard Claude model definitions.
// Haiku-tier models carry no ThinkingSupport; Sonnet/Opus entries do.
func GetClaudeModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                  "claude-haiku-4-5-20251001",
			Object:              "model",
			Created:             1759276800, // 2025-10-01
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Haiku",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			// Thinking: not supported for Haiku models
		},
		{
			ID:                  "claude-sonnet-4-5-20250929",
			Object:              "model",
			Created:             1759104000, // 2025-09-29
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Sonnet",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
		},
		{
			ID:                  "claude-opus-4-5-20251101",
			Object:              "model",
			Created:             1761955200, // 2025-11-01
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.5 Opus",
			Description:         "Premium model combining maximum intelligence with practical performance",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
		},
		{
			ID:     "claude-opus-4-1-20250805",
			Object: "model",
			// NOTE(review): 1722945600 decodes to 2024-08-06, not 2025-08-05
			// as the model ID and original comment suggest — confirm intended value.
			Created:             1722945600, // 2025-08-05
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4.1 Opus",
			ContextLength:       200000,
			MaxCompletionTokens: 32000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:     "claude-opus-4-20250514",
			Object: "model",
			// NOTE(review): 1715644800 decodes to 2024-05-14, one year earlier
			// than the model ID date — confirm intended value.
			Created:             1715644800, // 2025-05-14
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4 Opus",
			ContextLength:       200000,
			MaxCompletionTokens: 32000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-sonnet-4-20250514",
			Object:              "model",
			Created:             1715644800, // 2025-05-14 (see NOTE on claude-opus-4-20250514)
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 4 Sonnet",
			ContextLength:       200000,
			MaxCompletionTokens: 64000,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:     "claude-3-7-sonnet-20250219",
			Object: "model",
			// NOTE(review): 1708300800 decodes to 2024-02-19, one year earlier
			// than the model ID date — confirm intended value.
			Created:             1708300800, // 2025-02-19
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 3.7 Sonnet",
			ContextLength:       128000,
			MaxCompletionTokens: 8192,
			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
		},
		{
			ID:                  "claude-3-5-haiku-20241022",
			Object:              "model",
			Created:             1729555200, // 2024-10-22
			OwnedBy:             "anthropic",
			Type:                "claude",
			DisplayName:         "Claude 3.5 Haiku",
			ContextLength:       128000,
			MaxCompletionTokens: 8192,
			// Thinking: not supported for Haiku models
		},
	}
}
|
||||||
|
|
||||||
|
// GetGeminiModels returns the standard Gemini model definitions exposed
// for direct Gemini API usage. Each entry is a static catalog record:
// identifiers, token limits, the generation methods the model accepts,
// and (where supported) its thinking-budget configuration.
func GetGeminiModels() []*ModelInfo {
	return []*ModelInfo{
		// Gemini 2.5 Pro: stable release; dynamic thinking budget 128-32768 tokens.
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		// Gemini 2.5 Flash: thinking may be disabled entirely (ZeroAllowed).
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 2.5 Flash Lite: same thinking envelope as 2.5 Flash.
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// Gemini 3 preview models additionally expose named thinking levels.
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Gemini 3 Pro Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Gemini 3 Flash Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID:                         "gemini-3-pro-image-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-image-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Image Preview",
			Description:                "Gemini 3 Pro Image Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
	}
}
|
||||||
|
|
||||||
|
// GetGeminiVertexModels returns the Gemini and Imagen model definitions
// exposed for Vertex AI usage. Gemini entries mirror the standard catalog;
// Imagen entries are image-generation models that use the :predict action
// and therefore list only "predict" as a supported generation method.
func GetGeminiVertexModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Gemini 3 Pro Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
		{
			ID:                         "gemini-3-pro-image-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-image-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Image Preview",
			Description:                "Gemini 3 Pro Image Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		// Imagen image generation models - use :predict action.
		// These entries carry no token limits or thinking config.
		{
			ID:                         "imagen-4.0-generate-001",
			Object:                     "model",
			Created:                    1750000000,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/imagen-4.0-generate-001",
			Version:                    "4.0",
			DisplayName:                "Imagen 4.0 Generate",
			Description:                "Imagen 4.0 image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID:                         "imagen-4.0-ultra-generate-001",
			Object:                     "model",
			Created:                    1750000000,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/imagen-4.0-ultra-generate-001",
			Version:                    "4.0",
			DisplayName:                "Imagen 4.0 Ultra Generate",
			Description:                "Imagen 4.0 Ultra high-quality image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID:                         "imagen-3.0-generate-002",
			Object:                     "model",
			Created:                    1740000000,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/imagen-3.0-generate-002",
			Version:                    "3.0",
			DisplayName:                "Imagen 3.0 Generate",
			Description:                "Imagen 3.0 image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID:                         "imagen-3.0-fast-generate-001",
			Object:                     "model",
			Created:                    1740000000,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/imagen-3.0-fast-generate-001",
			Version:                    "3.0",
			DisplayName:                "Imagen 3.0 Fast Generate",
			Description:                "Imagen 3.0 fast image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
		{
			ID:                         "imagen-4.0-fast-generate-001",
			Object:                     "model",
			Created:                    1750000000,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/imagen-4.0-fast-generate-001",
			Version:                    "4.0",
			DisplayName:                "Imagen 4.0 Fast Generate",
			Description:                "Imagen 4.0 fast image generation model",
			SupportedGenerationMethods: []string{"predict"},
		},
	}
}
|
||||||
|
|
||||||
|
// GetGeminiCLIModels returns the Gemini model definitions exposed for
// Gemini CLI integrations. Compared to GetGeminiModels, this catalog
// omits the image-preview model and uses richer descriptions for the
// Gemini 3 preview entries.
func GetGeminiCLIModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
		},
	}
}
|
||||||
|
|
||||||
|
// GetAIStudioModels returns the Gemini model definitions for AI Studio
// integrations. This catalog adds the "-latest" rolling aliases and the
// image model; note that the Gemini 3 preview entries here do not expose
// named thinking levels (unlike GetGeminiModels/GetGeminiCLIModels) —
// presumably intentional for AI Studio, but worth confirming.
func GetAIStudioModels() []*ModelInfo {
	return []*ModelInfo{
		{
			ID:                         "gemini-2.5-pro",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-pro",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Pro",
			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash",
			Version:                    "001",
			DisplayName:                "Gemini 2.5 Flash",
			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-2.5-flash-lite",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-lite",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Lite",
			Description:                "Our smallest and most cost effective model, built for at scale usage.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-pro-preview",
			Object:                     "model",
			Created:                    1737158400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-pro-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Pro Preview",
			Description:                "Gemini 3 Pro Preview",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-3-flash-preview",
			Object:                     "model",
			Created:                    1765929600,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-3-flash-preview",
			Version:                    "3.0",
			DisplayName:                "Gemini 3 Flash Preview",
			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		// Rolling "-latest" aliases tracking the current 2.5-generation models.
		{
			ID:                         "gemini-pro-latest",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-pro-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Pro Latest",
			Description:                "Latest release of Gemini Pro",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-flash-latest",
			Object:                     "model",
			Created:                    1750118400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-flash-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Flash Latest",
			Description:                "Latest release of Gemini Flash",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		{
			ID:                         "gemini-flash-lite-latest",
			Object:                     "model",
			Created:                    1753142400,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-flash-lite-latest",
			Version:                    "2.5",
			DisplayName:                "Gemini Flash-Lite Latest",
			Description:                "Latest release of Gemini Flash-Lite",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           65536,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
		},
		// {
		// 	ID:                         "gemini-2.5-flash-image-preview",
		// 	Object:                     "model",
		// 	Created:                    1756166400,
		// 	OwnedBy:                    "google",
		// 	Type:                       "gemini",
		// 	Name:                       "models/gemini-2.5-flash-image-preview",
		// 	Version:                    "2.5",
		// 	DisplayName:                "Gemini 2.5 Flash Image Preview",
		// 	Description:                "State-of-the-art image generation and editing model.",
		// 	InputTokenLimit:            1048576,
		// 	OutputTokenLimit:           8192,
		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
		// 	// image models don't support thinkingConfig; leave Thinking nil
		// },
		{
			ID:                         "gemini-2.5-flash-image",
			Object:                     "model",
			Created:                    1759363200,
			OwnedBy:                    "google",
			Type:                       "gemini",
			Name:                       "models/gemini-2.5-flash-image",
			Version:                    "2.5",
			DisplayName:                "Gemini 2.5 Flash Image",
			Description:                "State-of-the-art image generation and editing model.",
			InputTokenLimit:            1048576,
			OutputTokenLimit:           8192,
			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
			// image models don't support thinkingConfig; leave Thinking nil
		},
	}
}
|
||||||
|
|
||||||
|
// GetOpenAIModels returns the standard OpenAI model definitions
|
||||||
|
func GetOpenAIModels() []*ModelInfo {
|
||||||
|
return []*ModelInfo{
|
||||||
|
{
|
||||||
|
ID: "gpt-5",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1754524800,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5-2025-08-07",
|
||||||
|
DisplayName: "GPT 5",
|
||||||
|
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5-codex",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1757894400,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5-2025-09-15",
|
||||||
|
DisplayName: "GPT 5 Codex",
|
||||||
|
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5-codex-mini",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1762473600,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5-2025-11-07",
|
||||||
|
DisplayName: "GPT 5 Codex Mini",
|
||||||
|
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.1",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1762905600,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.1-2025-11-12",
|
||||||
|
DisplayName: "GPT 5",
|
||||||
|
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.1-codex",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1762905600,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.1-2025-11-12",
|
||||||
|
DisplayName: "GPT 5.1 Codex",
|
||||||
|
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.1-codex-mini",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1762905600,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.1-2025-11-12",
|
||||||
|
DisplayName: "GPT 5.1 Codex Mini",
|
||||||
|
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.1-codex-max",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1763424000,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.1-max",
|
||||||
|
DisplayName: "GPT 5.1 Codex Max",
|
||||||
|
Description: "Stable version of GPT 5.1 Codex Max",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.2",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1765440000,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.2",
|
||||||
|
DisplayName: "GPT 5.2",
|
||||||
|
Description: "Stable version of GPT 5.2",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gpt-5.2-codex",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1765440000,
|
||||||
|
OwnedBy: "openai",
|
||||||
|
Type: "openai",
|
||||||
|
Version: "gpt-5.2",
|
||||||
|
DisplayName: "GPT 5.2 Codex",
|
||||||
|
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
|
||||||
|
ContextLength: 400000,
|
||||||
|
MaxCompletionTokens: 128000,
|
||||||
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetQwenModels returns the standard Qwen model definitions
|
||||||
|
func GetQwenModels() []*ModelInfo {
|
||||||
|
return []*ModelInfo{
|
||||||
|
{
|
||||||
|
ID: "qwen3-coder-plus",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1753228800,
|
||||||
|
OwnedBy: "qwen",
|
||||||
|
Type: "qwen",
|
||||||
|
Version: "3.0",
|
||||||
|
DisplayName: "Qwen3 Coder Plus",
|
||||||
|
Description: "Advanced code generation and understanding model",
|
||||||
|
ContextLength: 32768,
|
||||||
|
MaxCompletionTokens: 8192,
|
||||||
|
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "qwen3-coder-flash",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1753228800,
|
||||||
|
OwnedBy: "qwen",
|
||||||
|
Type: "qwen",
|
||||||
|
Version: "3.0",
|
||||||
|
DisplayName: "Qwen3 Coder Flash",
|
||||||
|
Description: "Fast code generation model",
|
||||||
|
ContextLength: 8192,
|
||||||
|
MaxCompletionTokens: 2048,
|
||||||
|
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "vision-model",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1758672000,
|
||||||
|
OwnedBy: "qwen",
|
||||||
|
Type: "qwen",
|
||||||
|
Version: "3.0",
|
||||||
|
DisplayName: "Qwen3 Vision Model",
|
||||||
|
Description: "Vision model model",
|
||||||
|
ContextLength: 32768,
|
||||||
|
MaxCompletionTokens: 2048,
|
||||||
|
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
// Uses level-based configuration so standard normalization flows apply before conversion.
// Shared by reference across multiple model entries, so it must not be mutated.
var iFlowThinkingSupport = &ThinkingSupport{
	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
}
|
||||||
|
|
||||||
|
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
||||||
|
func GetIFlowModels() []*ModelInfo {
|
||||||
|
entries := []struct {
|
||||||
|
ID string
|
||||||
|
DisplayName string
|
||||||
|
Description string
|
||||||
|
Created int64
|
||||||
|
Thinking *ThinkingSupport
|
||||||
|
}{
|
||||||
|
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||||
|
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||||
|
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
|
||||||
|
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
|
||||||
|
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||||
|
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||||
|
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
|
||||||
|
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
|
||||||
|
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
|
||||||
|
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
||||||
|
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||||
|
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||||
|
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||||
|
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||||
|
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||||
|
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||||
|
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
|
||||||
|
}
|
||||||
|
models := make([]*ModelInfo, 0, len(entries))
|
||||||
|
for _, entry := range entries {
|
||||||
|
models = append(models, &ModelInfo{
|
||||||
|
ID: entry.ID,
|
||||||
|
Object: "model",
|
||||||
|
Created: entry.Created,
|
||||||
|
OwnedBy: "iflow",
|
||||||
|
Type: "iflow",
|
||||||
|
DisplayName: entry.DisplayName,
|
||||||
|
Description: entry.Description,
|
||||||
|
Thinking: entry.Thinking,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return models
|
||||||
|
}
|
||||||
|
|
||||||
|
// AntigravityModelConfig captures static antigravity model overrides, including
|
||||||
|
// Thinking budget limits and provider max completion tokens.
|
||||||
|
type AntigravityModelConfig struct {
|
||||||
|
Thinking *ThinkingSupport
|
||||||
|
MaxCompletionTokens int
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||||
|
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||||
|
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||||
|
return map[string]*AntigravityModelConfig{
|
||||||
|
// "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||||
|
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
|
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
|
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||||
|
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||||
|
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||||
|
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
|
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
|
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
||||||
|
"gpt-oss-120b-medium": {},
|
||||||
|
"tab_flash_lite_preview": {},
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -148,7 +148,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
|
|
||||||
attempts := antigravityRetryAttempts(e.cfg)
|
attempts := antigravityRetryAttempts(auth, e.cfg)
|
||||||
|
|
||||||
attemptLoop:
|
attemptLoop:
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; attempt < attempts; attempt++ {
|
||||||
@@ -289,7 +289,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
|||||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
|
|
||||||
attempts := antigravityRetryAttempts(e.cfg)
|
attempts := antigravityRetryAttempts(auth, e.cfg)
|
||||||
|
|
||||||
attemptLoop:
|
attemptLoop:
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; attempt < attempts; attempt++ {
|
||||||
@@ -677,7 +677,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
|||||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
|
|
||||||
attempts := antigravityRetryAttempts(e.cfg)
|
attempts := antigravityRetryAttempts(auth, e.cfg)
|
||||||
|
|
||||||
attemptLoop:
|
attemptLoop:
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; attempt < attempts; attempt++ {
|
||||||
@@ -1447,11 +1447,16 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
|
|||||||
return defaultAntigravityAgent
|
return defaultAntigravityAgent
|
||||||
}
|
}
|
||||||
|
|
||||||
func antigravityRetryAttempts(cfg *config.Config) int {
|
func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
|
||||||
if cfg == nil {
|
retry := 0
|
||||||
return 1
|
if cfg != nil {
|
||||||
|
retry = cfg.RequestRetry
|
||||||
|
}
|
||||||
|
if auth != nil {
|
||||||
|
if override, ok := auth.RequestRetryOverride(); ok {
|
||||||
|
retry = override
|
||||||
|
}
|
||||||
}
|
}
|
||||||
retry := cfg.RequestRetry
|
|
||||||
if retry < 0 {
|
if retry < 0 {
|
||||||
retry = 0
|
retry = 0
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
|
// Package iflow implements thinking configuration for iFlow models.
|
||||||
//
|
//
|
||||||
// iFlow models use boolean toggle semantics:
|
// iFlow models use boolean toggle semantics:
|
||||||
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
|
// - Models using chat_template_kwargs.enable_thinking (boolean toggle)
|
||||||
// - MiniMax models: reasoning_split (boolean)
|
// - MiniMax models: reasoning_split (boolean)
|
||||||
//
|
//
|
||||||
// Level values are converted to boolean: none=false, all others=true
|
// Level values are converted to boolean: none=false, all others=true
|
||||||
@@ -20,6 +20,7 @@ import (
|
|||||||
// Applier implements thinking.ProviderApplier for iFlow models.
|
// Applier implements thinking.ProviderApplier for iFlow models.
|
||||||
//
|
//
|
||||||
// iFlow-specific behavior:
|
// iFlow-specific behavior:
|
||||||
|
// - enable_thinking toggle models: enable_thinking boolean
|
||||||
// - GLM models: enable_thinking boolean + clear_thinking=false
|
// - GLM models: enable_thinking boolean + clear_thinking=false
|
||||||
// - MiniMax models: reasoning_split boolean
|
// - MiniMax models: reasoning_split boolean
|
||||||
// - Level to boolean: none=false, others=true
|
// - Level to boolean: none=false, others=true
|
||||||
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
|
|||||||
return body, nil
|
return body, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if isGLMModel(modelInfo.ID) {
|
if isEnableThinkingModel(modelInfo.ID) {
|
||||||
return applyGLM(body, config), nil
|
return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if isMiniMaxModel(modelInfo.ID) {
|
if isMiniMaxModel(modelInfo.ID) {
|
||||||
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// applyGLM applies thinking configuration for GLM models.
|
// applyEnableThinking applies thinking configuration for models that use
|
||||||
|
// chat_template_kwargs.enable_thinking format.
|
||||||
//
|
//
|
||||||
// Output format when enabled:
|
// Output format when enabled:
|
||||||
//
|
//
|
||||||
@@ -107,9 +109,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
|
|||||||
//
|
//
|
||||||
// {"chat_template_kwargs": {"enable_thinking": false}}
|
// {"chat_template_kwargs": {"enable_thinking": false}}
|
||||||
//
|
//
|
||||||
// Note: clear_thinking is only set when thinking is enabled, to preserve
|
// Note: clear_thinking is only set for GLM models when thinking is enabled.
|
||||||
// thinking output in the response.
|
func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
|
||||||
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
|
||||||
enableThinking := configToBoolean(config)
|
enableThinking := configToBoolean(config)
|
||||||
|
|
||||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||||
@@ -118,8 +119,11 @@ func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
|||||||
|
|
||||||
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||||
|
|
||||||
|
// clear_thinking is a GLM-only knob, strip it for other models.
|
||||||
|
result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
|
||||||
|
|
||||||
// clear_thinking only needed when thinking is enabled
|
// clear_thinking only needed when thinking is enabled
|
||||||
if enableThinking {
|
if enableThinking && setClearThinking {
|
||||||
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
|
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isEnableThinkingModel determines if the model uses chat_template_kwargs.enable_thinking format.
|
||||||
|
func isEnableThinkingModel(modelID string) bool {
|
||||||
|
if isGLMModel(modelID) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
id := strings.ToLower(modelID)
|
||||||
|
switch id {
|
||||||
|
case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// isGLMModel determines if the model is a GLM series model.
|
// isGLMModel determines if the model is a GLM series model.
|
||||||
// GLM models use chat_template_kwargs.enable_thinking format.
|
|
||||||
func isGLMModel(modelID string) bool {
|
func isGLMModel(modelID string) bool {
|
||||||
return strings.HasPrefix(strings.ToLower(modelID), "glm")
|
return strings.HasPrefix(strings.ToLower(modelID), "glm")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
|
|||||||
responseResult := gjson.GetBytes(rawJSON, "response")
|
responseResult := gjson.GetBytes(rawJSON, "response")
|
||||||
if responseResult.Exists() {
|
if responseResult.Exists() {
|
||||||
chunk = []byte(responseResult.Raw)
|
chunk = []byte(responseResult.Raw)
|
||||||
|
chunk = restoreUsageMetadata(chunk)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chunkTemplate := "[]"
|
chunkTemplate := "[]"
|
||||||
@@ -76,7 +77,8 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
|
|||||||
func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
|
func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
|
||||||
responseResult := gjson.GetBytes(rawJSON, "response")
|
responseResult := gjson.GetBytes(rawJSON, "response")
|
||||||
if responseResult.Exists() {
|
if responseResult.Exists() {
|
||||||
return responseResult.Raw
|
chunk := restoreUsageMetadata([]byte(responseResult.Raw))
|
||||||
|
return string(chunk)
|
||||||
}
|
}
|
||||||
return string(rawJSON)
|
return string(rawJSON)
|
||||||
}
|
}
|
||||||
@@ -84,3 +86,15 @@ func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, or
|
|||||||
func GeminiTokenCount(ctx context.Context, count int64) string {
|
func GeminiTokenCount(ctx context.Context, count int64) string {
|
||||||
return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
|
return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// restoreUsageMetadata renames cpaUsageMetadata back to usageMetadata.
|
||||||
|
// The executor renames usageMetadata to cpaUsageMetadata in non-terminal chunks
|
||||||
|
// to preserve usage data while hiding it from clients that don't expect it.
|
||||||
|
// When returning standard Gemini API format, we must restore the original name.
|
||||||
|
func restoreUsageMetadata(chunk []byte) []byte {
|
||||||
|
if cpaUsage := gjson.GetBytes(chunk, "cpaUsageMetadata"); cpaUsage.Exists() {
|
||||||
|
chunk, _ = sjson.SetRawBytes(chunk, "usageMetadata", []byte(cpaUsage.Raw))
|
||||||
|
chunk, _ = sjson.DeleteBytes(chunk, "cpaUsageMetadata")
|
||||||
|
}
|
||||||
|
return chunk
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,95 @@
|
|||||||
|
package gemini
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRestoreUsageMetadata(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input []byte
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "cpaUsageMetadata renamed to usageMetadata",
|
||||||
|
input: []byte(`{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`),
|
||||||
|
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no cpaUsageMetadata unchanged",
|
||||||
|
input: []byte(`{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`),
|
||||||
|
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty input",
|
||||||
|
input: []byte(`{}`),
|
||||||
|
expected: `{}`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := restoreUsageMetadata(tt.input)
|
||||||
|
if string(result) != tt.expected {
|
||||||
|
t.Errorf("restoreUsageMetadata() = %s, want %s", string(result), tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertAntigravityResponseToGeminiNonStream(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input []byte
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "cpaUsageMetadata restored in response",
|
||||||
|
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
|
||||||
|
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "usageMetadata preserved",
|
||||||
|
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}}`),
|
||||||
|
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := ConvertAntigravityResponseToGeminiNonStream(context.Background(), "", nil, nil, tt.input, nil)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("ConvertAntigravityResponseToGeminiNonStream() = %s, want %s", result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertAntigravityResponseToGeminiStream(t *testing.T) {
|
||||||
|
ctx := context.WithValue(context.Background(), "alt", "")
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input []byte
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "cpaUsageMetadata restored in streaming response",
|
||||||
|
input: []byte(`data: {"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
|
||||||
|
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
results := ConvertAntigravityResponseToGemini(ctx, "", nil, nil, tt.input, nil)
|
||||||
|
if len(results) != 1 {
|
||||||
|
t.Fatalf("expected 1 result, got %d", len(results))
|
||||||
|
}
|
||||||
|
if results[0] != tt.expected {
|
||||||
|
t.Errorf("ConvertAntigravityResponseToGemini() = %s, want %s", results[0], tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -499,6 +499,16 @@ func shortenToolNameIfNeeded(name string) string {
|
|||||||
return name[:limit]
|
return name[:limit]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ensureKiroInputSchema(parameters interface{}) interface{} {
|
||||||
|
if parameters != nil {
|
||||||
|
return parameters
|
||||||
|
}
|
||||||
|
return map[string]interface{}{
|
||||||
|
"type": "object",
|
||||||
|
"properties": map[string]interface{}{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// convertClaudeToolsToKiro converts Claude tools to Kiro format
|
// convertClaudeToolsToKiro converts Claude tools to Kiro format
|
||||||
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||||
var kiroTools []KiroToolWrapper
|
var kiroTools []KiroToolWrapper
|
||||||
@@ -509,7 +519,12 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
|||||||
for _, tool := range tools.Array() {
|
for _, tool := range tools.Array() {
|
||||||
name := tool.Get("name").String()
|
name := tool.Get("name").String()
|
||||||
description := tool.Get("description").String()
|
description := tool.Get("description").String()
|
||||||
inputSchema := tool.Get("input_schema").Value()
|
inputSchemaResult := tool.Get("input_schema")
|
||||||
|
var inputSchema interface{}
|
||||||
|
if inputSchemaResult.Exists() && inputSchemaResult.Type != gjson.Null {
|
||||||
|
inputSchema = inputSchemaResult.Value()
|
||||||
|
}
|
||||||
|
inputSchema = ensureKiroInputSchema(inputSchema)
|
||||||
|
|
||||||
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
||||||
originalName := name
|
originalName := name
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ func ConvertOpenAIToolsToKiroFormat(tools []map[string]interface{}) []KiroToolWr
|
|||||||
|
|
||||||
name := kirocommon.GetString(fn, "name")
|
name := kirocommon.GetString(fn, "name")
|
||||||
description := kirocommon.GetString(fn, "description")
|
description := kirocommon.GetString(fn, "description")
|
||||||
parameters := fn["parameters"]
|
parameters := ensureKiroInputSchema(fn["parameters"])
|
||||||
|
|
||||||
if name == "" {
|
if name == "" {
|
||||||
continue
|
continue
|
||||||
@@ -368,4 +368,4 @@ func ConvertClaudeToolUseToOpenAI(toolUseID, toolName string, input map[string]i
|
|||||||
// LogStreamEvent logs a streaming event for debugging
|
// LogStreamEvent logs a streaming event for debugging
|
||||||
func LogStreamEvent(eventType, data string) {
|
func LogStreamEvent(eventType, data string) {
|
||||||
log.Debugf("kiro-openai: stream event type=%s, data_len=%d", eventType, len(data))
|
log.Debugf("kiro-openai: stream event type=%s, data_len=%d", eventType, len(data))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -381,6 +381,16 @@ func shortenToolNameIfNeeded(name string) string {
|
|||||||
return name[:limit]
|
return name[:limit]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ensureKiroInputSchema(parameters interface{}) interface{} {
|
||||||
|
if parameters != nil {
|
||||||
|
return parameters
|
||||||
|
}
|
||||||
|
return map[string]interface{}{
|
||||||
|
"type": "object",
|
||||||
|
"properties": map[string]interface{}{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
|
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
|
||||||
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||||
var kiroTools []KiroToolWrapper
|
var kiroTools []KiroToolWrapper
|
||||||
@@ -401,7 +411,12 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
|||||||
|
|
||||||
name := fn.Get("name").String()
|
name := fn.Get("name").String()
|
||||||
description := fn.Get("description").String()
|
description := fn.Get("description").String()
|
||||||
parameters := fn.Get("parameters").Value()
|
parametersResult := fn.Get("parameters")
|
||||||
|
var parameters interface{}
|
||||||
|
if parametersResult.Exists() && parametersResult.Type != gjson.Null {
|
||||||
|
parameters = parametersResult.Value()
|
||||||
|
}
|
||||||
|
parameters = ensureKiroInputSchema(parameters)
|
||||||
|
|
||||||
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
||||||
originalName := name
|
originalName := name
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ func convertConstToEnum(jsonStr string) string {
|
|||||||
return jsonStr
|
return jsonStr
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertEnumValuesToStrings ensures all enum values are strings.
|
// convertEnumValuesToStrings ensures all enum values are strings and the schema type is set to string.
|
||||||
// Gemini API requires enum values to be of type string, not numbers or booleans.
|
// Gemini API requires enum values to be of type string, not numbers or booleans.
|
||||||
func convertEnumValuesToStrings(jsonStr string) string {
|
func convertEnumValuesToStrings(jsonStr string) string {
|
||||||
for _, p := range findPaths(jsonStr, "enum") {
|
for _, p := range findPaths(jsonStr, "enum") {
|
||||||
@@ -185,19 +185,15 @@ func convertEnumValuesToStrings(jsonStr string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var stringVals []string
|
var stringVals []string
|
||||||
needsConversion := false
|
|
||||||
for _, item := range arr.Array() {
|
for _, item := range arr.Array() {
|
||||||
// Check if any value is not a string
|
|
||||||
if item.Type != gjson.String {
|
|
||||||
needsConversion = true
|
|
||||||
}
|
|
||||||
stringVals = append(stringVals, item.String())
|
stringVals = append(stringVals, item.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only update if we found non-string values
|
// Always update enum values to strings and set type to "string"
|
||||||
if needsConversion {
|
// This ensures compatibility with Antigravity Gemini which only allows enum for STRING type
|
||||||
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
|
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
|
||||||
}
|
parentPath := trimSuffix(p, ".enum")
|
||||||
|
jsonStr, _ = sjson.Set(jsonStr, joinPath(parentPath, "type"), "string")
|
||||||
}
|
}
|
||||||
return jsonStr
|
return jsonStr
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -86,12 +86,19 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
disabled, _ := metadata["disabled"].(bool)
|
||||||
|
status := coreauth.StatusActive
|
||||||
|
if disabled {
|
||||||
|
status = coreauth.StatusDisabled
|
||||||
|
}
|
||||||
|
|
||||||
a := &coreauth.Auth{
|
a := &coreauth.Auth{
|
||||||
ID: id,
|
ID: id,
|
||||||
Provider: provider,
|
Provider: provider,
|
||||||
Label: label,
|
Label: label,
|
||||||
Prefix: prefix,
|
Prefix: prefix,
|
||||||
Status: coreauth.StatusActive,
|
Status: status,
|
||||||
|
Disabled: disabled,
|
||||||
Attributes: map[string]string{
|
Attributes: map[string]string{
|
||||||
"source": full,
|
"source": full,
|
||||||
"path": full,
|
"path": full,
|
||||||
@@ -167,6 +174,16 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
|
|||||||
"virtual_parent_id": primary.ID,
|
"virtual_parent_id": primary.ID,
|
||||||
"type": metadata["type"],
|
"type": metadata["type"],
|
||||||
}
|
}
|
||||||
|
if v, ok := metadata["disable_cooling"]; ok {
|
||||||
|
metadataCopy["disable_cooling"] = v
|
||||||
|
} else if v, ok := metadata["disable-cooling"]; ok {
|
||||||
|
metadataCopy["disable_cooling"] = v
|
||||||
|
}
|
||||||
|
if v, ok := metadata["request_retry"]; ok {
|
||||||
|
metadataCopy["request_retry"] = v
|
||||||
|
} else if v, ok := metadata["request-retry"]; ok {
|
||||||
|
metadataCopy["request_retry"] = v
|
||||||
|
}
|
||||||
proxy := strings.TrimSpace(primary.ProxyURL)
|
proxy := strings.TrimSpace(primary.ProxyURL)
|
||||||
if proxy != "" {
|
if proxy != "" {
|
||||||
metadataCopy["proxy_url"] = proxy
|
metadataCopy["proxy_url"] = proxy
|
||||||
|
|||||||
@@ -69,10 +69,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
|
|||||||
|
|
||||||
// Create a valid auth file
|
// Create a valid auth file
|
||||||
authData := map[string]any{
|
authData := map[string]any{
|
||||||
"type": "claude",
|
"type": "claude",
|
||||||
"email": "test@example.com",
|
"email": "test@example.com",
|
||||||
"proxy_url": "http://proxy.local",
|
"proxy_url": "http://proxy.local",
|
||||||
"prefix": "test-prefix",
|
"prefix": "test-prefix",
|
||||||
|
"disable_cooling": true,
|
||||||
|
"request_retry": 2,
|
||||||
}
|
}
|
||||||
data, _ := json.Marshal(authData)
|
data, _ := json.Marshal(authData)
|
||||||
err := os.WriteFile(filepath.Join(tempDir, "claude-auth.json"), data, 0644)
|
err := os.WriteFile(filepath.Join(tempDir, "claude-auth.json"), data, 0644)
|
||||||
@@ -108,6 +110,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
|
|||||||
if auths[0].ProxyURL != "http://proxy.local" {
|
if auths[0].ProxyURL != "http://proxy.local" {
|
||||||
t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
|
t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
|
||||||
}
|
}
|
||||||
|
if v, ok := auths[0].Metadata["disable_cooling"].(bool); !ok || !v {
|
||||||
|
t.Errorf("expected disable_cooling true, got %v", auths[0].Metadata["disable_cooling"])
|
||||||
|
}
|
||||||
|
if v, ok := auths[0].Metadata["request_retry"].(float64); !ok || int(v) != 2 {
|
||||||
|
t.Errorf("expected request_retry 2, got %v", auths[0].Metadata["request_retry"])
|
||||||
|
}
|
||||||
if auths[0].Status != coreauth.StatusActive {
|
if auths[0].Status != coreauth.StatusActive {
|
||||||
t.Errorf("expected status active, got %s", auths[0].Status)
|
t.Errorf("expected status active, got %s", auths[0].Status)
|
||||||
}
|
}
|
||||||
@@ -336,9 +344,11 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
metadata := map[string]any{
|
metadata := map[string]any{
|
||||||
"project_id": "project-a, project-b, project-c",
|
"project_id": "project-a, project-b, project-c",
|
||||||
"email": "test@example.com",
|
"email": "test@example.com",
|
||||||
"type": "gemini",
|
"type": "gemini",
|
||||||
|
"request_retry": 2,
|
||||||
|
"disable_cooling": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
virtuals := SynthesizeGeminiVirtualAuths(primary, metadata, now)
|
virtuals := SynthesizeGeminiVirtualAuths(primary, metadata, now)
|
||||||
@@ -376,6 +386,12 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
|
|||||||
if v.ProxyURL != "http://proxy.local" {
|
if v.ProxyURL != "http://proxy.local" {
|
||||||
t.Errorf("expected proxy_url http://proxy.local, got %s", v.ProxyURL)
|
t.Errorf("expected proxy_url http://proxy.local, got %s", v.ProxyURL)
|
||||||
}
|
}
|
||||||
|
if vv, ok := v.Metadata["disable_cooling"].(bool); !ok || !vv {
|
||||||
|
t.Errorf("expected disable_cooling true, got %v", v.Metadata["disable_cooling"])
|
||||||
|
}
|
||||||
|
if vv, ok := v.Metadata["request_retry"].(int); !ok || vv != 2 {
|
||||||
|
t.Errorf("expected request_retry 2, got %v", v.Metadata["request_retry"])
|
||||||
|
}
|
||||||
if v.Attributes["runtime_only"] != "true" {
|
if v.Attributes["runtime_only"] != "true" {
|
||||||
t.Error("expected runtime_only=true")
|
t.Error("expected runtime_only=true")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -124,32 +124,47 @@ func (m *Manager) Stream(ctx context.Context, provider string, req *HTTPRequest)
|
|||||||
out := make(chan StreamEvent)
|
out := make(chan StreamEvent)
|
||||||
go func() {
|
go func() {
|
||||||
defer close(out)
|
defer close(out)
|
||||||
|
send := func(ev StreamEvent) bool {
|
||||||
|
if ctx == nil {
|
||||||
|
out <- ev
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
case out <- ev:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
out <- StreamEvent{Err: ctx.Err()}
|
|
||||||
return
|
return
|
||||||
case msg, ok := <-respCh:
|
case msg, ok := <-respCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
out <- StreamEvent{Err: errors.New("wsrelay: stream closed")}
|
_ = send(StreamEvent{Err: errors.New("wsrelay: stream closed")})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
switch msg.Type {
|
switch msg.Type {
|
||||||
case MessageTypeStreamStart:
|
case MessageTypeStreamStart:
|
||||||
resp := decodeResponse(msg.Payload)
|
resp := decodeResponse(msg.Payload)
|
||||||
out <- StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}
|
if okSend := send(StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}); !okSend {
|
||||||
|
return
|
||||||
|
}
|
||||||
case MessageTypeStreamChunk:
|
case MessageTypeStreamChunk:
|
||||||
chunk := decodeChunk(msg.Payload)
|
chunk := decodeChunk(msg.Payload)
|
||||||
out <- StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}
|
if okSend := send(StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}); !okSend {
|
||||||
|
return
|
||||||
|
}
|
||||||
case MessageTypeStreamEnd:
|
case MessageTypeStreamEnd:
|
||||||
out <- StreamEvent{Type: MessageTypeStreamEnd}
|
_ = send(StreamEvent{Type: MessageTypeStreamEnd})
|
||||||
return
|
return
|
||||||
case MessageTypeError:
|
case MessageTypeError:
|
||||||
out <- StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)}
|
_ = send(StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)})
|
||||||
return
|
return
|
||||||
case MessageTypeHTTPResp:
|
case MessageTypeHTTPResp:
|
||||||
resp := decodeResponse(msg.Payload)
|
resp := decodeResponse(msg.Payload)
|
||||||
out <- StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body}
|
_ = send(StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body})
|
||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
|||||||
log.Errorf("Failed to read response body: %v", err)
|
log.Errorf("Failed to read response body: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
|
||||||
_, _ = c.Writer.Write(output)
|
_, _ = c.Writer.Write(output)
|
||||||
c.Set("API_RESPONSE", output)
|
c.Set("API_RESPONSE", output)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -362,6 +362,11 @@ func appendAPIResponse(c *gin.Context, data []byte) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Capture timestamp on first API response
|
||||||
|
if _, exists := c.Get("API_RESPONSE_TIMESTAMP"); !exists {
|
||||||
|
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
|
||||||
|
}
|
||||||
|
|
||||||
if existing, exists := c.Get("API_RESPONSE"); exists {
|
if existing, exists := c.Get("API_RESPONSE"); exists {
|
||||||
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
|
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
|
||||||
combined := make([]byte, 0, len(existingBytes)+len(data)+1)
|
combined := make([]byte, 0, len(existingBytes)+len(data)+1)
|
||||||
@@ -507,6 +512,32 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
|
|||||||
bootstrapRetries := 0
|
bootstrapRetries := 0
|
||||||
maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
|
maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
|
||||||
|
|
||||||
|
sendErr := func(msg *interfaces.ErrorMessage) bool {
|
||||||
|
if ctx == nil {
|
||||||
|
errChan <- msg
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
case errChan <- msg:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sendData := func(chunk []byte) bool {
|
||||||
|
if ctx == nil {
|
||||||
|
dataChan <- chunk
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
case dataChan <- chunk:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bootstrapEligible := func(err error) bool {
|
bootstrapEligible := func(err error) bool {
|
||||||
status := statusFromError(err)
|
status := statusFromError(err)
|
||||||
if status == 0 {
|
if status == 0 {
|
||||||
@@ -566,12 +597,14 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
|
|||||||
addon = hdr.Clone()
|
addon = hdr.Clone()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon}
|
_ = sendErr(&interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if len(chunk.Payload) > 0 {
|
if len(chunk.Payload) > 0 {
|
||||||
sentPayload = true
|
sentPayload = true
|
||||||
dataChan <- cloneBytes(chunk.Payload)
|
if okSendData := sendData(cloneBytes(chunk.Payload)); !okSendData {
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,6 +70,58 @@ func (e *failOnceStreamExecutor) Calls() int {
|
|||||||
return e.calls
|
return e.calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type payloadThenErrorStreamExecutor struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
calls int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) Identifier() string { return "codex" }
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
|
||||||
|
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
|
||||||
|
e.mu.Lock()
|
||||||
|
e.calls++
|
||||||
|
e.mu.Unlock()
|
||||||
|
|
||||||
|
ch := make(chan coreexecutor.StreamChunk, 2)
|
||||||
|
ch <- coreexecutor.StreamChunk{Payload: []byte("partial")}
|
||||||
|
ch <- coreexecutor.StreamChunk{
|
||||||
|
Err: &coreauth.Error{
|
||||||
|
Code: "upstream_closed",
|
||||||
|
Message: "upstream closed",
|
||||||
|
Retryable: false,
|
||||||
|
HTTPStatus: http.StatusBadGateway,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
close(ch)
|
||||||
|
return ch, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
|
||||||
|
return auth, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
|
||||||
|
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
|
||||||
|
return nil, &coreauth.Error{
|
||||||
|
Code: "not_implemented",
|
||||||
|
Message: "HttpRequest not implemented",
|
||||||
|
HTTPStatus: http.StatusNotImplemented,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *payloadThenErrorStreamExecutor) Calls() int {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
return e.calls
|
||||||
|
}
|
||||||
|
|
||||||
func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
||||||
executor := &failOnceStreamExecutor{}
|
executor := &failOnceStreamExecutor{}
|
||||||
manager := coreauth.NewManager(nil, nil, nil)
|
manager := coreauth.NewManager(nil, nil, nil)
|
||||||
@@ -130,3 +182,73 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
|||||||
t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
|
t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
|
||||||
|
executor := &payloadThenErrorStreamExecutor{}
|
||||||
|
manager := coreauth.NewManager(nil, nil, nil)
|
||||||
|
manager.RegisterExecutor(executor)
|
||||||
|
|
||||||
|
auth1 := &coreauth.Auth{
|
||||||
|
ID: "auth1",
|
||||||
|
Provider: "codex",
|
||||||
|
Status: coreauth.StatusActive,
|
||||||
|
Metadata: map[string]any{"email": "test1@example.com"},
|
||||||
|
}
|
||||||
|
if _, err := manager.Register(context.Background(), auth1); err != nil {
|
||||||
|
t.Fatalf("manager.Register(auth1): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
auth2 := &coreauth.Auth{
|
||||||
|
ID: "auth2",
|
||||||
|
Provider: "codex",
|
||||||
|
Status: coreauth.StatusActive,
|
||||||
|
Metadata: map[string]any{"email": "test2@example.com"},
|
||||||
|
}
|
||||||
|
if _, err := manager.Register(context.Background(), auth2); err != nil {
|
||||||
|
t.Fatalf("manager.Register(auth2): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||||
|
registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||||
|
t.Cleanup(func() {
|
||||||
|
registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
|
||||||
|
registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
|
||||||
|
})
|
||||||
|
|
||||||
|
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
|
||||||
|
Streaming: sdkconfig.StreamingConfig{
|
||||||
|
BootstrapRetries: 1,
|
||||||
|
},
|
||||||
|
}, manager)
|
||||||
|
dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
|
||||||
|
if dataChan == nil || errChan == nil {
|
||||||
|
t.Fatalf("expected non-nil channels")
|
||||||
|
}
|
||||||
|
|
||||||
|
var got []byte
|
||||||
|
for chunk := range dataChan {
|
||||||
|
got = append(got, chunk...)
|
||||||
|
}
|
||||||
|
|
||||||
|
var gotErr error
|
||||||
|
var gotStatus int
|
||||||
|
for msg := range errChan {
|
||||||
|
if msg != nil && msg.Error != nil {
|
||||||
|
gotErr = msg.Error
|
||||||
|
gotStatus = msg.StatusCode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if string(got) != "partial" {
|
||||||
|
t.Fatalf("expected payload partial, got %q", string(got))
|
||||||
|
}
|
||||||
|
if gotErr == nil {
|
||||||
|
t.Fatalf("expected terminal error, got nil")
|
||||||
|
}
|
||||||
|
if gotStatus != http.StatusBadGateway {
|
||||||
|
t.Fatalf("expected status %d, got %d", http.StatusBadGateway, gotStatus)
|
||||||
|
}
|
||||||
|
if executor.Calls() != 1 {
|
||||||
|
t.Fatalf("expected 1 stream attempt, got %d", executor.Calls())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -68,14 +68,13 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
case auth.Metadata != nil:
|
case auth.Metadata != nil:
|
||||||
|
auth.Metadata["disabled"] = auth.Disabled
|
||||||
raw, errMarshal := json.Marshal(auth.Metadata)
|
raw, errMarshal := json.Marshal(auth.Metadata)
|
||||||
if errMarshal != nil {
|
if errMarshal != nil {
|
||||||
return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
|
return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
|
||||||
}
|
}
|
||||||
if existing, errRead := os.ReadFile(path); errRead == nil {
|
if existing, errRead := os.ReadFile(path); errRead == nil {
|
||||||
// Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change.
|
if jsonEqual(existing, raw) {
|
||||||
// This prevents the token refresh loop caused by timestamp/expired/expires_in changes.
|
|
||||||
if metadataEqualIgnoringTimestamps(existing, raw, auth.Provider) {
|
|
||||||
return path, nil
|
return path, nil
|
||||||
}
|
}
|
||||||
file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
|
file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
|
||||||
@@ -216,6 +215,11 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
|
|||||||
return nil, fmt.Errorf("stat file: %w", err)
|
return nil, fmt.Errorf("stat file: %w", err)
|
||||||
}
|
}
|
||||||
id := s.idFor(path, baseDir)
|
id := s.idFor(path, baseDir)
|
||||||
|
disabled, _ := metadata["disabled"].(bool)
|
||||||
|
status := cliproxyauth.StatusActive
|
||||||
|
if disabled {
|
||||||
|
status = cliproxyauth.StatusDisabled
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate NextRefreshAfter from expires_at (20 minutes before expiry)
|
// Calculate NextRefreshAfter from expires_at (20 minutes before expiry)
|
||||||
var nextRefreshAfter time.Time
|
var nextRefreshAfter time.Time
|
||||||
@@ -230,7 +234,8 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
|
|||||||
Provider: provider,
|
Provider: provider,
|
||||||
FileName: id,
|
FileName: id,
|
||||||
Label: s.labelFor(metadata),
|
Label: s.labelFor(metadata),
|
||||||
Status: cliproxyauth.StatusActive,
|
Status: status,
|
||||||
|
Disabled: disabled,
|
||||||
Attributes: map[string]string{"path": path},
|
Attributes: map[string]string{"path": path},
|
||||||
Metadata: metadata,
|
Metadata: metadata,
|
||||||
CreatedAt: info.ModTime(),
|
CreatedAt: info.ModTime(),
|
||||||
@@ -308,8 +313,7 @@ func (s *FileTokenStore) baseDirSnapshot() string {
|
|||||||
return s.baseDir
|
return s.baseDir
|
||||||
}
|
}
|
||||||
|
|
||||||
// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
|
// jsonEqual compares two JSON blobs by parsing them into Go objects and deep comparing.
|
||||||
// This function is kept for backward compatibility but can cause refresh loops.
|
|
||||||
func jsonEqual(a, b []byte) bool {
|
func jsonEqual(a, b []byte) bool {
|
||||||
var objA any
|
var objA any
|
||||||
var objB any
|
var objB any
|
||||||
@@ -322,41 +326,6 @@ func jsonEqual(a, b []byte) bool {
|
|||||||
return deepEqualJSON(objA, objB)
|
return deepEqualJSON(objA, objB)
|
||||||
}
|
}
|
||||||
|
|
||||||
// metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
|
|
||||||
// ignoring fields that change on every refresh but don't affect functionality.
|
|
||||||
// This prevents unnecessary file writes that would trigger watcher events and
|
|
||||||
// create refresh loops.
|
|
||||||
// The provider parameter controls whether access_token is ignored: providers like
|
|
||||||
// Google OAuth (gemini, gemini-cli) can re-fetch tokens when needed, while others
|
|
||||||
// like iFlow require the refreshed token to be persisted.
|
|
||||||
func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
|
|
||||||
var objA, objB map[string]any
|
|
||||||
if err := json.Unmarshal(a, &objA); err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if err := json.Unmarshal(b, &objB); err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fields to ignore: these change on every refresh but don't affect authentication logic.
|
|
||||||
// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
|
|
||||||
ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
|
|
||||||
|
|
||||||
// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
|
|
||||||
// we ignore access_token to avoid unnecessary file writes.
|
|
||||||
switch provider {
|
|
||||||
case "gemini", "gemini-cli", "antigravity":
|
|
||||||
ignoredFields = append(ignoredFields, "access_token")
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, field := range ignoredFields {
|
|
||||||
delete(objA, field)
|
|
||||||
delete(objB, field)
|
|
||||||
}
|
|
||||||
|
|
||||||
return deepEqualJSON(objA, objB)
|
|
||||||
}
|
|
||||||
|
|
||||||
func deepEqualJSON(a, b any) bool {
|
func deepEqualJSON(a, b any) bool {
|
||||||
switch valA := a.(type) {
|
switch valA := a.(type) {
|
||||||
case map[string]any:
|
case map[string]any:
|
||||||
|
|||||||
@@ -61,6 +61,15 @@ func SetQuotaCooldownDisabled(disable bool) {
|
|||||||
quotaCooldownDisabled.Store(disable)
|
quotaCooldownDisabled.Store(disable)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func quotaCooldownDisabledForAuth(auth *Auth) bool {
|
||||||
|
if auth != nil {
|
||||||
|
if override, ok := auth.DisableCoolingOverride(); ok {
|
||||||
|
return override
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return quotaCooldownDisabled.Load()
|
||||||
|
}
|
||||||
|
|
||||||
// Result captures execution outcome used to adjust auth state.
|
// Result captures execution outcome used to adjust auth state.
|
||||||
type Result struct {
|
type Result struct {
|
||||||
// AuthID references the auth that produced this result.
|
// AuthID references the auth that produced this result.
|
||||||
@@ -468,20 +477,16 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
|
|||||||
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
_, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
|
||||||
if attempts < 1 {
|
|
||||||
attempts = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
var lastErr error
|
var lastErr error
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; ; attempt++ {
|
||||||
resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
|
resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
|
||||||
if errExec == nil {
|
if errExec == nil {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
lastErr = errExec
|
lastErr = errExec
|
||||||
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
|
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
|
||||||
if !shouldRetry {
|
if !shouldRetry {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -503,20 +508,16 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
|
|||||||
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
_, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
|
||||||
if attempts < 1 {
|
|
||||||
attempts = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
var lastErr error
|
var lastErr error
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; ; attempt++ {
|
||||||
resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
|
resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
|
||||||
if errExec == nil {
|
if errExec == nil {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
lastErr = errExec
|
lastErr = errExec
|
||||||
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
|
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
|
||||||
if !shouldRetry {
|
if !shouldRetry {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -538,20 +539,16 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
|
|||||||
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
_, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
|
||||||
if attempts < 1 {
|
|
||||||
attempts = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
var lastErr error
|
var lastErr error
|
||||||
for attempt := 0; attempt < attempts; attempt++ {
|
for attempt := 0; ; attempt++ {
|
||||||
chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
|
chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
|
||||||
if errStream == nil {
|
if errStream == nil {
|
||||||
return chunks, nil
|
return chunks, nil
|
||||||
}
|
}
|
||||||
lastErr = errStream
|
lastErr = errStream
|
||||||
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait)
|
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, normalized, req.Model, maxWait)
|
||||||
if !shouldRetry {
|
if !shouldRetry {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -721,6 +718,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
|
|||||||
go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
|
go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
|
||||||
defer close(out)
|
defer close(out)
|
||||||
var failed bool
|
var failed bool
|
||||||
|
forward := true
|
||||||
for chunk := range streamChunks {
|
for chunk := range streamChunks {
|
||||||
if chunk.Err != nil && !failed {
|
if chunk.Err != nil && !failed {
|
||||||
failed = true
|
failed = true
|
||||||
@@ -731,7 +729,18 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
|
|||||||
}
|
}
|
||||||
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
|
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
|
||||||
}
|
}
|
||||||
out <- chunk
|
if !forward {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if streamCtx == nil {
|
||||||
|
out <- chunk
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-streamCtx.Done():
|
||||||
|
forward = false
|
||||||
|
case out <- chunk:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if !failed {
|
if !failed {
|
||||||
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
|
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
|
||||||
@@ -1034,11 +1043,15 @@ func (m *Manager) retrySettings() (int, time.Duration) {
|
|||||||
return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
|
return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) {
|
func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) {
|
||||||
if m == nil || len(providers) == 0 {
|
if m == nil || len(providers) == 0 {
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
defaultRetry := int(m.requestRetry.Load())
|
||||||
|
if defaultRetry < 0 {
|
||||||
|
defaultRetry = 0
|
||||||
|
}
|
||||||
providerSet := make(map[string]struct{}, len(providers))
|
providerSet := make(map[string]struct{}, len(providers))
|
||||||
for i := range providers {
|
for i := range providers {
|
||||||
key := strings.TrimSpace(strings.ToLower(providers[i]))
|
key := strings.TrimSpace(strings.ToLower(providers[i]))
|
||||||
@@ -1061,6 +1074,16 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
|
|||||||
if _, ok := providerSet[providerKey]; !ok {
|
if _, ok := providerSet[providerKey]; !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
effectiveRetry := defaultRetry
|
||||||
|
if override, ok := auth.RequestRetryOverride(); ok {
|
||||||
|
effectiveRetry = override
|
||||||
|
}
|
||||||
|
if effectiveRetry < 0 {
|
||||||
|
effectiveRetry = 0
|
||||||
|
}
|
||||||
|
if attempt >= effectiveRetry {
|
||||||
|
continue
|
||||||
|
}
|
||||||
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
|
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
|
||||||
if !blocked || next.IsZero() || reason == blockReasonDisabled {
|
if !blocked || next.IsZero() || reason == blockReasonDisabled {
|
||||||
continue
|
continue
|
||||||
@@ -1077,8 +1100,8 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
|
|||||||
return minWait, found
|
return minWait, found
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
|
func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
|
||||||
if err == nil || attempt >= maxAttempts-1 {
|
if err == nil {
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
if maxWait <= 0 {
|
if maxWait <= 0 {
|
||||||
@@ -1087,7 +1110,7 @@ func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, pro
|
|||||||
if status := statusCodeFromError(err); status == http.StatusOK {
|
if status := statusCodeFromError(err); status == http.StatusOK {
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
wait, found := m.closestCooldownWait(providers, model)
|
wait, found := m.closestCooldownWait(providers, model, attempt)
|
||||||
if !found || wait > maxWait {
|
if !found || wait > maxWait {
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
@@ -1176,7 +1199,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
|||||||
if result.RetryAfter != nil {
|
if result.RetryAfter != nil {
|
||||||
next = now.Add(*result.RetryAfter)
|
next = now.Add(*result.RetryAfter)
|
||||||
} else {
|
} else {
|
||||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
|
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||||
if cooldown > 0 {
|
if cooldown > 0 {
|
||||||
next = now.Add(cooldown)
|
next = now.Add(cooldown)
|
||||||
}
|
}
|
||||||
@@ -1193,7 +1216,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
|||||||
shouldSuspendModel = true
|
shouldSuspendModel = true
|
||||||
setModelQuota = true
|
setModelQuota = true
|
||||||
case 408, 500, 502, 503, 504:
|
case 408, 500, 502, 503, 504:
|
||||||
if quotaCooldownDisabled.Load() {
|
if quotaCooldownDisabledForAuth(auth) {
|
||||||
state.NextRetryAfter = time.Time{}
|
state.NextRetryAfter = time.Time{}
|
||||||
} else {
|
} else {
|
||||||
next := now.Add(1 * time.Minute)
|
next := now.Add(1 * time.Minute)
|
||||||
@@ -1439,7 +1462,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
|||||||
if retryAfter != nil {
|
if retryAfter != nil {
|
||||||
next = now.Add(*retryAfter)
|
next = now.Add(*retryAfter)
|
||||||
} else {
|
} else {
|
||||||
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
|
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||||
if cooldown > 0 {
|
if cooldown > 0 {
|
||||||
next = now.Add(cooldown)
|
next = now.Add(cooldown)
|
||||||
}
|
}
|
||||||
@@ -1449,7 +1472,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
|||||||
auth.NextRetryAfter = next
|
auth.NextRetryAfter = next
|
||||||
case 408, 500, 502, 503, 504:
|
case 408, 500, 502, 503, 504:
|
||||||
auth.StatusMessage = "transient upstream error"
|
auth.StatusMessage = "transient upstream error"
|
||||||
if quotaCooldownDisabled.Load() {
|
if quotaCooldownDisabledForAuth(auth) {
|
||||||
auth.NextRetryAfter = time.Time{}
|
auth.NextRetryAfter = time.Time{}
|
||||||
} else {
|
} else {
|
||||||
auth.NextRetryAfter = now.Add(1 * time.Minute)
|
auth.NextRetryAfter = now.Add(1 * time.Minute)
|
||||||
@@ -1462,11 +1485,11 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
|||||||
}
|
}
|
||||||
|
|
||||||
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
|
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
|
||||||
func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
|
func nextQuotaCooldown(prevLevel int, disableCooling bool) (time.Duration, int) {
|
||||||
if prevLevel < 0 {
|
if prevLevel < 0 {
|
||||||
prevLevel = 0
|
prevLevel = 0
|
||||||
}
|
}
|
||||||
if quotaCooldownDisabled.Load() {
|
if disableCooling {
|
||||||
return 0, prevLevel
|
return 0, prevLevel
|
||||||
}
|
}
|
||||||
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
|
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
|
||||||
@@ -1642,6 +1665,9 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
|
|||||||
if m.store == nil || auth == nil {
|
if m.store == nil || auth == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if shouldSkipPersist(ctx) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
if auth.Attributes != nil {
|
if auth.Attributes != nil {
|
||||||
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
|
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
97
sdk/cliproxy/auth/conductor_overrides_test.go
Normal file
97
sdk/cliproxy/auth/conductor_overrides_test.go
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
|
||||||
|
m := NewManager(nil, nil, nil)
|
||||||
|
m.SetRetryConfig(3, 30*time.Second)
|
||||||
|
|
||||||
|
model := "test-model"
|
||||||
|
next := time.Now().Add(5 * time.Second)
|
||||||
|
|
||||||
|
auth := &Auth{
|
||||||
|
ID: "auth-1",
|
||||||
|
Provider: "claude",
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"request_retry": float64(0),
|
||||||
|
},
|
||||||
|
ModelStates: map[string]*ModelState{
|
||||||
|
model: {
|
||||||
|
Unavailable: true,
|
||||||
|
Status: StatusError,
|
||||||
|
NextRetryAfter: next,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||||
|
t.Fatalf("register auth: %v", errRegister)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, maxWait := m.retrySettings()
|
||||||
|
wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait)
|
||||||
|
if shouldRetry {
|
||||||
|
t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait)
|
||||||
|
}
|
||||||
|
|
||||||
|
auth.Metadata["request_retry"] = float64(1)
|
||||||
|
if _, errUpdate := m.Update(context.Background(), auth); errUpdate != nil {
|
||||||
|
t.Fatalf("update auth: %v", errUpdate)
|
||||||
|
}
|
||||||
|
|
||||||
|
wait, shouldRetry = m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait)
|
||||||
|
if !shouldRetry {
|
||||||
|
t.Fatalf("expected shouldRetry=true for request_retry=1, got false")
|
||||||
|
}
|
||||||
|
if wait <= 0 {
|
||||||
|
t.Fatalf("expected wait > 0, got %v", wait)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, shouldRetry = m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 1, []string{"claude"}, model, maxWait)
|
||||||
|
if shouldRetry {
|
||||||
|
t.Fatalf("expected shouldRetry=false on attempt=1 for request_retry=1, got true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
|
||||||
|
prev := quotaCooldownDisabled.Load()
|
||||||
|
quotaCooldownDisabled.Store(false)
|
||||||
|
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
|
||||||
|
|
||||||
|
m := NewManager(nil, nil, nil)
|
||||||
|
|
||||||
|
auth := &Auth{
|
||||||
|
ID: "auth-1",
|
||||||
|
Provider: "claude",
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"disable_cooling": true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||||
|
t.Fatalf("register auth: %v", errRegister)
|
||||||
|
}
|
||||||
|
|
||||||
|
model := "test-model"
|
||||||
|
m.MarkResult(context.Background(), Result{
|
||||||
|
AuthID: "auth-1",
|
||||||
|
Provider: "claude",
|
||||||
|
Model: model,
|
||||||
|
Success: false,
|
||||||
|
Error: &Error{HTTPStatus: 500, Message: "boom"},
|
||||||
|
})
|
||||||
|
|
||||||
|
updated, ok := m.GetByID("auth-1")
|
||||||
|
if !ok || updated == nil {
|
||||||
|
t.Fatalf("expected auth to be present")
|
||||||
|
}
|
||||||
|
state := updated.ModelStates[model]
|
||||||
|
if state == nil {
|
||||||
|
t.Fatalf("expected model state to be present")
|
||||||
|
}
|
||||||
|
if !state.NextRetryAfter.IsZero() {
|
||||||
|
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
|
||||||
|
}
|
||||||
|
}
|
||||||
24
sdk/cliproxy/auth/persist_policy.go
Normal file
24
sdk/cliproxy/auth/persist_policy.go
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import "context"
|
||||||
|
|
||||||
|
type skipPersistContextKey struct{}
|
||||||
|
|
||||||
|
// WithSkipPersist returns a derived context that disables persistence for Manager Update/Register calls.
|
||||||
|
// It is intended for code paths that are reacting to file watcher events, where the file on disk is
|
||||||
|
// already the source of truth and persisting again would create a write-back loop.
|
||||||
|
func WithSkipPersist(ctx context.Context) context.Context {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
|
return context.WithValue(ctx, skipPersistContextKey{}, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func shouldSkipPersist(ctx context.Context) bool {
|
||||||
|
if ctx == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
v := ctx.Value(skipPersistContextKey{})
|
||||||
|
enabled, ok := v.(bool)
|
||||||
|
return ok && enabled
|
||||||
|
}
|
||||||
62
sdk/cliproxy/auth/persist_policy_test.go
Normal file
62
sdk/cliproxy/auth/persist_policy_test.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// countingStore is a Store stub that records how many times Save is invoked,
// letting tests assert whether a Manager call persisted or skipped persistence.
type countingStore struct {
	saveCount atomic.Int32
}

// List reports no stored auths; these tests only observe Save traffic.
func (s *countingStore) List(context.Context) ([]*Auth, error) { return nil, nil }

// Save increments the call counter and reports success without any real I/O.
func (s *countingStore) Save(context.Context, *Auth) (string, error) {
	s.saveCount.Add(1)
	return "", nil
}

// Delete is a no-op stub.
func (s *countingStore) Delete(context.Context, string) error { return nil }
|
||||||
|
|
||||||
|
func TestWithSkipPersist_DisablesUpdatePersistence(t *testing.T) {
|
||||||
|
store := &countingStore{}
|
||||||
|
mgr := NewManager(store, nil, nil)
|
||||||
|
auth := &Auth{
|
||||||
|
ID: "auth-1",
|
||||||
|
Provider: "antigravity",
|
||||||
|
Metadata: map[string]any{"type": "antigravity"},
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := mgr.Update(context.Background(), auth); err != nil {
|
||||||
|
t.Fatalf("Update returned error: %v", err)
|
||||||
|
}
|
||||||
|
if got := store.saveCount.Load(); got != 1 {
|
||||||
|
t.Fatalf("expected 1 Save call, got %d", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxSkip := WithSkipPersist(context.Background())
|
||||||
|
if _, err := mgr.Update(ctxSkip, auth); err != nil {
|
||||||
|
t.Fatalf("Update(skipPersist) returned error: %v", err)
|
||||||
|
}
|
||||||
|
if got := store.saveCount.Load(); got != 1 {
|
||||||
|
t.Fatalf("expected Save call count to remain 1, got %d", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWithSkipPersist_DisablesRegisterPersistence(t *testing.T) {
|
||||||
|
store := &countingStore{}
|
||||||
|
mgr := NewManager(store, nil, nil)
|
||||||
|
auth := &Auth{
|
||||||
|
ID: "auth-1",
|
||||||
|
Provider: "antigravity",
|
||||||
|
Metadata: map[string]any{"type": "antigravity"},
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := mgr.Register(WithSkipPersist(context.Background()), auth); err != nil {
|
||||||
|
t.Fatalf("Register(skipPersist) returned error: %v", err)
|
||||||
|
}
|
||||||
|
if got := store.saveCount.Load(); got != 0 {
|
||||||
|
t.Fatalf("expected 0 Save calls, got %d", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -194,6 +194,108 @@ func (a *Auth) ProxyInfo() string {
|
|||||||
return "via proxy"
|
return "via proxy"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DisableCoolingOverride returns the auth-file scoped disable_cooling override when present.
|
||||||
|
// The value is read from metadata key "disable_cooling" (or legacy "disable-cooling").
|
||||||
|
func (a *Auth) DisableCoolingOverride() (bool, bool) {
|
||||||
|
if a == nil || a.Metadata == nil {
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
if val, ok := a.Metadata["disable_cooling"]; ok {
|
||||||
|
if parsed, okParse := parseBoolAny(val); okParse {
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if val, ok := a.Metadata["disable-cooling"]; ok {
|
||||||
|
if parsed, okParse := parseBoolAny(val); okParse {
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// RequestRetryOverride returns the auth-file scoped request_retry override when present.
|
||||||
|
// The value is read from metadata key "request_retry" (or legacy "request-retry").
|
||||||
|
func (a *Auth) RequestRetryOverride() (int, bool) {
|
||||||
|
if a == nil || a.Metadata == nil {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
if val, ok := a.Metadata["request_retry"]; ok {
|
||||||
|
if parsed, okParse := parseIntAny(val); okParse {
|
||||||
|
if parsed < 0 {
|
||||||
|
parsed = 0
|
||||||
|
}
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if val, ok := a.Metadata["request-retry"]; ok {
|
||||||
|
if parsed, okParse := parseIntAny(val); okParse {
|
||||||
|
if parsed < 0 {
|
||||||
|
parsed = 0
|
||||||
|
}
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseBoolAny(val any) (bool, bool) {
|
||||||
|
switch typed := val.(type) {
|
||||||
|
case bool:
|
||||||
|
return typed, true
|
||||||
|
case string:
|
||||||
|
trimmed := strings.TrimSpace(typed)
|
||||||
|
if trimmed == "" {
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
parsed, err := strconv.ParseBool(trimmed)
|
||||||
|
if err != nil {
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
return parsed, true
|
||||||
|
case float64:
|
||||||
|
return typed != 0, true
|
||||||
|
case json.Number:
|
||||||
|
parsed, err := typed.Int64()
|
||||||
|
if err != nil {
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
return parsed != 0, true
|
||||||
|
default:
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseIntAny(val any) (int, bool) {
|
||||||
|
switch typed := val.(type) {
|
||||||
|
case int:
|
||||||
|
return typed, true
|
||||||
|
case int32:
|
||||||
|
return int(typed), true
|
||||||
|
case int64:
|
||||||
|
return int(typed), true
|
||||||
|
case float64:
|
||||||
|
return int(typed), true
|
||||||
|
case json.Number:
|
||||||
|
parsed, err := typed.Int64()
|
||||||
|
if err != nil {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return int(parsed), true
|
||||||
|
case string:
|
||||||
|
trimmed := strings.TrimSpace(typed)
|
||||||
|
if trimmed == "" {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
parsed, err := strconv.Atoi(trimmed)
|
||||||
|
if err != nil {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return parsed, true
|
||||||
|
default:
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (a *Auth) AccountInfo() (string, string) {
|
func (a *Auth) AccountInfo() (string, string) {
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return "", ""
|
return "", ""
|
||||||
|
|||||||
@@ -135,6 +135,7 @@ func (s *Service) ensureAuthUpdateQueue(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Service) consumeAuthUpdates(ctx context.Context) {
|
func (s *Service) consumeAuthUpdates(ctx context.Context) {
|
||||||
|
ctx = coreauth.WithSkipPersist(ctx)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
@@ -707,6 +708,10 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
|
|||||||
if a == nil || a.ID == "" {
|
if a == nil || a.ID == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if a.Disabled {
|
||||||
|
GlobalModelRegistry().UnregisterClient(a.ID)
|
||||||
|
return
|
||||||
|
}
|
||||||
authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
|
authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
|
||||||
if authKind == "" {
|
if authKind == "" {
|
||||||
if kind, _ := a.AccountInfo(); strings.EqualFold(kind, "api_key") {
|
if kind, _ := a.AccountInfo(); strings.EqualFold(kind, "api_key") {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -2778,12 +2779,18 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
|
|||||||
|
|
||||||
// Verify clear_thinking for iFlow GLM models when enable_thinking=true
|
// Verify clear_thinking for iFlow GLM models when enable_thinking=true
|
||||||
if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
|
if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
|
||||||
|
baseModel := thinking.ParseSuffix(tc.model).ModelName
|
||||||
|
isGLM := strings.HasPrefix(strings.ToLower(baseModel), "glm")
|
||||||
ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
|
ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
|
||||||
if !ctVal.Exists() {
|
if isGLM {
|
||||||
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
|
if !ctVal.Exists() {
|
||||||
}
|
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
|
||||||
if ctVal.Bool() != false {
|
}
|
||||||
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
|
if ctVal.Bool() != false {
|
||||||
|
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
|
||||||
|
}
|
||||||
|
} else if ctVal.Exists() {
|
||||||
|
t.Fatalf("expected no clear_thinking field for non-GLM enable_thinking model, body=%s", string(body))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user