Merge pull request #153 from router-for-me/plus

v6.7.31
Merge branch 'main' into plus
2026-03-09 15:25:17 +00:00 · 2026-01-30 20:46:42 +08:00 · 2026-01-30 20:45:33 +08:00 · 2026-01-30 09:15:00 +08:00 · 2026-01-30 07:26:36 +08:00 · 2026-01-30 04:17:56 +08:00
142 changed files with 16103 additions and 4594 deletions
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -10,13 +10,11 @@ env:
  DOCKERHUB_REPO: eceasy/cli-proxy-api-plus

 jobs:
-  docker:
+  docker_amd64:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to DockerHub
@@ -29,19 +27,113 @@ jobs:
          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
-      - name: Build and push
+      - name: Build and push (amd64)
        uses: docker/build-push-action@v6
        with:
          context: .
-          platforms: |
-            linux/amd64
-            linux/arm64
+          platforms: linux/amd64
          push: true
          build-args: |
            VERSION=${{ env.VERSION }}
            COMMIT=${{ env.COMMIT }}
            BUILD_DATE=${{ env.BUILD_DATE }}
          tags: |
-            ${{ env.DOCKERHUB_REPO }}:latest
-            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}
+            ${{ env.DOCKERHUB_REPO }}:latest-amd64
+            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-amd64

+  docker_arm64:
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Generate Build Metadata
+        run: |
+          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
+          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
+      - name: Build and push (arm64)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/arm64
+          push: true
+          build-args: |
+            VERSION=${{ env.VERSION }}
+            COMMIT=${{ env.COMMIT }}
+            BUILD_DATE=${{ env.BUILD_DATE }}
+          tags: |
+            ${{ env.DOCKERHUB_REPO }}:latest-arm64
+            ${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-arm64
+
+  docker_manifest:
+    runs-on: ubuntu-latest
+    needs:
+      - docker_amd64
+      - docker_arm64
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Generate Build Metadata
+        run: |
+          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
+          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
+      - name: Create and push multi-arch manifests
+        run: |
+          docker buildx imagetools create \
+            --tag "${DOCKERHUB_REPO}:latest" \
+            "${DOCKERHUB_REPO}:latest-amd64" \
+            "${DOCKERHUB_REPO}:latest-arm64"
+          docker buildx imagetools create \
+            --tag "${DOCKERHUB_REPO}:${VERSION}" \
+            "${DOCKERHUB_REPO}:${VERSION}-amd64" \
+            "${DOCKERHUB_REPO}:${VERSION}-arm64"
+      - name: Cleanup temporary tags
+        continue-on-error: true
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          namespace="${DOCKERHUB_REPO%%/*}"
+          repo_name="${DOCKERHUB_REPO#*/}"
+
+          token="$(
+            curl -fsSL \
+              -H 'Content-Type: application/json' \
+              -d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
+              'https://hub.docker.com/v2/users/login/' \
+              | python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])'
+          )"
+
+          delete_tag() {
+            local tag="$1"
+            local url="https://hub.docker.com/v2/repositories/${namespace}/${repo_name}/tags/${tag}/"
+            local http_code
+            http_code="$(curl -sS -o /dev/null -w "%{http_code}" -X DELETE -H "Authorization: JWT ${token}" "${url}" || true)"
+            if [ "${http_code}" = "204" ] || [ "${http_code}" = "404" ]; then
+              echo "Docker Hub tag removed (or missing): ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
+              return 0
+            fi
+            echo "Docker Hub tag delete failed: ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
+            return 0
+          }
+
+          delete_tag "latest-amd64"
+          delete_tag "latest-arm64"
+          delete_tag "${VERSION}-amd64"
+          delete_tag "${VERSION}-arm64"
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,4 @@ _bmad-output/*
 # macOS
 .DS_Store
 ._*
+*.bak
--- a/README.md
+++ b/README.md
@@ -13,6 +13,82 @@ The Plus release stays in lockstep with the mainline features.
 - Added GitHub Copilot support (OAuth login), provided by [em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)
 - Added Kiro (AWS CodeWhisperer) support (OAuth login), provided by [fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration), [Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)

+## New Features (Plus Enhanced)
+
+- **OAuth Web Authentication**: Browser-based OAuth login for Kiro with a beautiful web UI
+- **Rate Limiter**: Built-in request rate limiting to prevent API abuse
+- **Background Token Refresh**: Automatic token refresh 10 minutes before expiration
+- **Metrics & Monitoring**: Request metrics collection for monitoring and debugging
+- **Device Fingerprint**: Device fingerprint generation for enhanced security
+- **Cooldown Management**: Smart cooldown mechanism for API rate limits
+- **Usage Checker**: Real-time usage monitoring and quota management
+- **Model Converter**: Unified model name conversion across providers
+- **UTF-8 Stream Processing**: Improved streaming response handling
+
+## Kiro Authentication
+
+### Web-based OAuth Login
+
+Access the Kiro OAuth web interface at:
+
+```
+http://your-server:8317/v0/oauth/kiro
+```
+
+This provides a browser-based OAuth flow for Kiro (AWS CodeWhisperer) authentication with:
+- AWS Builder ID login
+- AWS Identity Center (IDC) login
+- Token import from Kiro IDE
+
+## Quick Deployment with Docker
+
+### One-Command Deployment
+
+```bash
+# Create deployment directory
+mkdir -p ~/cli-proxy && cd ~/cli-proxy
+
+# Create docker-compose.yml
+cat > docker-compose.yml << 'EOF'
+services:
+  cli-proxy-api:
+    image: eceasy/cli-proxy-api-plus:latest
+    container_name: cli-proxy-api-plus
+    ports:
+      - "8317:8317"
+    volumes:
+      - ./config.yaml:/CLIProxyAPI/config.yaml
+      - ./auths:/root/.cli-proxy-api
+      - ./logs:/CLIProxyAPI/logs
+    restart: unless-stopped
+EOF
+
+# Download example config
+curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
+
+# Pull and start
+docker compose pull && docker compose up -d
+```
+
+### Configuration
+
+Edit `config.yaml` before starting:
+
+```yaml
+# Basic configuration example
+server:
+  port: 8317
+
+# Add your provider configurations here
+```
+
+### Update to Latest Version
+
+```bash
+cd ~/cli-proxy
+docker compose pull && docker compose up -d
+```
+
 ## Contributing

 This project only accepts pull requests that relate to third-party provider support. Any pull requests unrelated to third-party provider support will be rejected.
--- a/README_CN.md
+++ b/README_CN.md
@@ -13,6 +13,82 @@
 - 新增 GitHub Copilot 支持（OAuth 登录），由[em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)提供
 - 新增 Kiro (AWS CodeWhisperer) 支持 (OAuth 登录), 由[fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)、[Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)提供

+## 新增功能 (Plus 增强版)
+
+- **OAuth Web 认证**: 基于浏览器的 Kiro OAuth 登录，提供美观的 Web UI
+- **请求限流器**: 内置请求限流，防止 API 滥用
+- **后台令牌刷新**: 过期前 10 分钟自动刷新令牌
+- **监控指标**: 请求指标收集，用于监控和调试
+- **设备指纹**: 设备指纹生成，增强安全性
+- **冷却管理**: 智能冷却机制，应对 API 速率限制
+- **用量检查器**: 实时用量监控和配额管理
+- **模型转换器**: 跨供应商的统一模型名称转换
+- **UTF-8 流处理**: 改进的流式响应处理
+
+## Kiro 认证
+
+### 网页端 OAuth 登录
+
+访问 Kiro OAuth 网页认证界面：
+
+```
+http://your-server:8317/v0/oauth/kiro
+```
+
+提供基于浏览器的 Kiro (AWS CodeWhisperer) OAuth 认证流程，支持：
+- AWS Builder ID 登录
+- AWS Identity Center (IDC) 登录
+- 从 Kiro IDE 导入令牌
+
+## Docker 快速部署
+
+### 一键部署
+
+```bash
+# 创建部署目录
+mkdir -p ~/cli-proxy && cd ~/cli-proxy
+
+# 创建 docker-compose.yml
+cat > docker-compose.yml << 'EOF'
+services:
+  cli-proxy-api:
+    image: eceasy/cli-proxy-api-plus:latest
+    container_name: cli-proxy-api-plus
+    ports:
+      - "8317:8317"
+    volumes:
+      - ./config.yaml:/CLIProxyAPI/config.yaml
+      - ./auths:/root/.cli-proxy-api
+      - ./logs:/CLIProxyAPI/logs
+    restart: unless-stopped
+EOF
+
+# 下载示例配置
+curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
+
+# 拉取并启动
+docker compose pull && docker compose up -d
+```
+
+### 配置说明
+
+启动前请编辑 `config.yaml`：
+
+```yaml
+# 基本配置示例
+server:
+  port: 8317
+
+# 在此添加你的供应商配置
+```
+
+### 更新到最新版本
+
+```bash
+cd ~/cli-proxy
+docker compose pull && docker compose up -d
+```
+
 ## 贡献

 该项目仅接受第三方供应商支持的 Pull Request。任何非第三方供应商支持的 Pull Request 都将被拒绝。
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -17,6 +17,7 @@ import (

 	"github.com/joho/godotenv"
 	configaccess "github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/buildinfo"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cmd"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -533,6 +534,13 @@ func main() {
 		}
 		// Start the main proxy service
 		managementasset.StartAutoUpdater(context.Background(), configFilePath)
+
+		// Initialize and start the Kiro token background refresh
+		if cfg.AuthDir != "" {
+			kiro.InitializeAndStart(cfg.AuthDir, cfg)
+			defer kiro.StopGlobalRefreshManager()
+		}
+
 		cmd.StartService(cfg, configFilePath, password)
 	}
 }
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -146,6 +146,15 @@ codex-instructions-enabled: false
 #       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
 #       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
+#     cloak:                         # optional: request cloaking for non-Claude-Code clients
+#       mode: "auto"                 # "auto" (default): cloak only when client is not Claude Code
+#                                    # "always": always apply cloaking
+#                                    # "never": never apply cloaking
+#       strict-mode: false           # false (default): prepend Claude Code prompt to user system messages
+#                                    # true: strip all user system messages, keep only Claude Code prompt
+#       sensitive-words:             # optional: words to obfuscate with zero-width characters
+#         - "API"
+#         - "proxy"

 # Kiro (AWS CodeWhisperer) configuration
 # Note: Kiro API currently only operates in us-east-1 region
--- a/go.mod
+++ b/go.mod
@@ -21,6 +21,7 @@ require (
 	golang.org/x/crypto v0.45.0
 	golang.org/x/net v0.47.0
 	golang.org/x/oauth2 v0.30.0
+	golang.org/x/sync v0.18.0
 	golang.org/x/term v0.37.0
 	gopkg.in/natefinch/lumberjack.v2 v2.2.1
 	gopkg.in/yaml.v3 v3.0.1
@@ -39,6 +40,7 @@ require (
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
+	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
 	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/go-git/gcfg/v2 v2.0.2 // indirect
@@ -68,8 +70,8 @@ require (
 	github.com/tidwall/pretty v1.2.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
 	golang.org/x/arch v0.8.0 // indirect
-	golang.org/x/sync v0.18.0 // indirect
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
 	google.golang.org/protobuf v1.34.1 // indirect
--- a/go.sum
+++ b/go.sum
@@ -35,6 +35,8 @@ github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc
 github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
 github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
 github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
 github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
 github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
 github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
@@ -157,6 +159,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
 github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
 golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"time"

+	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -70,7 +71,7 @@ type apiCallResponse struct {
 //	- Authorization: Bearer <key>
 //	- X-Management-Key: <key>
 //
-// Request JSON:
+// Request JSON (supports both application/json and application/cbor):
 //   - auth_index / authIndex / AuthIndex (optional):
 //     The credential "auth_index" from GET /v0/management/auth-files (or other endpoints returning it).
 //     If omitted or not found, credential-specific proxy/token substitution is skipped.
@@ -90,10 +91,12 @@ type apiCallResponse struct {
 //  2. Global config proxy-url
 //  3. Direct connect (environment proxies are not used)
 //
-// Response JSON (returned with HTTP 200 when the APICall itself succeeds):
-//   - status_code: Upstream HTTP status code.
-//   - header: Upstream response headers.
-//   - body: Upstream response body as string.
+// Response (returned with HTTP 200 when the APICall itself succeeds):
+//
+//	Format matches request Content-Type (application/json or application/cbor)
+//	- status_code: Upstream HTTP status code.
+//	- header: Upstream response headers.
+//	- body: Upstream response body as string.
 //
 // Example:
 //
@@ -107,10 +110,28 @@ type apiCallResponse struct {
 //	  -H "Content-Type: application/json" \
 //	  -d '{"auth_index":"<AUTH_INDEX>","method":"POST","url":"https://api.example.com/v1/fetchAvailableModels","header":{"Authorization":"Bearer $TOKEN$","Content-Type":"application/json","User-Agent":"cliproxyapi"},"data":"{}"}'
 func (h *Handler) APICall(c *gin.Context) {
+	// Detect content type
+	contentType := strings.ToLower(strings.TrimSpace(c.GetHeader("Content-Type")))
+	isCBOR := strings.Contains(contentType, "application/cbor")
+
 	var body apiCallRequest
-	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
-		return
+
+	// Parse request body based on content type
+	if isCBOR {
+		rawBody, errRead := io.ReadAll(c.Request.Body)
+		if errRead != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read request body"})
+			return
+		}
+		if errUnmarshal := cbor.Unmarshal(rawBody, &body); errUnmarshal != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid cbor body"})
+			return
+		}
+	} else {
+		if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
+			return
+		}
 	}

 	method := strings.ToUpper(strings.TrimSpace(body.Method))
@@ -209,11 +230,23 @@ func (h *Handler) APICall(c *gin.Context) {
 		return
 	}

-	c.JSON(http.StatusOK, apiCallResponse{
+	response := apiCallResponse{
 		StatusCode: resp.StatusCode,
 		Header:     resp.Header,
 		Body:       string(respBody),
-	})
+	}
+
+	// Return response in the same format as the request
+	if isCBOR {
+		cborData, errMarshal := cbor.Marshal(response)
+		if errMarshal != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode cbor response"})
+			return
+		}
+		c.Data(http.StatusOK, "application/cbor", cborData)
+	} else {
+		c.JSON(http.StatusOK, response)
+	}
 }

 func firstNonEmptyString(values ...*string) string {
--- a/internal/api/handlers/management/api_tools_cbor_test.go
+++ b/internal/api/handlers/management/api_tools_cbor_test.go
@@ -0,0 +1,149 @@
+package management
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/fxamacker/cbor/v2"
+	"github.com/gin-gonic/gin"
+)
+// TestAPICall_CBOR_Support drives Handler.APICall with a JSON body and a CBOR body, checks that a 200 response uses the same Content-Type as the request, and round-trips apiCallRequest through CBOR.
+func TestAPICall_CBOR_Support(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Zero-value Handler; none of its fields are populated for these subtests.
+	h := &Handler{}
+
+	// Request shared by the JSON and CBOR subtests. NOTE(review): the URL is a public host (httpbin.org), so these subtests presumably need outbound network access - confirm for CI.
+	reqData := apiCallRequest{
+		Method: "GET",
+		URL:    "https://httpbin.org/get",
+		Header: map[string]string{
+			"User-Agent": "test-client",
+		},
+	}
+
+	t.Run("JSON request and response", func(t *testing.T) {
+		// Marshal request as JSON
+		jsonData, err := json.Marshal(reqData)
+		if err != nil {
+			t.Fatalf("Failed to marshal JSON: %v", err)
+		}
+
+		// Create HTTP request
+		req := httptest.NewRequest(http.MethodPost, "/v0/management/api-call", bytes.NewReader(jsonData))
+		req.Header.Set("Content-Type", "application/json")
+
+		// Create response recorder
+		w := httptest.NewRecorder()
+
+		// Create Gin context
+		c, _ := gin.CreateTestContext(w)
+		c.Request = req
+
+		// Call handler
+		h.APICall(c)
+
+		// Statuses other than 200/502 are only logged via t.Logf; they do not fail the subtest.
+		if w.Code != http.StatusOK && w.Code != http.StatusBadGateway {
+			t.Logf("Response status: %d", w.Code)
+			t.Logf("Response body: %s", w.Body.String())
+		}
+
+		// The Content-Type is asserted only when the handler answered 200.
+		contentType := w.Header().Get("Content-Type")
+		if w.Code == http.StatusOK && !contains(contentType, "application/json") {
+			t.Errorf("Expected JSON response, got: %s", contentType)
+		}
+	})
+
+	t.Run("CBOR request and response", func(t *testing.T) {
+		// Marshal request as CBOR
+		cborData, err := cbor.Marshal(reqData)
+		if err != nil {
+			t.Fatalf("Failed to marshal CBOR: %v", err)
+		}
+
+		// Create HTTP request
+		req := httptest.NewRequest(http.MethodPost, "/v0/management/api-call", bytes.NewReader(cborData))
+		req.Header.Set("Content-Type", "application/cbor")
+
+		// Create response recorder
+		w := httptest.NewRecorder()
+
+		// Create Gin context
+		c, _ := gin.CreateTestContext(w)
+		c.Request = req
+
+		// Call handler
+		h.APICall(c)
+
+		// Statuses other than 200/502 are only logged via t.Logf; they do not fail the subtest.
+		if w.Code != http.StatusOK && w.Code != http.StatusBadGateway {
+			t.Logf("Response status: %d", w.Code)
+			t.Logf("Response body: %s", w.Body.String())
+		}
+
+		// The Content-Type is asserted only when the handler answered 200.
+		contentType := w.Header().Get("Content-Type")
+		if w.Code == http.StatusOK && !contains(contentType, "application/cbor") {
+			t.Errorf("Expected CBOR response, got: %s", contentType)
+		}
+
+		// On 200 the body must decode as CBOR into apiCallResponse.
+		if w.Code == http.StatusOK {
+			var response apiCallResponse
+			if err := cbor.Unmarshal(w.Body.Bytes(), &response); err != nil {
+				t.Errorf("Failed to unmarshal CBOR response: %v", err)
+			} else {
+				t.Logf("CBOR response decoded successfully: status_code=%d", response.StatusCode)
+			}
+		}
+	})
+
+	t.Run("CBOR encoding and decoding consistency", func(t *testing.T) {
+		// Pure encode/decode round trip of apiCallRequest; the handler is not called in this subtest.
+		testReq := apiCallRequest{
+			Method: "POST",
+			URL:    "https://example.com/api",
+			Header: map[string]string{
+				"Authorization": "Bearer $TOKEN$",
+				"Content-Type":  "application/json",
+			},
+			Data: `{"key":"value"}`,
+		}
+
+		// Encode to CBOR
+		cborData, err := cbor.Marshal(testReq)
+		if err != nil {
+			t.Fatalf("Failed to marshal to CBOR: %v", err)
+		}
+
+		// Decode from CBOR
+		var decoded apiCallRequest
+		if err := cbor.Unmarshal(cborData, &decoded); err != nil {
+			t.Fatalf("Failed to unmarshal from CBOR: %v", err)
+		}
+
+		// Method, URL and Data are compared exactly; headers are compared by count only.
+		if decoded.Method != testReq.Method {
+			t.Errorf("Method mismatch: got %s, want %s", decoded.Method, testReq.Method)
+		}
+		if decoded.URL != testReq.URL {
+			t.Errorf("URL mismatch: got %s, want %s", decoded.URL, testReq.URL)
+		}
+		if decoded.Data != testReq.Data {
+			t.Errorf("Data mismatch: got %s, want %s", decoded.Data, testReq.Data)
+		}
+		if len(decoded.Header) != len(testReq.Header) {
+			t.Errorf("Header count mismatch: got %d, want %d", len(decoded.Header), len(testReq.Header))
+		}
+	})
+}
+// contains reports whether substr occurs anywhere in s and returns false when either argument is empty; the equality and prefix checks are already covered by bytes.Contains.
+func contains(s, substr string) bool {
+	return len(s) > 0 && len(substr) > 0 && (s == substr || len(s) >= len(substr) && s[:len(substr)] == substr || bytes.Contains([]byte(s), []byte(substr)))
+}
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -6,6 +6,7 @@ import (
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/base64"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -22,8 +23,10 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/antigravity"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
 	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
@@ -234,14 +237,6 @@ func stopForwarderInstance(port int, forwarder *callbackForwarder) {
 	log.Infof("callback forwarder on port %d stopped", port)
 }

-func sanitizeAntigravityFileName(email string) string {
-	if strings.TrimSpace(email) == "" {
-		return "antigravity.json"
-	}
-	replacer := strings.NewReplacer("@", "_", ".", "_")
-	return fmt.Sprintf("antigravity-%s.json", replacer.Replace(email))
-}
-
 func (h *Handler) managementCallbackURL(path string) (string, error) {
 	if h == nil || h.cfg == nil || h.cfg.Port <= 0 {
 		return "", fmt.Errorf("server port is not configured")
@@ -751,6 +746,72 @@ func (h *Handler) registerAuthFromFile(ctx context.Context, path string, data []
 	return err
 }

+// PatchAuthFileStatus sets the disabled flag of one auth entry from a JSON body with "name" and "disabled": 503 if the auth manager is nil, 400 on an invalid body or missing field, 404 if nothing matches, 500 if the update fails, 200 on success.
+func (h *Handler) PatchAuthFileStatus(c *gin.Context) {
+	if h.authManager == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
+		return
+	}
+
+	var req struct {
+		Name     string `json:"name"`
+		Disabled *bool  `json:"disabled"`
+	}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
+		return
+	}
+
+	name := strings.TrimSpace(req.Name)
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"})
+		return
+	}
+	if req.Disabled == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "disabled is required"})
+		return
+	}
+
+	ctx := c.Request.Context()
+
+	// Resolve the target: treat name as an auth ID first, then fall back to the first listed entry whose FileName equals it.
+	var targetAuth *coreauth.Auth
+	if auth, ok := h.authManager.GetByID(name); ok {
+		targetAuth = auth
+	} else {
+		auths := h.authManager.List()
+		for _, auth := range auths {
+			if auth.FileName == name {
+				targetAuth = auth
+				break
+			}
+		}
+	}
+
+	if targetAuth == nil {
+		c.JSON(http.StatusNotFound, gin.H{"error": "auth file not found"})
+		return
+	}
+
+	// Apply the requested state, keeping Status and StatusMessage consistent with the Disabled flag.
+	targetAuth.Disabled = *req.Disabled
+	if *req.Disabled {
+		targetAuth.Status = coreauth.StatusDisabled
+		targetAuth.StatusMessage = "disabled via management API"
+	} else {
+		targetAuth.Status = coreauth.StatusActive
+		targetAuth.StatusMessage = ""
+	}
+	targetAuth.UpdatedAt = time.Now()
+
+	if _, err := h.authManager.Update(ctx, targetAuth); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to update auth: %v", err)})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"status": "ok", "disabled": *req.Disabled})
+}
+
 func (h *Handler) disableAuth(ctx context.Context, id string) {
 	if h == nil || h.authManager == nil {
 		return
@@ -917,67 +978,14 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		rawCode := resultMap["code"]
 		code := strings.Split(rawCode, "#")[0]

-		// Exchange code for tokens (replicate logic using updated redirect_uri)
-		// Extract client_id from the modified auth URL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Build request
-		bodyMap := map[string]any{
-			"code":          code,
-			"state":         state,
-			"grant_type":    "authorization_code",
-			"client_id":     clientID,
-			"redirect_uri":  "http://localhost:54545/callback",
-			"code_verifier": pkceCodes.CodeVerifier,
-		}
-		bodyJSON, _ := json.Marshal(bodyMap)
-
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://console.anthropic.com/v1/oauth/token", strings.NewReader(string(bodyJSON)))
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := anthropicAuth.ExchangeCodeForTokens(ctx, code, state, pkceCodes)
+		if errExchange != nil {
+			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errExchange)
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("failed to close response body: %v", errClose)
-			}
-		}()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			SetOAuthSessionError(state, fmt.Sprintf("token exchange failed with status %d", resp.StatusCode))
-			return
-		}
-		var tResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int    `json:"expires_in"`
-			Account      struct {
-				EmailAddress string `json:"email_address"`
-			} `json:"account"`
-		}
-		if errU := json.Unmarshal(respBody, &tResp); errU != nil {
-			log.Errorf("failed to parse token response: %v", errU)
-			SetOAuthSessionError(state, "Failed to parse token response")
-			return
-		}
-		bundle := &claude.ClaudeAuthBundle{
-			TokenData: claude.ClaudeTokenData{
-				AccessToken:  tResp.AccessToken,
-				RefreshToken: tResp.RefreshToken,
-				Email:        tResp.Account.EmailAddress,
-				Expire:       time.Now().Add(time.Duration(tResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
-		}

 		// Create token storage
 		tokenStorage := anthropicAuth.CreateTokenStorage(bundle)
@@ -1017,17 +1025,13 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {

 	fmt.Println("Initializing Google authentication...")

-	// OAuth2 configuration (mirrors internal/auth/gemini)
+	// OAuth2 configuration using exported constants from internal/auth/gemini
 	conf := &oauth2.Config{
-		ClientID:     "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
-		ClientSecret: "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",
-		RedirectURL:  "http://localhost:8085/oauth2callback",
-		Scopes: []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		},
-		Endpoint: google.Endpoint,
+		ClientID:     geminiAuth.ClientID,
+		ClientSecret: geminiAuth.ClientSecret,
+		RedirectURL:  fmt.Sprintf("http://localhost:%d/oauth2callback", geminiAuth.DefaultCallbackPort),
+		Scopes:       geminiAuth.Scopes,
+		Endpoint:     google.Endpoint,
 	}

 	// Build authorization URL and return it immediately
@@ -1149,13 +1153,9 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		}

 		ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-		ifToken["client_id"] = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-		ifToken["client_secret"] = "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"
-		ifToken["scopes"] = []string{
-			"https://www.googleapis.com/auth/cloud-platform",
-			"https://www.googleapis.com/auth/userinfo.email",
-			"https://www.googleapis.com/auth/userinfo.profile",
-		}
+		ifToken["client_id"] = geminiAuth.ClientID
+		ifToken["client_secret"] = geminiAuth.ClientSecret
+		ifToken["scopes"] = geminiAuth.Scopes
 		ifToken["universe_domain"] = "googleapis.com"

 		ts := geminiAuth.GeminiTokenStorage{
@@ -1342,74 +1342,34 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		}

 		log.Debug("Authorization code received, exchanging for tokens...")
-		// Extract client_id from authURL
-		clientID := ""
-		if u2, errP := url.Parse(authURL); errP == nil {
-			clientID = u2.Query().Get("client_id")
-		}
-		// Exchange code for tokens with redirect equal to mgmtRedirect
-		form := url.Values{
-			"grant_type":    {"authorization_code"},
-			"client_id":     {clientID},
-			"code":          {code},
-			"redirect_uri":  {"http://localhost:1455/auth/callback"},
-			"code_verifier": {pkceCodes.CodeVerifier},
-		}
-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		req, _ := http.NewRequestWithContext(ctx, "POST", "https://auth.openai.com/oauth/token", strings.NewReader(form.Encode()))
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-		req.Header.Set("Accept", "application/json")
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errDo)
+		// Exchange code for tokens using internal auth service
+		bundle, errExchange := openaiAuth.ExchangeCodeForTokens(ctx, code, pkceCodes)
+		if errExchange != nil {
+			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errExchange)
 			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			return
 		}
-		defer func() { _ = resp.Body.Close() }()
-		respBody, _ := io.ReadAll(resp.Body)
-		if resp.StatusCode != http.StatusOK {
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed with status %d", resp.StatusCode))
-			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			return
-		}
-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			IDToken      string `json:"id_token"`
-			ExpiresIn    int    `json:"expires_in"`
-		}
-		if errU := json.Unmarshal(respBody, &tokenResp); errU != nil {
-			SetOAuthSessionError(state, "Failed to parse token response")
-			log.Errorf("failed to parse token response: %v", errU)
-			return
-		}
-		claims, _ := codex.ParseJWTToken(tokenResp.IDToken)
-		email := ""
-		accountID := ""
+
+		// Extract additional info for filename generation
+		claims, _ := codex.ParseJWTToken(bundle.TokenData.IDToken)
+		planType := ""
+		hashAccountID := ""
 		if claims != nil {
-			email = claims.GetUserEmail()
-			accountID = claims.GetAccountID()
-		}
-		// Build bundle compatible with existing storage
-		bundle := &codex.CodexAuthBundle{
-			TokenData: codex.CodexTokenData{
-				IDToken:      tokenResp.IDToken,
-				AccessToken:  tokenResp.AccessToken,
-				RefreshToken: tokenResp.RefreshToken,
-				AccountID:    accountID,
-				Email:        email,
-				Expire:       time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second).Format(time.RFC3339),
-			},
-			LastRefresh: time.Now().Format(time.RFC3339),
+			planType = strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType)
+			if accountID := claims.GetAccountID(); accountID != "" {
+				digest := sha256.Sum256([]byte(accountID))
+				hashAccountID = hex.EncodeToString(digest[:])[:8]
+			}
 		}

 		// Create token storage and persist
 		tokenStorage := openaiAuth.CreateTokenStorage(bundle)
+		fileName := codex.CredentialFileName(tokenStorage.Email, planType, hashAccountID, true)
 		record := &coreauth.Auth{
-			ID:       fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			ID:       fileName,
 			Provider: "codex",
-			FileName: fmt.Sprintf("codex-%s.json", tokenStorage.Email),
+			FileName: fileName,
 			Storage:  tokenStorage,
 			Metadata: map[string]any{
 				"email":      tokenStorage.Email,
@@ -1435,23 +1395,12 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 }

 func (h *Handler) RequestAntigravityToken(c *gin.Context) {
-	const (
-		antigravityCallbackPort = 51121
-		antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-		antigravityClientSecret = "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"
-	)
-	var antigravityScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-		"https://www.googleapis.com/auth/cclog",
-		"https://www.googleapis.com/auth/experimentsandconfigs",
-	}
-
 	ctx := context.Background()

 	fmt.Println("Initializing Antigravity authentication...")

+	authSvc := antigravity.NewAntigravityAuth(h.cfg, nil)
+
 	state, errState := misc.GenerateRandomState()
 	if errState != nil {
 		log.Errorf("Failed to generate state parameter: %v", errState)
@@ -1459,17 +1408,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		return
 	}

-	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravityCallbackPort)
-
-	params := url.Values{}
-	params.Set("access_type", "offline")
-	params.Set("client_id", antigravityClientID)
-	params.Set("prompt", "consent")
-	params.Set("redirect_uri", redirectURI)
-	params.Set("response_type", "code")
-	params.Set("scope", strings.Join(antigravityScopes, " "))
-	params.Set("state", state)
-	authURL := "https://accounts.google.com/o/oauth2/v2/auth?" + params.Encode()
+	redirectURI := fmt.Sprintf("http://localhost:%d/oauth-callback", antigravity.CallbackPort)
+	authURL := authSvc.BuildAuthURL(state, redirectURI)

 	RegisterOAuthSession(state, "antigravity")

@@ -1483,7 +1423,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			return
 		}
 		var errStart error
-		if forwarder, errStart = startCallbackForwarder(antigravityCallbackPort, "antigravity", targetURL); errStart != nil {
+		if forwarder, errStart = startCallbackForwarder(antigravity.CallbackPort, "antigravity", targetURL); errStart != nil {
 			log.WithError(errStart).Error("failed to start antigravity callback forwarder")
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start callback server"})
 			return
@@ -1492,7 +1432,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {

 	go func() {
 		if isWebUI {
-			defer stopCallbackForwarderInstance(antigravityCallbackPort, forwarder)
+			defer stopCallbackForwarderInstance(antigravity.CallbackPort, forwarder)
 		}

 		waitFile := filepath.Join(h.cfg.AuthDir, fmt.Sprintf(".oauth-antigravity-%s.oauth", state))
@@ -1532,93 +1472,36 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			time.Sleep(500 * time.Millisecond)
 		}

-		httpClient := util.SetProxy(&h.cfg.SDKConfig, &http.Client{})
-		form := url.Values{}
-		form.Set("code", authCode)
-		form.Set("client_id", antigravityClientID)
-		form.Set("client_secret", antigravityClientSecret)
-		form.Set("redirect_uri", redirectURI)
-		form.Set("grant_type", "authorization_code")
-
-		req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodPost, "https://oauth2.googleapis.com/token", strings.NewReader(form.Encode()))
-		if errNewRequest != nil {
-			log.Errorf("Failed to build token request: %v", errNewRequest)
-			SetOAuthSessionError(state, "Failed to build token request")
-			return
-		}
-		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-
-		resp, errDo := httpClient.Do(req)
-		if errDo != nil {
-			log.Errorf("Failed to execute token request: %v", errDo)
+		tokenResp, errToken := authSvc.ExchangeCodeForTokens(ctx, authCode, redirectURI)
+		if errToken != nil {
+			log.Errorf("Failed to exchange token: %v", errToken)
 			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}
-		defer func() {
-			if errClose := resp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity token exchange close error: %v", errClose)
-			}
-		}()

-		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, _ := io.ReadAll(resp.Body)
-			log.Errorf("Antigravity token exchange failed with status %d: %s", resp.StatusCode, string(bodyBytes))
-			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed: %d", resp.StatusCode))
+		accessToken := strings.TrimSpace(tokenResp.AccessToken)
+		if accessToken == "" {
+			log.Error("antigravity: token exchange returned empty access token")
+			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}

-		var tokenResp struct {
-			AccessToken  string `json:"access_token"`
-			RefreshToken string `json:"refresh_token"`
-			ExpiresIn    int64  `json:"expires_in"`
-			TokenType    string `json:"token_type"`
-		}
-		if errDecode := json.NewDecoder(resp.Body).Decode(&tokenResp); errDecode != nil {
-			log.Errorf("Failed to parse token response: %v", errDecode)
-			SetOAuthSessionError(state, "Failed to parse token response")
+		email, errInfo := authSvc.FetchUserInfo(ctx, accessToken)
+		if errInfo != nil {
+			log.Errorf("Failed to fetch user info: %v", errInfo)
+			SetOAuthSessionError(state, "Failed to fetch user info")
 			return
 		}
-
-		email := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			infoReq, errInfoReq := http.NewRequestWithContext(ctx, http.MethodGet, "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
-			if errInfoReq != nil {
-				log.Errorf("Failed to build user info request: %v", errInfoReq)
-				SetOAuthSessionError(state, "Failed to build user info request")
-				return
-			}
-			infoReq.Header.Set("Authorization", "Bearer "+tokenResp.AccessToken)
-
-			infoResp, errInfo := httpClient.Do(infoReq)
-			if errInfo != nil {
-				log.Errorf("Failed to execute user info request: %v", errInfo)
-				SetOAuthSessionError(state, "Failed to execute user info request")
-				return
-			}
-			defer func() {
-				if errClose := infoResp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity user info close error: %v", errClose)
-				}
-			}()
-
-			if infoResp.StatusCode >= http.StatusOK && infoResp.StatusCode < http.StatusMultipleChoices {
-				var infoPayload struct {
-					Email string `json:"email"`
-				}
-				if errDecodeInfo := json.NewDecoder(infoResp.Body).Decode(&infoPayload); errDecodeInfo == nil {
-					email = strings.TrimSpace(infoPayload.Email)
-				}
-			} else {
-				bodyBytes, _ := io.ReadAll(infoResp.Body)
-				log.Errorf("User info request failed with status %d: %s", infoResp.StatusCode, string(bodyBytes))
-				SetOAuthSessionError(state, fmt.Sprintf("User info request failed: %d", infoResp.StatusCode))
-				return
-			}
+		email = strings.TrimSpace(email)
+		if email == "" {
+			log.Error("antigravity: user info returned empty email")
+			SetOAuthSessionError(state, "Failed to fetch user info")
+			return
 		}

 		projectID := ""
-		if strings.TrimSpace(tokenResp.AccessToken) != "" {
-			fetchedProjectID, errProject := sdkAuth.FetchAntigravityProjectID(ctx, tokenResp.AccessToken, httpClient)
+		if accessToken != "" {
+			fetchedProjectID, errProject := authSvc.FetchProjectID(ctx, accessToken)
 			if errProject != nil {
 				log.Warnf("antigravity: failed to fetch project ID: %v", errProject)
 			} else {
@@ -1643,7 +1526,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			metadata["project_id"] = projectID
 		}

-		fileName := sanitizeAntigravityFileName(email)
+		fileName := antigravity.CredentialFileName(email)
 		label := strings.TrimSpace(email)
 		if label == "" {
 			label = "antigravity"
@@ -1707,7 +1590,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		// Create token storage
 		tokenStorage := qwenAuth.CreateTokenStorage(tokenData)

-		tokenStorage.Email = fmt.Sprintf("qwen-%d", time.Now().UnixMilli())
+		tokenStorage.Email = fmt.Sprintf("%d", time.Now().UnixMilli())
 		record := &coreauth.Auth{
 			ID:       fmt.Sprintf("qwen-%s.json", tokenStorage.Email),
 			Provider: "qwen",
@@ -1812,7 +1695,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		tokenStorage := authSvc.CreateTokenStorage(tokenData)
 		identifier := strings.TrimSpace(tokenStorage.Email)
 		if identifier == "" {
-			identifier = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+			identifier = fmt.Sprintf("%d", time.Now().UnixMilli())
 			tokenStorage.Email = identifier
 		}
 		record := &coreauth.Auth{
@@ -1843,6 +1726,89 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }

+// RequestGitHubToken starts the GitHub Copilot device flow and returns the verification URL and
+// user code immediately; a background goroutine polls for the token and persists it on success.
+func (h *Handler) RequestGitHubToken(c *gin.Context) {
+	ctx := context.Background()
+
+	fmt.Println("Initializing GitHub Copilot authentication...")
+
+	state := fmt.Sprintf("gh-%d", time.Now().UnixNano())
+
+	// Initialize Copilot auth service
+	deviceClient := copilot.NewDeviceFlowClient(h.cfg)
+
+	// Initiate device flow
+	deviceCode, err := deviceClient.RequestDeviceCode(ctx)
+	if err != nil {
+		log.Errorf("Failed to initiate device flow: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to initiate device flow"})
+		return
+	}
+
+	authURL := deviceCode.VerificationURI
+	userCode := deviceCode.UserCode
+
+	RegisterOAuthSession(state, "github")
+
+	go func() {
+		fmt.Printf("Please visit %s and enter code: %s\n", authURL, userCode)
+
+		tokenData, errPoll := deviceClient.PollForToken(ctx, deviceCode)
+		if errPoll != nil {
+			SetOAuthSessionError(state, "Authentication failed")
+			log.Errorf("Authentication failed: %v", errPoll)
+			return
+		}
+
+		username, errUser := deviceClient.FetchUserInfo(ctx, tokenData.AccessToken)
+		if errUser != nil {
+			log.Warnf("Failed to fetch user info: %v", errUser)
+			username = "github-user"
+		}
+
+		tokenStorage := &copilot.CopilotTokenStorage{
+			AccessToken: tokenData.AccessToken,
+			TokenType:   tokenData.TokenType,
+			Scope:       tokenData.Scope,
+			Username:    username,
+			Type:        "github-copilot",
+		}
+
+		fileName := fmt.Sprintf("github-%s.json", username)
+		record := &coreauth.Auth{
+			ID:       fileName,
+			Provider: "github",
+			FileName: fileName,
+			Storage:  tokenStorage,
+			Metadata: map[string]any{
+				"email":    username,
+				"username": username,
+			},
+		}
+
+		savedPath, errSave := h.saveTokenRecord(ctx, record)
+		if errSave != nil {
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
+			return
+		}
+
+		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
+		fmt.Println("You can now use GitHub Copilot services through this CLI")
+		CompleteOAuthSession(state)
+		CompleteOAuthSessionsByProvider("github")
+	}()
+
+	c.JSON(http.StatusOK, gin.H{
+		"status":           "ok",
+		"url":              authURL,
+		"state":            state,
+		"user_code":        userCode,
+		"verification_uri": authURL,
+	})
+}
+
 func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	ctx := context.Background()

@@ -1897,15 +1863,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	fileName := iflowauth.SanitizeIFlowFileName(email)
 	if fileName == "" {
 		fileName = fmt.Sprintf("iflow-%d", time.Now().UnixMilli())
+	} else {
+		fileName = fmt.Sprintf("iflow-%s", fileName)
 	}

 	tokenStorage.Email = email
 	timestamp := time.Now().Unix()

 	record := &coreauth.Auth{
-		ID:       fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		ID:       fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Provider: "iflow",
-		FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
+		FileName: fmt.Sprintf("%s-%d.json", fileName, timestamp),
 		Storage:  tokenStorage,
 		Metadata: map[string]any{
 			"email":        email,
@@ -2112,7 +2080,20 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// For free users, use backend project ID for preview model access
+						log.Infof("Gemini onboarding: frontend project %s maps to backend project %s", projectID, responseProjectID)
+						log.Infof("Using backend project ID: %s (recommended for preview model access)", responseProjectID)
+						finalProjectID = responseProjectID
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
 					finalProjectID = responseProjectID
 				}
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -13,7 +13,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )

 const (
@@ -360,16 +360,7 @@ func (h *Handler) logDirectory() string {
 	if h.logDir != "" {
 		return h.logDir
 	}
-	if base := util.WritablePath(); base != "" {
-		return filepath.Join(base, "logs")
-	}
-	if h.configFilePath != "" {
-		dir := filepath.Dir(h.configFilePath)
-		if dir != "" && dir != "." {
-			return filepath.Join(dir, "logs")
-		}
-	}
-	return "logs"
+	return logging.ResolveLogDirectory(h.cfg)
 }

 func (h *Handler) collectLogFiles(dir string) ([]string, error) {
--- a/internal/api/handlers/management/model_definitions.go
+++ b/internal/api/handlers/management/model_definitions.go
@@ -0,0 +1,33 @@
+package management
+
+import (
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// GetStaticModelDefinitions serves the static model metadata registered for one channel.
+// The channel name comes from the :channel path parameter, falling back to ?channel=...
+func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
+	name := strings.TrimSpace(c.Param("channel"))
+	if name == "" {
+		name = strings.TrimSpace(c.Query("channel"))
+	}
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
+		return
+	}
+
+	defs := registry.GetStaticModelDefinitionsByChannel(name)
+	if defs == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": name})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"channel": strings.ToLower(name),
+		"models":  defs,
+	})
+}
--- a/internal/api/handlers/management/oauth_sessions.go
+++ b/internal/api/handlers/management/oauth_sessions.go
@@ -238,6 +238,8 @@ func NormalizeOAuthProvider(provider string) (string, error) {
 		return "qwen", nil
 	case "kiro":
 		return "kiro", nil
+	case "github":
+		return "github", nil
 	default:
 		return "", errUnsupportedOAuthFlow
 	}
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
@@ -103,6 +104,7 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
 		Headers:   headers,
 		Body:      body,
 		RequestID: logging.GetGinRequestID(c),
+		Timestamp: time.Now(),
 	}, nil
 }

--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -7,6 +7,7 @@ import (
 	"bytes"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
@@ -20,22 +21,24 @@ type RequestInfo struct {
 	Headers   map[string][]string // Headers contains the request headers.
 	Body      []byte              // Body is the raw request body.
 	RequestID string              // RequestID is the unique identifier for the request.
+	Timestamp time.Time           // Timestamp is when the request was received.
 }

 // ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
 // It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
 type ResponseWriterWrapper struct {
 	gin.ResponseWriter
-	body           *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
-	isStreaming    bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
-	streamWriter   logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
-	chunkChannel   chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
-	streamDone     chan struct{}              // streamDone signals when the streaming goroutine completes.
-	logger         logging.RequestLogger      // logger is the instance of the request logger service.
-	requestInfo    *RequestInfo               // requestInfo holds the details of the original request.
-	statusCode     int                        // statusCode stores the HTTP status code of the response.
-	headers        map[string][]string        // headers stores the response headers.
-	logOnErrorOnly bool                       // logOnErrorOnly enables logging only when an error response is detected.
+	body                *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
+	isStreaming         bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
+	streamWriter        logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
+	chunkChannel        chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
+	streamDone          chan struct{}              // streamDone signals when the streaming goroutine completes.
+	logger              logging.RequestLogger      // logger is the instance of the request logger service.
+	requestInfo         *RequestInfo               // requestInfo holds the details of the original request.
+	statusCode          int                        // statusCode stores the HTTP status code of the response.
+	headers             map[string][]string        // headers stores the response headers.
+	logOnErrorOnly      bool                       // logOnErrorOnly enables logging only when an error response is detected.
+	firstChunkTimestamp time.Time                  // firstChunkTimestamp captures TTFB for streaming responses.
 }

 // NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
@@ -73,6 +76,10 @@ func (w *ResponseWriterWrapper) Write(data []byte) (int, error) {

 	// THEN: Handle logging based on response type
 	if w.isStreaming && w.chunkChannel != nil {
+		// Capture TTFB on first chunk (synchronous, before async channel send)
+		if w.firstChunkTimestamp.IsZero() {
+			w.firstChunkTimestamp = time.Now()
+		}
 		// For streaming responses: Send to async logging channel (non-blocking)
 		select {
 		case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
@@ -117,6 +124,10 @@ func (w *ResponseWriterWrapper) WriteString(data string) (int, error) {

 	// THEN: Capture for logging
 	if w.isStreaming && w.chunkChannel != nil {
+		// Capture TTFB on first chunk (synchronous, before async channel send)
+		if w.firstChunkTimestamp.IsZero() {
+			w.firstChunkTimestamp = time.Now()
+		}
 		select {
 		case w.chunkChannel <- []byte(data):
 		default:
@@ -280,6 +291,8 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 			w.streamDone = nil
 		}

+		w.streamWriter.SetFirstChunkTimestamp(w.firstChunkTimestamp)
+
 		// Write API Request and Response to the streaming log before closing
 		apiRequest := w.extractAPIRequest(c)
 		if len(apiRequest) > 0 {
@@ -297,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		return nil
 	}

-	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
+	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
 }

 func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -337,7 +350,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
 	return data
 }

-func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+// extractAPIResponseTimestamp reads "API_RESPONSE_TIMESTAMP" from the gin context and returns it
+// when it holds a time.Time; a missing key or any other value type yields the zero time.
+func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
+	if raw, found := c.Get("API_RESPONSE_TIMESTAMP"); found {
+		if stamp, isTime := raw.(time.Time); isTime {
+			return stamp
+		}
+	}
+	return time.Time{}
+}
+
+func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
 	if w.requestInfo == nil {
 		return nil
 	}
@@ -348,7 +372,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 	}

 	if loggerWithOptions, ok := w.logger.(interface {
-		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
 	}); ok {
 		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
@@ -363,6 +387,8 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 			apiResponseErrors,
 			forceLog,
 			w.requestInfo.RequestID,
+			w.requestInfo.Timestamp,
+			apiResponseTimestamp,
 		)
 	}

@@ -378,5 +404,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 		apiResponseBody,
 		apiResponseErrors,
 		w.requestInfo.RequestID,
+		w.requestInfo.Timestamp,
+		apiResponseTimestamp,
 	)
 }
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -12,6 +12,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"reflect"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -23,6 +24,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/middleware"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/modules"
 	ampmodule "github.com/router-for-me/CLIProxyAPI/v6/internal/api/modules/amp"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
@@ -261,10 +263,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	if optionState.localPassword != "" {
 		s.mgmt.SetLocalPassword(optionState.localPassword)
 	}
-	logDir := filepath.Join(s.currentPath, "logs")
-	if base := util.WritablePath(); base != "" {
-		logDir = filepath.Join(base, "logs")
-	}
+	logDir := logging.ResolveLogDirectory(cfg)
 	s.mgmt.SetLogDirectory(logDir)
 	s.localPassword = optionState.localPassword

@@ -295,6 +294,11 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 		s.registerManagementRoutes()
 	}

+	// === CLIProxyAPIPlus extension: register Kiro OAuth web routes ===
+	kiroOAuthHandler := kiro.NewOAuthWebHandler(cfg)
+	kiroOAuthHandler.RegisterRoutes(engine)
+	log.Info("Kiro OAuth Web routes registered at /v0/oauth/kiro/*")
+
 	if optionState.keepAliveEnabled {
 		s.enableKeepAlive(optionState.keepAliveTimeout, optionState.keepAliveOnTimeout)
 	}
@@ -630,9 +634,11 @@ func (s *Server) registerManagementRoutes() {

 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
+		mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
+		mgmt.PATCH("/auth-files/status", s.mgmt.PatchAuthFileStatus)
 		mgmt.POST("/vertex/import", s.mgmt.ImportVertexCredential)

 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
@@ -643,6 +649,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
 		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/kiro-auth-url", s.mgmt.RequestKiroToken)
+		mgmt.GET("/github-auth-url", s.mgmt.RequestGitHubToken)
 		mgmt.POST("/oauth-callback", s.mgmt.PostOAuthCallback)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
@@ -1012,14 +1019,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 		s.mgmt.SetAuthManager(s.handlers.AuthManager)
 	}

-	// Notify Amp module of config changes (for model mapping hot-reload)
-	if s.ampModule != nil {
-		log.Debugf("triggering amp module config update")
-		if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
-			log.Errorf("failed to update Amp module config: %v", err)
+	// Notify Amp module only when Amp config has changed.
+	ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
+	if ampConfigChanged {
+		if s.ampModule != nil {
+			log.Debugf("triggering amp module config update")
+			if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
+				log.Errorf("failed to update Amp module config: %v", err)
+			}
+		} else {
+			log.Warnf("amp module is nil, skipping config update")
 		}
-	} else {
-		log.Warnf("amp module is nil, skipping config update")
 	}

 	// Count client sources from configuration and auth store.
--- a/internal/auth/antigravity/auth.go
+++ b/internal/auth/antigravity/auth.go
@@ -0,0 +1,344 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+// TokenResponse is the JSON payload that ExchangeCodeForTokens decodes from the token endpoint.
+type TokenResponse struct {
+	AccessToken  string `json:"access_token"`  // OAuth access token
+	RefreshToken string `json:"refresh_token"` // OAuth refresh token
+	ExpiresIn    int64  `json:"expires_in"`    // token lifetime; presumably seconds per OAuth 2.0 — TODO confirm
+	TokenType    string `json:"token_type"`    // token type string exactly as returned by the endpoint
+}
+
+// userInfo is the subset of the userinfo endpoint response that FetchUserInfo decodes.
+type userInfo struct {
+	Email string `json:"email"` // account email; FetchUserInfo rejects a blank value
+}
+
+// AntigravityAuth performs the Antigravity OAuth, userinfo and onboarding HTTP calls.
+type AntigravityAuth struct {
+	httpClient *http.Client // client used for every outbound request; set by NewAntigravityAuth
+}
+
+// NewAntigravityAuth builds an AntigravityAuth.
+//
+// A non-nil httpClient is used as-is and cfg is ignored. Otherwise a fresh
+// http.Client is passed through util.SetProxy together with cfg.SDKConfig;
+// a nil cfg is replaced by an empty config.Config first.
+func NewAntigravityAuth(cfg *config.Config, httpClient *http.Client) *AntigravityAuth {
+	client := httpClient
+	if client == nil {
+		if cfg == nil {
+			cfg = &config.Config{}
+		}
+		client = util.SetProxy(&cfg.SDKConfig, &http.Client{})
+	}
+	return &AntigravityAuth{httpClient: client}
+}
+
+// BuildAuthURL returns AuthEndpoint with the authorization query string attached.
+//
+// A blank redirectURI falls back to http://localhost:<CallbackPort>/oauth-callback.
+// The query requests offline access with a forced consent prompt, the
+// space-joined Scopes, and carries state through unchanged.
+func (o *AntigravityAuth) BuildAuthURL(state, redirectURI string) string {
+	if strings.TrimSpace(redirectURI) == "" {
+		redirectURI = fmt.Sprintf("http://localhost:%d/oauth-callback", CallbackPort)
+	}
+	query := url.Values{
+		"access_type":   {"offline"},
+		"client_id":     {ClientID},
+		"prompt":        {"consent"},
+		"redirect_uri":  {redirectURI},
+		"response_type": {"code"},
+		"scope":         {strings.Join(Scopes, " ")},
+		"state":         {state},
+	}
+	return AuthEndpoint + "?" + query.Encode()
+}
+
+// ExchangeCodeForTokens trades an authorization code for tokens at TokenEndpoint.
+//
+// The code, client credentials and redirectURI are sent as a form-encoded POST
+// with grant_type=authorization_code. A non-2xx reply becomes an error carrying
+// the status and at most 8 KiB of the trimmed response body; a 2xx reply is
+// decoded into a TokenResponse.
+func (o *AntigravityAuth) ExchangeCodeForTokens(ctx context.Context, code, redirectURI string) (*TokenResponse, error) {
+	form := url.Values{
+		"code":          {code},
+		"client_id":     {ClientID},
+		"client_secret": {ClientSecret},
+		"redirect_uri":  {redirectURI},
+		"grant_type":    {"authorization_code"},
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, TokenEndpoint, strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("antigravity token exchange: create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return nil, fmt.Errorf("antigravity token exchange: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity token exchange: close body error: %v", errClose)
+		}
+	}()
+
+	succeeded := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices
+	if !succeeded {
+		// Cap the error body so a misbehaving endpoint cannot bloat the error.
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return nil, fmt.Errorf("antigravity token exchange: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return nil, fmt.Errorf("antigravity token exchange: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return nil, fmt.Errorf("antigravity token exchange: request failed: status %d", resp.StatusCode)
+	}
+
+	token := new(TokenResponse)
+	if errDecode := json.NewDecoder(resp.Body).Decode(token); errDecode != nil {
+		return nil, fmt.Errorf("antigravity token exchange: decode response: %w", errDecode)
+	}
+	return token, nil
+}
+
+// FetchUserInfo returns the email reported by UserInfoEndpoint for accessToken.
+//
+// The token is trimmed and must be non-blank. It is sent as a Bearer
+// Authorization header on a GET request. A non-2xx reply becomes an error
+// carrying the status and at most 8 KiB of the trimmed body; a 2xx reply
+// without a non-blank "email" field is also an error.
+func (o *AntigravityAuth) FetchUserInfo(ctx context.Context, accessToken string) (string, error) {
+	bearer := strings.TrimSpace(accessToken)
+	if bearer == "" {
+		return "", fmt.Errorf("antigravity userinfo: missing access token")
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, UserInfoEndpoint, nil)
+	if err != nil {
+		return "", fmt.Errorf("antigravity userinfo: create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+bearer)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("antigravity userinfo: execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity userinfo: close body error: %v", errClose)
+		}
+	}()
+
+	succeeded := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices
+	if !succeeded {
+		// Cap the error body so a misbehaving endpoint cannot bloat the error.
+		raw, errRead := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+		if errRead != nil {
+			return "", fmt.Errorf("antigravity userinfo: read response: %w", errRead)
+		}
+		if detail := strings.TrimSpace(string(raw)); detail != "" {
+			return "", fmt.Errorf("antigravity userinfo: request failed: status %d: %s", resp.StatusCode, detail)
+		}
+		return "", fmt.Errorf("antigravity userinfo: request failed: status %d", resp.StatusCode)
+	}
+
+	var profile userInfo
+	if errDecode := json.NewDecoder(resp.Body).Decode(&profile); errDecode != nil {
+		return "", fmt.Errorf("antigravity userinfo: decode response: %w", errDecode)
+	}
+	if address := strings.TrimSpace(profile.Email); address != "" {
+		return address, nil
+	}
+	return "", fmt.Errorf("antigravity userinfo: response missing email")
+}
+
+// FetchProjectID resolves the project ID for the authenticated user.
+//
+// It POSTs to the loadCodeAssist endpoint and reads "cloudaicompanionProject"
+// from the reply, accepting either a plain string or an object with an "id"
+// field. When no project ID is present it selects the "allowedTiers" entry
+// flagged "isDefault" (falling back to "legacy-tier") and delegates to
+// OnboardUser with that tier.
+func (o *AntigravityAuth) FetchProjectID(ctx context.Context, accessToken string) (string, error) {
+	payload, errMarshal := json.Marshal(map[string]any{
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	})
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	target := fmt.Sprintf("%s/%s:loadCodeAssist", APIEndpoint, APIVersion)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, target, strings.NewReader(string(payload)))
+	if err != nil {
+		return "", fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", APIUserAgent)
+	req.Header.Set("X-Goog-Api-Client", APIClient)
+	req.Header.Set("Client-Metadata", ClientMetadata)
+
+	resp, errDo := o.httpClient.Do(req)
+	if errDo != nil {
+		return "", fmt.Errorf("execute request: %w", errDo)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity loadCodeAssist: close body error: %v", errClose)
+		}
+	}()
+
+	raw, errRead := io.ReadAll(resp.Body)
+	if errRead != nil {
+		return "", fmt.Errorf("read response: %w", errRead)
+	}
+
+	succeeded := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices
+	if !succeeded {
+		return "", fmt.Errorf("request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
+	}
+
+	var loadResp map[string]any
+	if errDecode := json.Unmarshal(raw, &loadResp); errDecode != nil {
+		return "", fmt.Errorf("decode response: %w", errDecode)
+	}
+
+	// The project may arrive either as a bare string or as {"id": "..."}.
+	var projectID string
+	switch project := loadResp["cloudaicompanionProject"].(type) {
+	case string:
+		projectID = strings.TrimSpace(project)
+	case map[string]any:
+		if id, okID := project["id"].(string); okID {
+			projectID = strings.TrimSpace(id)
+		}
+	}
+	if projectID != "" {
+		return projectID, nil
+	}
+
+	// No project yet: onboard with the first default tier that has a usable id.
+	tierID := "legacy-tier"
+	tiers, _ := loadResp["allowedTiers"].([]any)
+	for _, rawTier := range tiers {
+		tier, okTier := rawTier.(map[string]any)
+		if !okTier {
+			continue
+		}
+		if isDefault, _ := tier["isDefault"].(bool); !isDefault {
+			continue
+		}
+		if id, _ := tier["id"].(string); strings.TrimSpace(id) != "" {
+			tierID = strings.TrimSpace(id)
+			break
+		}
+	}
+
+	projectID, err = o.OnboardUser(ctx, accessToken, tierID)
+	if err != nil {
+		return "", err
+	}
+	return projectID, nil
+}
+
+// OnboardUser calls the onboardUser endpoint for tierID and polls until the
+// operation reports done, returning the project ID from the final response.
+//
+// Up to five requests are made, each bounded by a 30-second timeout, with a
+// two-second pause between polls while the operation is still pending. A nil
+// ctx is treated as context.Background().
+//
+// It returns an error when a request cannot be built or executed, when the
+// endpoint answers with a non-200 status (the error carries the status and at
+// most 200 bytes of the trimmed body), when the completed operation carries no
+// project ID, when ctx is cancelled while waiting between polls, or when the
+// operation is still pending after the last attempt. An empty project ID is
+// never returned together with a nil error.
+func (o *AntigravityAuth) OnboardUser(ctx context.Context, accessToken, tierID string) (string, error) {
+	const (
+		maxAttempts    = 5
+		requestTimeout = 30 * time.Second
+		pollInterval   = 2 * time.Second
+		errPreviewSize = 200
+	)
+
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	log.Infof("Antigravity: onboarding user with tier: %s", tierID)
+	requestBody := map[string]any{
+		"tierId": tierID,
+		"metadata": map[string]string{
+			"ideType":    "ANTIGRAVITY",
+			"platform":   "PLATFORM_UNSPECIFIED",
+			"pluginType": "GEMINI",
+		},
+	}
+
+	rawBody, errMarshal := json.Marshal(requestBody)
+	if errMarshal != nil {
+		return "", fmt.Errorf("marshal request body: %w", errMarshal)
+	}
+
+	endpointURL := fmt.Sprintf("%s/%s:onboardUser", APIEndpoint, APIVersion)
+
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		log.Debugf("Polling attempt %d/%d", attempt, maxAttempts)
+
+		// Each poll gets its own deadline; cancel is called explicitly rather
+		// than deferred so the timer is released before the next iteration.
+		reqCtx, cancel := context.WithTimeout(ctx, requestTimeout)
+
+		req, errRequest := http.NewRequestWithContext(reqCtx, http.MethodPost, endpointURL, strings.NewReader(string(rawBody)))
+		if errRequest != nil {
+			cancel()
+			return "", fmt.Errorf("create request: %w", errRequest)
+		}
+		req.Header.Set("Authorization", "Bearer "+accessToken)
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("User-Agent", APIUserAgent)
+		req.Header.Set("X-Goog-Api-Client", APIClient)
+		req.Header.Set("Client-Metadata", ClientMetadata)
+
+		resp, errDo := o.httpClient.Do(req)
+		if errDo != nil {
+			cancel()
+			return "", fmt.Errorf("execute request: %w", errDo)
+		}
+
+		bodyBytes, errRead := io.ReadAll(resp.Body)
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("close body error: %v", errClose)
+		}
+		cancel()
+
+		if errRead != nil {
+			return "", fmt.Errorf("read response: %w", errRead)
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			responseErr := strings.TrimSpace(string(bodyBytes))
+			if len(responseErr) > errPreviewSize {
+				responseErr = responseErr[:errPreviewSize]
+			}
+			return "", fmt.Errorf("http %d: %s", resp.StatusCode, responseErr)
+		}
+
+		var data map[string]any
+		if errDecode := json.Unmarshal(bodyBytes, &data); errDecode != nil {
+			return "", fmt.Errorf("decode response: %w", errDecode)
+		}
+
+		if done, okDone := data["done"].(bool); okDone && done {
+			projectID := ""
+			if responseData, okResp := data["response"].(map[string]any); okResp {
+				// The project may arrive either as {"id": "..."} or as a bare string.
+				switch projectValue := responseData["cloudaicompanionProject"].(type) {
+				case map[string]any:
+					if id, okID := projectValue["id"].(string); okID {
+						projectID = strings.TrimSpace(id)
+					}
+				case string:
+					projectID = strings.TrimSpace(projectValue)
+				}
+			}
+
+			if projectID != "" {
+				log.Infof("Successfully fetched project_id: %s", projectID)
+				return projectID, nil
+			}
+
+			return "", fmt.Errorf("no project_id in response")
+		}
+
+		// Still pending. Do not wait after the final poll.
+		if attempt == maxAttempts {
+			break
+		}
+
+		// Wait for the next poll, but give up promptly if the caller cancels.
+		timer := time.NewTimer(pollInterval)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return "", fmt.Errorf("onboard user: %w", ctx.Err())
+		case <-timer.C:
+		}
+	}
+
+	return "", fmt.Errorf("onboard user: operation still pending after %d attempts", maxAttempts)
+}
--- a/internal/auth/antigravity/constants.go
+++ b/internal/auth/antigravity/constants.go
@@ -0,0 +1,34 @@
+// Package antigravity provides OAuth2 authentication functionality for the Antigravity provider.
+package antigravity
+
+// OAuth client credentials, and the port of the default localhost callback that BuildAuthURL falls back to.
+const (
+	ClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	ClientSecret = "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" // NOTE(review): secret is compiled into the source and sent by ExchangeCodeForTokens — confirm this is intended
+	CallbackPort = 51121
+)
+
+// Scopes lists the OAuth scopes that BuildAuthURL joins with spaces into the "scope" query parameter.
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+	"https://www.googleapis.com/auth/cclog",
+	"https://www.googleapis.com/auth/experimentsandconfigs",
+}
+
+// Google OAuth2 endpoints: TokenEndpoint is used by ExchangeCodeForTokens, AuthEndpoint by BuildAuthURL, UserInfoEndpoint by FetchUserInfo.
+const (
+	TokenEndpoint    = "https://oauth2.googleapis.com/token"
+	AuthEndpoint     = "https://accounts.google.com/o/oauth2/v2/auth"
+	UserInfoEndpoint = "https://www.googleapis.com/oauth2/v1/userinfo?alt=json"
+)
+
+// Antigravity API configuration: APIEndpoint and APIVersion form the loadCodeAssist/onboardUser URLs; the others are sent as request headers.
+const (
+	APIEndpoint    = "https://cloudcode-pa.googleapis.com"
+	APIVersion     = "v1internal"
+	APIUserAgent   = "google-api-nodejs-client/9.15.1"
+	APIClient      = "google-cloud-sdk vscode_cloudshelleditor/0.1"
+	ClientMetadata = `{"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}` // NOTE(review): ideType is IDE_UNSPECIFIED here but "ANTIGRAVITY" in the request bodies in auth.go — confirm intended
+)
--- a/internal/auth/antigravity/filename.go
+++ b/internal/auth/antigravity/filename.go
@@ -0,0 +1,16 @@
+package antigravity
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CredentialFileName returns the file name under which Antigravity credentials
+// are stored: "antigravity-<email>.json" when the trimmed email is non-empty,
+// otherwise plain "antigravity.json". The email is not otherwise sanitized.
+func CredentialFileName(email string) string {
+	if account := strings.TrimSpace(email); account != "" {
+		return fmt.Sprintf("antigravity-%s.json", account)
+	}
+	return "antigravity.json"
+}
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -18,11 +18,12 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for Claude/Anthropic
 const (
-	anthropicAuthURL  = "https://claude.ai/oauth/authorize"
-	anthropicTokenURL = "https://console.anthropic.com/v1/oauth/token"
-	anthropicClientID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-	redirectURI       = "http://localhost:54545/callback"
+	AuthURL     = "https://claude.ai/oauth/authorize"
+	TokenURL    = "https://console.anthropic.com/v1/oauth/token"
+	ClientID    = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+	RedirectURI = "http://localhost:54545/callback"
 )

 // tokenResponse represents the response structure from Anthropic's OAuth token endpoint.
@@ -82,16 +83,16 @@ func (o *ClaudeAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string

 	params := url.Values{
 		"code":                  {"true"},
-		"client_id":             {anthropicClientID},
+		"client_id":             {ClientID},
 		"response_type":         {"code"},
-		"redirect_uri":          {redirectURI},
+		"redirect_uri":          {RedirectURI},
 		"scope":                 {"org:create_api_key user:profile user:inference"},
 		"code_challenge":        {pkceCodes.CodeChallenge},
 		"code_challenge_method": {"S256"},
 		"state":                 {state},
 	}

-	authURL := fmt.Sprintf("%s?%s", anthropicAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, state, nil
 }

@@ -137,8 +138,8 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri
 		"code":          newCode,
 		"state":         state,
 		"grant_type":    "authorization_code",
-		"client_id":     anthropicClientID,
-		"redirect_uri":  redirectURI,
+		"client_id":     ClientID,
+		"redirect_uri":  RedirectURI,
 		"code_verifier": pkceCodes.CodeVerifier,
 	}

@@ -154,7 +155,7 @@ func (o *ClaudeAuth) ExchangeCodeForTokens(ctx context.Context, code, state stri

 	// log.Debugf("Token exchange request: %s", string(jsonBody))

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -221,7 +222,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 	}

 	reqBody := map[string]interface{}{
-		"client_id":     anthropicClientID,
+		"client_id":     ClientID,
 		"grant_type":    "refresh_token",
 		"refresh_token": refreshToken,
 	}
@@ -231,7 +232,7 @@ func (o *ClaudeAuth) RefreshTokens(ctx context.Context, refreshToken string) (*C
 		return nil, fmt.Errorf("failed to marshal request body: %w", err)
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", anthropicTokenURL, strings.NewReader(string(jsonBody)))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(string(jsonBody)))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/codex/filename.go
+++ b/internal/auth/codex/filename.go
@@ -0,0 +1,46 @@
+package codex
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+// CredentialFileName returns the file name under which Codex OAuth credentials
+// are stored.
+//
+// The layout depends on the normalized plan type:
+//   - no plan:     "<prefix>-<email>.json"
+//   - "team" plan: "<prefix>-<hashAccountID>-<email>-team.json"
+//   - other plans: "<prefix>-<email>-<plan>.json"
+//
+// <prefix> is "codex" when includeProviderPrefix is true and empty otherwise,
+// so without the prefix the returned name begins with "-".
+func CredentialFileName(email, planType, hashAccountID string, includeProviderPrefix bool) string {
+	var provider string
+	if includeProviderPrefix {
+		provider = "codex"
+	}
+	account := strings.TrimSpace(email)
+
+	switch suffix := normalizePlanTypeForFilename(planType); suffix {
+	case "":
+		return fmt.Sprintf("%s-%s.json", provider, account)
+	case "team":
+		return fmt.Sprintf("%s-%s-%s-%s.json", provider, hashAccountID, account, suffix)
+	default:
+		return fmt.Sprintf("%s-%s-%s.json", provider, account, suffix)
+	}
+}
+
+// normalizePlanTypeForFilename converts a plan type into a lowercase,
+// hyphen-separated token. Every run of characters that are neither letters nor
+// digits acts as a separator; input without any letter or digit yields "".
+func normalizePlanTypeForFilename(planType string) string {
+	isSeparator := func(r rune) bool {
+		return !(unicode.IsLetter(r) || unicode.IsDigit(r))
+	}
+
+	tokens := strings.FieldsFunc(strings.TrimSpace(planType), isSeparator)
+	lowered := make([]string, 0, len(tokens))
+	for _, token := range tokens {
+		lowered = append(lowered, strings.ToLower(strings.TrimSpace(token)))
+	}
+	// Joining an empty slice yields "", which covers blank and separator-only input.
+	return strings.Join(lowered, "-")
+}
--- a/internal/auth/codex/openai_auth.go
+++ b/internal/auth/codex/openai_auth.go
@@ -19,11 +19,12 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// OAuth configuration constants for OpenAI Codex
 const (
-	openaiAuthURL  = "https://auth.openai.com/oauth/authorize"
-	openaiTokenURL = "https://auth.openai.com/oauth/token"
-	openaiClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
-	redirectURI    = "http://localhost:1455/auth/callback"
+	AuthURL     = "https://auth.openai.com/oauth/authorize"
+	TokenURL    = "https://auth.openai.com/oauth/token"
+	ClientID    = "app_EMoamEEZ73f0CkXaXp7hrann"
+	RedirectURI = "http://localhost:1455/auth/callback"
 )

 // CodexAuth handles the OpenAI OAuth2 authentication flow.
@@ -50,9 +51,9 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 	}

 	params := url.Values{
-		"client_id":                  {openaiClientID},
+		"client_id":                  {ClientID},
 		"response_type":              {"code"},
-		"redirect_uri":               {redirectURI},
+		"redirect_uri":               {RedirectURI},
 		"scope":                      {"openid email profile offline_access"},
 		"state":                      {state},
 		"code_challenge":             {pkceCodes.CodeChallenge},
@@ -62,7 +63,7 @@ func (o *CodexAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string,
 		"codex_cli_simplified_flow":  {"true"},
 	}

-	authURL := fmt.Sprintf("%s?%s", openaiAuthURL, params.Encode())
+	authURL := fmt.Sprintf("%s?%s", AuthURL, params.Encode())
 	return authURL, nil
 }

@@ -77,13 +78,13 @@ func (o *CodexAuth) ExchangeCodeForTokens(ctx context.Context, code string, pkce
 	// Prepare token exchange request
 	data := url.Values{
 		"grant_type":    {"authorization_code"},
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"code":          {code},
-		"redirect_uri":  {redirectURI},
+		"redirect_uri":  {RedirectURI},
 		"code_verifier": {pkceCodes.CodeVerifier},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create token request: %w", err)
 	}
@@ -163,13 +164,13 @@ func (o *CodexAuth) RefreshTokens(ctx context.Context, refreshToken string) (*Co
 	}

 	data := url.Values{
-		"client_id":     {openaiClientID},
+		"client_id":     {ClientID},
 		"grant_type":    {"refresh_token"},
 		"refresh_token": {refreshToken},
 		"scope":         {"openid profile email"},
 	}

-	req, err := http.NewRequestWithContext(ctx, "POST", openaiTokenURL, strings.NewReader(data.Encode()))
+	req, err := http.NewRequestWithContext(ctx, "POST", TokenURL, strings.NewReader(data.Encode()))
 	if err != nil {
 		return nil, fmt.Errorf("failed to create refresh request: %w", err)
 	}
--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -28,19 +28,19 @@ import (
 	"golang.org/x/oauth2/google"
 )

+// OAuth configuration constants for Gemini
 const (
-	geminiOauthClientID       = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
-	geminiOauthClientSecret   = "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"
-	geminiDefaultCallbackPort = 8085
+	ClientID            = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
+	ClientSecret        = "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"
+	DefaultCallbackPort = 8085
 )

-var (
-	geminiOauthScopes = []string{
-		"https://www.googleapis.com/auth/cloud-platform",
-		"https://www.googleapis.com/auth/userinfo.email",
-		"https://www.googleapis.com/auth/userinfo.profile",
-	}
-)
+// OAuth scopes for Gemini authentication
+var Scopes = []string{
+	"https://www.googleapis.com/auth/cloud-platform",
+	"https://www.googleapis.com/auth/userinfo.email",
+	"https://www.googleapis.com/auth/userinfo.profile",
+}

 // GeminiAuth provides methods for handling the Gemini OAuth2 authentication flow.
 // It encapsulates the logic for obtaining, storing, and refreshing authentication tokens
@@ -74,7 +74,7 @@ func NewGeminiAuth() *GeminiAuth {
 //   - *http.Client: An HTTP client configured with authentication
 //   - error: An error if the client configuration fails, nil otherwise
 func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiTokenStorage, cfg *config.Config, opts *WebLoginOptions) (*http.Client, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
@@ -112,10 +112,10 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken

 	// Configure the OAuth2 client.
 	conf := &oauth2.Config{
-		ClientID:     geminiOauthClientID,
-		ClientSecret: geminiOauthClientSecret,
+		ClientID:     ClientID,
+		ClientSecret: ClientSecret,
 		RedirectURL:  callbackURL, // This will be used by the local server.
-		Scopes:       geminiOauthScopes,
+		Scopes:       Scopes,
 		Endpoint:     google.Endpoint,
 	}

@@ -198,9 +198,9 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 	}

 	ifToken["token_uri"] = "https://oauth2.googleapis.com/token"
-	ifToken["client_id"] = geminiOauthClientID
-	ifToken["client_secret"] = geminiOauthClientSecret
-	ifToken["scopes"] = geminiOauthScopes
+	ifToken["client_id"] = ClientID
+	ifToken["client_secret"] = ClientSecret
+	ifToken["scopes"] = Scopes
 	ifToken["universe_domain"] = "googleapis.com"

 	ts := GeminiTokenStorage{
@@ -226,7 +226,7 @@ func (g *GeminiAuth) createTokenStorage(ctx context.Context, config *oauth2.Conf
 //   - *oauth2.Token: The OAuth2 token obtained from the authorization flow
 //   - error: An error if the token acquisition fails, nil otherwise
 func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config, opts *WebLoginOptions) (*oauth2.Token, error) {
-	callbackPort := geminiDefaultCallbackPort
+	callbackPort := DefaultCallbackPort
 	if opts != nil && opts.CallbackPort > 0 {
 		callbackPort = opts.CallbackPort
 	}
--- a/internal/auth/kiro/aws.go
+++ b/internal/auth/kiro/aws.go
@@ -5,10 +5,12 @@ package kiro
 import (
 	"encoding/base64"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 )

 // PKCECodes holds PKCE verification codes for OAuth2 PKCE flow
@@ -85,6 +87,87 @@ type KiroModel struct {
 // KiroIDETokenFile is the default path to Kiro IDE's token file
 const KiroIDETokenFile = ".aws/sso/cache/kiro-auth-token.json"

+// Defaults that LoadKiroIDETokenWithRetry substitutes when its arguments are <= 0.
+const (
+	defaultTokenReadMaxAttempts = 10                    // maximum number of read attempts
+	defaultTokenReadBaseDelay   = 50 * time.Millisecond // delay before the first retry; grows with each attempt
+)
+
+// isTransientFileError reports whether err looks like a temporary failure to
+// read the token file, i.e. one that a retry may resolve.
+//
+// Detection is by message text only: sharing/lock violations and
+// "being used by another process" (the wording of Windows file-lock errors),
+// "access is denied", and truncated-JSON messages such as "unexpected EOF".
+// A nil error is never transient.
+func isTransientFileError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	// containsAny reports whether text includes at least one marker.
+	containsAny := func(text string, markers ...string) bool {
+		for _, marker := range markers {
+			if strings.Contains(text, marker) {
+				return true
+			}
+		}
+		return false
+	}
+
+	// First look at the cause wrapped inside an *os.PathError, matched as-is.
+	var pathErr *os.PathError
+	if errors.As(err, &pathErr) && containsAny(pathErr.Err.Error(),
+		"being used by another process",
+		"sharing violation",
+		"lock violation",
+	) {
+		return true
+	}
+
+	// Then match the whole message case-insensitively against a wider list.
+	return containsAny(strings.ToLower(err.Error()),
+		"being used by another process",
+		"sharing violation",
+		"lock violation",
+		"access is denied",
+		"unexpected end of json",
+		"unexpected eof",
+	)
+}
+
+// LoadKiroIDETokenWithRetry calls LoadKiroIDEToken until it succeeds, retrying
+// only on errors that isTransientFileError classifies as transient.
+//
+// maxAttempts is the total number of attempts (default 10 if <= 0).
+// baseDelay is the wait before the first retry (default 50ms if <= 0); the
+// wait doubles after every retry and never exceeds 500ms.
+//
+// A non-transient error is returned immediately and unwrapped. When every
+// attempt fails transiently, the last error is returned wrapped with the
+// attempt count; no delay is spent after the final attempt.
+func LoadKiroIDETokenWithRetry(maxAttempts int, baseDelay time.Duration) (*KiroTokenData, error) {
+	const maxDelay = 500 * time.Millisecond
+
+	if maxAttempts <= 0 {
+		maxAttempts = defaultTokenReadMaxAttempts
+	}
+	if baseDelay <= 0 {
+		baseDelay = defaultTokenReadBaseDelay
+	}
+
+	// Track the delay incrementally and clamp it on every step. Computing
+	// baseDelay * 2^attempt directly would overflow for large attempt counts.
+	delay := baseDelay
+	if delay > maxDelay {
+		delay = maxDelay
+	}
+
+	var lastErr error
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		token, err := LoadKiroIDEToken()
+		if err == nil {
+			return token, nil
+		}
+
+		// Only retry for transient errors
+		if !isTransientFileError(err) {
+			return nil, err
+		}
+		lastErr = err
+
+		// Nothing left to retry: report the failure without a pointless wait.
+		if attempt == maxAttempts {
+			break
+		}
+
+		time.Sleep(delay)
+		delay *= 2
+		if delay > maxDelay {
+			delay = maxDelay
+		}
+	}
+
+	return nil, fmt.Errorf("failed to read token file after %d attempts: %w", maxAttempts, lastErr)
+}
+
 // LoadKiroIDEToken loads token data from Kiro IDE's token file.
 func LoadKiroIDEToken() (*KiroTokenData, error) {
 	homeDir, err := os.UserHomeDir()
@@ -107,6 +190,9 @@ func LoadKiroIDEToken() (*KiroTokenData, error) {
 		return nil, fmt.Errorf("access token is empty in Kiro IDE token file")
 	}

+	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
+	token.AuthMethod = strings.ToLower(token.AuthMethod)
+
 	return &token, nil
 }

@@ -136,6 +222,9 @@ func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
 		return nil, fmt.Errorf("access token is empty in token file")
 	}

+	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
+	token.AuthMethod = strings.ToLower(token.AuthMethod)
+
 	return &token, nil
 }

@@ -271,7 +360,7 @@ func SanitizeEmailForFilename(email string) string {
 	}

 	result := email
-	
+
 	// First, handle URL-encoded path traversal attempts (%2F, %2E, %5C, etc.)
 	// This prevents encoded characters from bypassing the sanitization.
 	// Note: We replace % last to catch any remaining encodings including double-encoding (%252F)
@@ -289,7 +378,7 @@ func SanitizeEmailForFilename(email string) string {
 	for _, char := range []string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|", " ", "\x00"} {
 		result = strings.ReplaceAll(result, char, "_")
 	}
-	
+
 	// Prevent path traversal: replace leading dots in each path component
 	// This handles cases like "../../../etc/passwd" → "_.._.._.._etc_passwd"
 	parts := strings.Split(result, "_")
@@ -300,6 +389,65 @@ func SanitizeEmailForFilename(email string) string {
 		parts[i] = part
 	}
 	result = strings.Join(parts, "_")
-	
+
 	return result
 }
+
+// ExtractIDCIdentifier derives a filename-safe identifier from an IDC start
+// URL: the text between the scheme and the first dot, with "/", "\" and ":"
+// replaced by "_". It returns "" when startURL is empty or that text is empty.
+// Examples:
+//   - "https://d-1234567890.awsapps.com/start" -> "d-1234567890"
+//   - "https://my-company.awsapps.com/start" -> "my-company"
+//   - "https://acme-corp.awsapps.com/start" -> "acme-corp"
+func ExtractIDCIdentifier(startURL string) string {
+	// Drop the scheme, if any.
+	rest := strings.TrimPrefix(startURL, "https://")
+	rest = strings.TrimPrefix(rest, "http://")
+
+	// Expected shape: {identifier}.awsapps.com/start — keep what precedes the first dot.
+	label, _, _ := strings.Cut(rest, ".")
+	if label == "" {
+		return ""
+	}
+
+	// Neutralize path and drive separators so the result is safe in a file name.
+	return strings.NewReplacer("/", "_", "\\", "_", ":", "_").Replace(label)
+}
+
+// GenerateTokenFileName builds the file name used to store tokenData.
+//
+// An empty AuthMethod is written as "unknown". The identifier is chosen in
+// this order:
+//  1. Email, with "@" and "." replaced by "-": kiro-{authMethod}-{email}.json
+//  2. for AuthMethod "idc", the identifier extracted from StartURL:
+//     kiro-idc-{identifier}.json
+//  3. otherwise no identifier: kiro-{authMethod}.json
+//
+// tokenData must be non-nil.
+func GenerateTokenFileName(tokenData *KiroTokenData) string {
+	method := tokenData.AuthMethod
+	if method == "" {
+		method = "unknown"
+	}
+
+	switch {
+	case tokenData.Email != "":
+		safeEmail := strings.NewReplacer("@", "-", ".", "-").Replace(tokenData.Email)
+		return fmt.Sprintf("kiro-%s-%s.json", method, safeEmail)
+	case method == "idc" && tokenData.StartURL != "":
+		if identifier := ExtractIDCIdentifier(tokenData.StartURL); identifier != "" {
+			return fmt.Sprintf("kiro-%s-%s.json", method, identifier)
+		}
+	}
+
+	// Neither an email nor a usable IDC identifier is available.
+	return fmt.Sprintf("kiro-%s.json", method)
+}
--- a/internal/auth/kiro/aws_auth.go
+++ b/internal/auth/kiro/aws_auth.go
@@ -280,6 +280,11 @@ func (k *KiroAuth) CreateTokenStorage(tokenData *KiroTokenData) *KiroTokenStorag
 		AuthMethod:   tokenData.AuthMethod,
 		Provider:     tokenData.Provider,
 		LastRefresh:  time.Now().Format(time.RFC3339),
+		ClientID:     tokenData.ClientID,
+		ClientSecret: tokenData.ClientSecret,
+		Region:       tokenData.Region,
+		StartURL:     tokenData.StartURL,
+		Email:        tokenData.Email,
 	}
 }

@@ -311,4 +316,19 @@ func (k *KiroAuth) UpdateTokenStorage(storage *KiroTokenStorage, tokenData *Kiro
 	storage.AuthMethod = tokenData.AuthMethod
 	storage.Provider = tokenData.Provider
 	storage.LastRefresh = time.Now().Format(time.RFC3339)
+	if tokenData.ClientID != "" {
+		storage.ClientID = tokenData.ClientID
+	}
+	if tokenData.ClientSecret != "" {
+		storage.ClientSecret = tokenData.ClientSecret
+	}
+	if tokenData.Region != "" {
+		storage.Region = tokenData.Region
+	}
+	if tokenData.StartURL != "" {
+		storage.StartURL = tokenData.StartURL
+	}
+	if tokenData.Email != "" {
+		storage.Email = tokenData.Email
+	}
 }
--- a/internal/auth/kiro/aws_test.go
+++ b/internal/auth/kiro/aws_test.go
@@ -151,11 +151,161 @@ func TestSanitizeEmailForFilename(t *testing.T) {
 // createTestJWT creates a test JWT token with the given claims
 func createTestJWT(claims map[string]any) string {
 	header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256","typ":"JWT"}`))
-	
+
 	payloadBytes, _ := json.Marshal(claims)
 	payload := base64.RawURLEncoding.EncodeToString(payloadBytes)
-	
+
 	signature := base64.RawURLEncoding.EncodeToString([]byte("fake-signature"))
-	
+
 	return header + "." + payload + "." + signature
 }
+
+// TestExtractIDCIdentifier feeds a table of start URLs to ExtractIDCIdentifier
+// and compares each result with the expected identifier.
+func TestExtractIDCIdentifier(t *testing.T) {
+	cases := []struct {
+		name     string
+		startURL string
+		expected string
+	}{
+		{"Empty URL", "", ""},
+		{"Standard IDC URL with d- prefix", "https://d-1234567890.awsapps.com/start", "d-1234567890"},
+		{"IDC URL with company name", "https://my-company.awsapps.com/start", "my-company"},
+		{"IDC URL with simple name", "https://acme-corp.awsapps.com/start", "acme-corp"},
+		{"IDC URL without https", "http://d-9876543210.awsapps.com/start", "d-9876543210"},
+		{"IDC URL with subdomain only", "https://test.awsapps.com/start", "test"},
+		{"Builder ID URL", "https://view.awsapps.com/start", "view"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := ExtractIDCIdentifier(tc.startURL); got != tc.expected {
+				t.Errorf("ExtractIDCIdentifier() = %q, want %q", got, tc.expected)
+			}
+		})
+	}
+}
+
+// TestGenerateTokenFileName covers the three naming priorities visible in the
+// expectations: sanitized email first, IDC start-URL identifier second, and the
+// bare auth method (or "unknown") last.
+func TestGenerateTokenFileName(t *testing.T) {
+	// td keeps the table compact; empty arguments leave the field at its zero value.
+	td := func(authMethod, email, startURL string) *KiroTokenData {
+		return &KiroTokenData{AuthMethod: authMethod, Email: email, StartURL: startURL}
+	}
+
+	cases := []struct {
+		name      string
+		tokenData *KiroTokenData
+		expected  string
+	}{
+		{"IDC with email", td("idc", "user@example.com", "https://d-1234567890.awsapps.com/start"), "kiro-idc-user-example-com.json"},
+		{"IDC without email but with startUrl", td("idc", "", "https://d-1234567890.awsapps.com/start"), "kiro-idc-d-1234567890.json"},
+		{"IDC with company name in startUrl", td("idc", "", "https://my-company.awsapps.com/start"), "kiro-idc-my-company.json"},
+		{"IDC without email and without startUrl", td("idc", "", ""), "kiro-idc.json"},
+		{"Builder ID with email", td("builder-id", "user@gmail.com", "https://view.awsapps.com/start"), "kiro-builder-id-user-gmail-com.json"},
+		{"Builder ID without email", td("builder-id", "", "https://view.awsapps.com/start"), "kiro-builder-id.json"},
+		{"Social auth with email", td("google", "user@gmail.com", ""), "kiro-google-user-gmail-com.json"},
+		{"Empty auth method", td("", "", ""), "kiro-unknown.json"},
+		{"Email with special characters", td("idc", "user.name+tag@sub.example.com", "https://d-1234567890.awsapps.com/start"), "kiro-idc-user-name+tag-sub-example-com.json"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := GenerateTokenFileName(tc.tokenData); got != tc.expected {
+				t.Errorf("GenerateTokenFileName() = %q, want %q", got, tc.expected)
+			}
+		})
+	}
+}
--- a/internal/auth/kiro/background_refresh.go
+++ b/internal/auth/kiro/background_refresh.go
@@ -0,0 +1,228 @@
+package kiro
+
+import (
+	"context"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"golang.org/x/sync/semaphore"
+)
+
+// Token is the in-memory view of one stored credential that the background
+// refresher reads and updates.
+type Token struct {
+	ID           string    // identifies the token in log lines and in the refresh callback
+	AccessToken  string    // overwritten after a successful refresh
+	RefreshToken string    // sent to the refresh call; overwritten after a successful refresh
+	ExpiresAt    time.Time // updated when the refreshed expiry parses as RFC 3339
+	LastVerified time.Time // set to time.Now() after a successful refresh
+	ClientID     string    // passed to the SSO OIDC client for "idc" and "builder-id" tokens
+	ClientSecret string    // passed to the SSO OIDC client for "idc" and "builder-id" tokens
+	AuthMethod   string    // selects the refresh path; compared case-insensitively
+	Provider     string
+	StartURL     string // passed to RefreshTokenWithRegion for "idc" tokens
+	Region       string // passed to RefreshTokenWithRegion for "idc" tokens
+}
+
+// TokenRepository is the storage the background refresher works against.
+type TokenRepository interface {
+	// FindOldestUnverified is called once per batch with the configured batch size.
+	// NOTE(review): the name suggests ordering by LastVerified and a cap of limit
+	// results — confirm against the implementation.
+	FindOldestUnverified(limit int) []*Token
+	// UpdateToken is called after a token's fields were updated by a successful refresh.
+	UpdateToken(token *Token) error
+}
+
+// RefresherOption mutates a BackgroundRefresher while it is being constructed.
+type RefresherOption func(*BackgroundRefresher)
+
+// WithInterval sets the period of the refresh ticker.
+func WithInterval(interval time.Duration) RefresherOption {
+	apply := func(r *BackgroundRefresher) { r.interval = interval }
+	return apply
+}
+
+// WithBatchSize sets how many tokens are requested from the repository per batch.
+func WithBatchSize(size int) RefresherOption {
+	apply := func(r *BackgroundRefresher) { r.batchSize = size }
+	return apply
+}
+
+// WithConcurrency sets the weight of the semaphore that bounds parallel refreshes.
+func WithConcurrency(concurrency int) RefresherOption {
+	apply := func(r *BackgroundRefresher) { r.concurrency = concurrency }
+	return apply
+}
+
+// BackgroundRefresher periodically pulls a batch of tokens from a
+// TokenRepository and refreshes them with bounded parallelism.
+type BackgroundRefresher struct {
+	interval         time.Duration
+	batchSize        int
+	concurrency      int
+	tokenRepo        TokenRepository
+	stopCh           chan struct{}
+	wg               sync.WaitGroup
+	oauth            *KiroOAuth
+	ssoClient        *SSOOIDCClient
+	callbackMu       sync.RWMutex                                   // guards concurrent access to the callback
+	onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // invoked after a successful refresh
+}
+
+// NewBackgroundRefresher builds a refresher over repo with the defaults
+// interval=1m, batchSize=50 and concurrency=10, then applies opts in order.
+//
+// The OAuth and SSO clients stay nil unless WithConfig is supplied; nothing
+// initialises them later.
+//
+// A non-positive interval or concurrency left behind by an option is reset to
+// its default: time.NewTicker panics on a non-positive period, and a weighted
+// semaphore of size <= 0 can never be acquired, which would stall every batch
+// until the context is cancelled.
+func NewBackgroundRefresher(repo TokenRepository, opts ...RefresherOption) *BackgroundRefresher {
+	const (
+		defaultInterval    = time.Minute
+		defaultBatchSize   = 50
+		defaultConcurrency = 10
+	)
+
+	r := &BackgroundRefresher{
+		interval:    defaultInterval,
+		batchSize:   defaultBatchSize,
+		concurrency: defaultConcurrency,
+		tokenRepo:   repo,
+		stopCh:      make(chan struct{}),
+		oauth:       nil, // set by WithConfig
+		ssoClient:   nil, // set by WithConfig
+	}
+	for _, opt := range opts {
+		if opt == nil {
+			continue
+		}
+		opt(r)
+	}
+
+	if r.interval <= 0 {
+		r.interval = defaultInterval
+	}
+	if r.concurrency <= 0 {
+		r.concurrency = defaultConcurrency
+	}
+	return r
+}
+
+// WithConfig builds the OAuth and SSO OIDC clients from cfg and installs them
+// on the refresher.
+func WithConfig(cfg *config.Config) RefresherOption {
+	apply := func(r *BackgroundRefresher) {
+		oauthClient := NewKiroOAuth(cfg)
+		r.oauth = oauthClient
+		sso := NewSSOOIDCClient(cfg)
+		r.ssoClient = sso
+	}
+	return apply
+}
+
+// WithOnTokenRefreshed registers callback to run after each successful token
+// refresh. It is handed the token ID and the freshly obtained token data, so
+// that other components can react to the update.
+func WithOnTokenRefreshed(callback func(tokenID string, tokenData *KiroTokenData)) RefresherOption {
+	apply := func(r *BackgroundRefresher) {
+		r.callbackMu.Lock()
+		defer r.callbackMu.Unlock()
+		r.onTokenRefreshed = callback
+	}
+	return apply
+}
+
+// Start launches the refresh loop in its own goroutine. The loop runs one
+// batch immediately, then one per tick, and exits when ctx is cancelled or the
+// stop channel is closed.
+func (r *BackgroundRefresher) Start(ctx context.Context) {
+	loop := func() {
+		defer r.wg.Done()
+
+		tick := time.NewTicker(r.interval)
+		defer tick.Stop()
+
+		// First pass happens right away instead of after one full interval.
+		r.refreshBatch(ctx)
+
+		for {
+			select {
+			case <-tick.C:
+				r.refreshBatch(ctx)
+			case <-r.stopCh:
+				return
+			case <-ctx.Done():
+				return
+			}
+		}
+	}
+
+	r.wg.Add(1)
+	go loop()
+}
+
+// Stop signals the refresh loop to exit and blocks until it has returned.
+//
+// Calling Stop again after it has returned is a no-op instead of a
+// "close of closed channel" panic. Stop is still not safe to call from several
+// goroutines at the same time.
+func (r *BackgroundRefresher) Stop() {
+	select {
+	case <-r.stopCh:
+		// Already closed by an earlier Stop.
+	default:
+		close(r.stopCh)
+	}
+	r.wg.Wait()
+}
+
+// refreshBatch asks the repository for one batch of tokens and refreshes them
+// with at most r.concurrency refreshes in flight, starting them 100ms apart.
+//
+// Dispatching stops early when ctx is cancelled or the stop channel is closed.
+// On every exit path the function waits for refreshes that were already
+// started, so no refresh goroutine outlives the batch (and therefore Stop).
+func (r *BackgroundRefresher) refreshBatch(ctx context.Context) {
+	tokens := r.tokenRepo.FindOldestUnverified(r.batchSize)
+	if len(tokens) == 0 {
+		return
+	}
+
+	// A semaphore of size <= 0 can never be acquired; fall back to serial work.
+	limit := int64(r.concurrency)
+	if limit < 1 {
+		limit = 1
+	}
+	sem := semaphore.NewWeighted(limit)
+
+	var wg sync.WaitGroup
+	// Deferred so the early returns below also wait for in-flight refreshes.
+	defer wg.Wait()
+
+	for i, token := range tokens {
+		if token == nil {
+			continue
+		}
+
+		if i > 0 {
+			// Pace the dispatches while staying responsive to shutdown.
+			select {
+			case <-ctx.Done():
+				return
+			case <-r.stopCh:
+				return
+			case <-time.After(100 * time.Millisecond):
+			}
+		}
+
+		if err := sem.Acquire(ctx, 1); err != nil {
+			return
+		}
+
+		wg.Add(1)
+		go func(t *Token) {
+			defer wg.Done()
+			defer sem.Release(1)
+			r.refreshSingle(ctx, t)
+		}(token)
+	}
+}
+
+// refreshSingle refreshes one token through the client that matches its auth
+// method, stores the result via the repository and then notifies the refresh
+// callback, if one is registered.
+//
+// Auth methods are matched case-insensitively: "idc" and "builder-id" go
+// through the SSO OIDC client, everything else through the OAuth client.
+// Failures are logged and leave the token untouched.
+func (r *BackgroundRefresher) refreshSingle(ctx context.Context, token *Token) {
+	var newTokenData *KiroTokenData
+	var err error
+
+	// Normalize auth method to lowercase for case-insensitive matching
+	authMethod := strings.ToLower(token.AuthMethod)
+
+	switch authMethod {
+	case "idc", "builder-id":
+		// The SSO client only exists when WithConfig was supplied; this runs
+		// in an unrecovered goroutine, so a nil dereference would crash the process.
+		if r.ssoClient == nil {
+			log.Printf("failed to refresh token %s: SSO OIDC client is not configured", token.ID)
+			return
+		}
+		if authMethod == "idc" {
+			newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
+				ctx,
+				token.ClientID,
+				token.ClientSecret,
+				token.RefreshToken,
+				token.Region,
+				token.StartURL,
+			)
+		} else {
+			newTokenData, err = r.ssoClient.RefreshToken(
+				ctx,
+				token.ClientID,
+				token.ClientSecret,
+				token.RefreshToken,
+			)
+		}
+	default:
+		if r.oauth == nil {
+			log.Printf("failed to refresh token %s: OAuth client is not configured", token.ID)
+			return
+		}
+		newTokenData, err = r.oauth.RefreshToken(ctx, token.RefreshToken)
+	}
+
+	if err != nil {
+		log.Printf("failed to refresh token %s: %v", token.ID, err)
+		return
+	}
+	if newTokenData == nil {
+		log.Printf("failed to refresh token %s: refresh returned no token data", token.ID)
+		return
+	}
+
+	token.AccessToken = newTokenData.AccessToken
+	// Keep the current refresh token when the response does not carry a new
+	// one; overwriting it with "" would make the token unrefreshable.
+	if newTokenData.RefreshToken != "" {
+		token.RefreshToken = newTokenData.RefreshToken
+	}
+	token.LastVerified = time.Now()
+
+	if newTokenData.ExpiresAt != "" {
+		// An unparseable expiry is ignored and the previous ExpiresAt is kept.
+		if expTime, parseErr := time.Parse(time.RFC3339, newTokenData.ExpiresAt); parseErr == nil {
+			token.ExpiresAt = expTime
+		}
+	}
+
+	if err := r.tokenRepo.UpdateToken(token); err != nil {
+		log.Printf("failed to update token %s: %v", token.ID, err)
+		return
+	}
+
+	// Option A: after a successful refresh, fire the callback so the Watcher
+	// can update its in-memory Auth object.
+	r.callbackMu.RLock()
+	callback := r.onTokenRefreshed
+	r.callbackMu.RUnlock()
+
+	if callback != nil {
+		// Isolate callback panics with defer/recover so they cannot take down
+		// the whole process.
+		func() {
+			defer func() {
+				if rec := recover(); rec != nil {
+					log.Printf("background refresh: callback panic for token %s: %v", token.ID, rec)
+				}
+			}()
+			log.Printf("background refresh: notifying token refresh callback for %s", token.ID)
+			callback(token.ID, newTokenData)
+		}()
+	}
+}
--- a/internal/auth/kiro/cooldown.go
+++ b/internal/auth/kiro/cooldown.go
@@ -0,0 +1,112 @@
+package kiro
+
+import (
+	"sync"
+	"time"
+)
+
+const (
+	// Reason strings for the reason argument of SetCooldown.
+	CooldownReason429            = "rate_limit_exceeded"
+	CooldownReasonSuspended      = "account_suspended"
+	CooldownReasonQuotaExhausted = "quota_exhausted"
+
+	// Cooldown lengths. CalculateCooldownFor429 starts at DefaultShortCooldown
+	// and never returns more than MaxShortCooldown.
+	DefaultShortCooldown = 1 * time.Minute
+	MaxShortCooldown     = 5 * time.Minute
+	LongCooldown         = 24 * time.Hour
+)
+
+// CooldownManager tracks, per token key, when a cooldown ends and why it was
+// imposed. Both maps are guarded by mu.
+type CooldownManager struct {
+	mu        sync.RWMutex
+	cooldowns map[string]time.Time // token key -> time at which the cooldown ends
+	reasons   map[string]string    // token key -> reason given to SetCooldown
+}
+
+// NewCooldownManager returns a manager with both maps allocated and empty.
+func NewCooldownManager() *CooldownManager {
+	mgr := new(CooldownManager)
+	mgr.cooldowns = map[string]time.Time{}
+	mgr.reasons = map[string]string{}
+	return mgr
+}
+
+// SetCooldown puts tokenKey into cooldown until now+duration and records
+// reason, replacing any previous entry for the key.
+func (cm *CooldownManager) SetCooldown(tokenKey string, duration time.Duration, reason string) {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	deadline := time.Now().Add(duration)
+	cm.cooldowns[tokenKey], cm.reasons[tokenKey] = deadline, reason
+}
+
+// IsInCooldown reports whether tokenKey has a cooldown whose end time is still
+// in the future.
+func (cm *CooldownManager) IsInCooldown(tokenKey string) bool {
+	cm.mu.RLock()
+	defer cm.mu.RUnlock()
+
+	if deadline, ok := cm.cooldowns[tokenKey]; ok {
+		return deadline.After(time.Now())
+	}
+	return false
+}
+
+// GetRemainingCooldown returns how long tokenKey still has to wait, or 0 when
+// the key is unknown or its cooldown has already ended.
+func (cm *CooldownManager) GetRemainingCooldown(tokenKey string) time.Duration {
+	cm.mu.RLock()
+	defer cm.mu.RUnlock()
+
+	deadline, ok := cm.cooldowns[tokenKey]
+	if !ok {
+		return 0
+	}
+	if left := time.Until(deadline); left > 0 {
+		return left
+	}
+	return 0
+}
+
+// GetCooldownReason returns the reason stored for tokenKey, or "" when there
+// is none.
+func (cm *CooldownManager) GetCooldownReason(tokenKey string) string {
+	cm.mu.RLock()
+	reason := cm.reasons[tokenKey]
+	cm.mu.RUnlock()
+	return reason
+}
+
+// ClearCooldown forgets both the end time and the reason for tokenKey.
+// Clearing an unknown key does nothing.
+func (cm *CooldownManager) ClearCooldown(tokenKey string) {
+	cm.mu.Lock()
+	delete(cm.reasons, tokenKey)
+	delete(cm.cooldowns, tokenKey)
+	cm.mu.Unlock()
+}
+
+// CleanupExpired drops every entry whose end time lies strictly before the
+// moment the sweep started.
+func (cm *CooldownManager) CleanupExpired() {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	sweepTime := time.Now()
+	for key, deadline := range cm.cooldowns {
+		if !deadline.Before(sweepTime) {
+			continue
+		}
+		delete(cm.cooldowns, key)
+		delete(cm.reasons, key)
+	}
+}
+
+// StartCleanupRoutine calls CleanupExpired once per interval. It blocks the
+// calling goroutine and only returns after stopCh is closed or receives a value.
+func (cm *CooldownManager) StartCleanupRoutine(interval time.Duration, stopCh <-chan struct{}) {
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-stopCh:
+			return
+		case <-tick.C:
+			cm.CleanupExpired()
+		}
+	}
+}
+
+// CalculateCooldownFor429 returns the exponential back-off for the given retry
+// count: DefaultShortCooldown doubled retryCount times, capped at
+// MaxShortCooldown.
+//
+// A retryCount of zero or less yields DefaultShortCooldown. Doubling stops as
+// soon as the cap is reached, so large counts cannot overflow; computing
+// 1<<retryCount directly panics for negative counts and wraps to a negative or
+// zero duration for large ones.
+func CalculateCooldownFor429(retryCount int) time.Duration {
+	duration := DefaultShortCooldown
+	for i := 0; i < retryCount; i++ {
+		duration *= 2
+		if duration >= MaxShortCooldown {
+			return MaxShortCooldown
+		}
+	}
+	if duration > MaxShortCooldown {
+		return MaxShortCooldown
+	}
+	return duration
+}
+
+// CalculateCooldownUntilNextDay returns the time left until the next midnight
+// in the location of time.Now().
+func CalculateCooldownUntilNextDay() time.Duration {
+	now := time.Now()
+	year, month, day := now.Date()
+	// time.Date normalises day+1 past the end of a month.
+	midnight := time.Date(year, month, day+1, 0, 0, 0, 0, now.Location())
+	return time.Until(midnight)
+}
--- a/internal/auth/kiro/cooldown_test.go
+++ b/internal/auth/kiro/cooldown_test.go
@@ -0,0 +1,240 @@
+package kiro
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestNewCooldownManager checks that the constructor allocates both maps.
+func TestNewCooldownManager(t *testing.T) {
+	mgr := NewCooldownManager()
+	if mgr == nil {
+		t.Fatal("expected non-nil CooldownManager")
+	}
+
+	switch {
+	case mgr.cooldowns == nil && mgr.reasons == nil:
+		t.Error("expected non-nil cooldowns map")
+		t.Error("expected non-nil reasons map")
+	case mgr.cooldowns == nil:
+		t.Error("expected non-nil cooldowns map")
+	case mgr.reasons == nil:
+		t.Error("expected non-nil reasons map")
+	}
+}
+
+// TestSetCooldown checks that a freshly set cooldown is active and keeps its reason.
+func TestSetCooldown(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown(key, 1*time.Minute, CooldownReason429)
+
+	if active := mgr.IsInCooldown(key); !active {
+		t.Error("expected token to be in cooldown")
+	}
+	if got := mgr.GetCooldownReason(key); got != CooldownReason429 {
+		t.Errorf("expected reason %s, got %s", CooldownReason429, got)
+	}
+}
+
+// TestIsInCooldown_NotSet checks that an unknown key is never in cooldown.
+func TestIsInCooldown_NotSet(t *testing.T) {
+	if active := NewCooldownManager().IsInCooldown("nonexistent"); active {
+		t.Error("expected non-existent token to not be in cooldown")
+	}
+}
+
+// TestIsInCooldown_Expired checks that a cooldown stops being reported once
+// its end time has passed.
+func TestIsInCooldown_Expired(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown(key, 1*time.Millisecond, CooldownReason429)
+
+	// Sleep well past the 1ms cooldown.
+	time.Sleep(10 * time.Millisecond)
+
+	if active := mgr.IsInCooldown(key); active {
+		t.Error("expected expired cooldown to return false")
+	}
+}
+
+// TestGetRemainingCooldown checks that the remaining time of an active
+// cooldown lies in (0, 1s].
+func TestGetRemainingCooldown(t *testing.T) {
+	const total = 1 * time.Second
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown("token1", total, CooldownReason429)
+
+	left := mgr.GetRemainingCooldown("token1")
+	inRange := left > 0 && left <= total
+	if !inRange {
+		t.Errorf("expected remaining cooldown between 0 and 1s, got %v", left)
+	}
+}
+
+// TestGetRemainingCooldown_NotSet checks that an unknown key has no remaining time.
+func TestGetRemainingCooldown_NotSet(t *testing.T) {
+	mgr := NewCooldownManager()
+	if left := mgr.GetRemainingCooldown("nonexistent"); left != 0 {
+		t.Errorf("expected 0 remaining for non-existent, got %v", left)
+	}
+}
+
+// TestGetRemainingCooldown_Expired checks that an elapsed cooldown reports 0
+// rather than a negative duration.
+func TestGetRemainingCooldown_Expired(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown(key, 1*time.Millisecond, CooldownReason429)
+
+	// Sleep well past the 1ms cooldown.
+	time.Sleep(10 * time.Millisecond)
+
+	if left := mgr.GetRemainingCooldown(key); left != 0 {
+		t.Errorf("expected 0 remaining for expired, got %v", left)
+	}
+}
+
+// TestGetCooldownReason checks that the stored reason is returned unchanged.
+func TestGetCooldownReason(t *testing.T) {
+	mgr := NewCooldownManager()
+	mgr.SetCooldown("token1", 1*time.Minute, CooldownReasonSuspended)
+
+	if got := mgr.GetCooldownReason("token1"); got != CooldownReasonSuspended {
+		t.Errorf("expected reason %s, got %s", CooldownReasonSuspended, got)
+	}
+}
+
+// TestGetCooldownReason_NotSet checks that an unknown key yields an empty reason.
+func TestGetCooldownReason_NotSet(t *testing.T) {
+	mgr := NewCooldownManager()
+	if got := mgr.GetCooldownReason("nonexistent"); got != "" {
+		t.Errorf("expected empty reason for non-existent, got %s", got)
+	}
+}
+
+// TestClearCooldown checks that clearing removes both the cooldown and its reason.
+func TestClearCooldown(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown(key, 1*time.Minute, CooldownReason429)
+	mgr.ClearCooldown(key)
+
+	if stillActive := mgr.IsInCooldown(key); stillActive {
+		t.Error("expected cooldown to be cleared")
+	}
+	if reason := mgr.GetCooldownReason(key); reason != "" {
+		t.Error("expected reason to be cleared")
+	}
+}
+
+// TestClearCooldown_NonExistent only verifies that clearing an unknown key
+// does not panic.
+func TestClearCooldown_NonExistent(t *testing.T) {
+	NewCooldownManager().ClearCooldown("nonexistent")
+}
+
+// TestCleanupExpired checks that a sweep removes elapsed entries and keeps
+// the one that is still active.
+func TestCleanupExpired(t *testing.T) {
+	mgr := NewCooldownManager()
+	for _, key := range []string{"expired1", "expired2"} {
+		mgr.SetCooldown(key, 1*time.Millisecond, CooldownReason429)
+	}
+	mgr.SetCooldown("active", 1*time.Hour, CooldownReason429)
+
+	// Let the two short cooldowns elapse before sweeping.
+	time.Sleep(10 * time.Millisecond)
+	mgr.CleanupExpired()
+
+	if reason := mgr.GetCooldownReason("expired1"); reason != "" {
+		t.Error("expected expired1 to be cleaned up")
+	}
+	if reason := mgr.GetCooldownReason("expired2"); reason != "" {
+		t.Error("expected expired2 to be cleaned up")
+	}
+	if reason := mgr.GetCooldownReason("active"); reason != CooldownReason429 {
+		t.Error("expected active to remain")
+	}
+}
+
+// TestCalculateCooldownFor429_FirstRetry checks that retry 0 yields the base cooldown.
+func TestCalculateCooldownFor429_FirstRetry(t *testing.T) {
+	if got := CalculateCooldownFor429(0); got != DefaultShortCooldown {
+		t.Errorf("expected %v for retry 0, got %v", DefaultShortCooldown, got)
+	}
+}
+
+// TestCalculateCooldownFor429_Exponential checks that the cooldown grows
+// between retry 1 and retry 2.
+func TestCalculateCooldownFor429_Exponential(t *testing.T) {
+	first, second := CalculateCooldownFor429(1), CalculateCooldownFor429(2)
+	if !(second > first) {
+		t.Errorf("expected d2 > d1, got d1=%v, d2=%v", first, second)
+	}
+}
+
+// TestCalculateCooldownFor429_MaxCap checks that a retry count far beyond the
+// cap returns exactly MaxShortCooldown. Comparing for equality (rather than
+// "not greater than") also rejects zero or negative results.
+func TestCalculateCooldownFor429_MaxCap(t *testing.T) {
+	duration := CalculateCooldownFor429(10)
+	if duration != MaxShortCooldown {
+		t.Errorf("expected max %v, got %v", MaxShortCooldown, duration)
+	}
+}
+
+// TestCalculateCooldownUntilNextDay checks that the time until midnight lies
+// in (0, 24h].
+func TestCalculateCooldownUntilNextDay(t *testing.T) {
+	got := CalculateCooldownUntilNextDay()
+	withinDay := got > 0 && got <= 24*time.Hour
+	if !withinDay {
+		t.Errorf("expected duration between 0 and 24h, got %v", got)
+	}
+}
+
+// TestCooldownManager_ConcurrentAccess hammers every public method from many
+// goroutines over ten shared keys. It asserts nothing itself; it exists to
+// surface races and panics (run with -race).
+func TestCooldownManager_ConcurrentAccess(t *testing.T) {
+	const (
+		numGoroutines = 50
+		numOperations = 100
+	)
+	mgr := NewCooldownManager()
+
+	// One operation per slot; the worker cycles through them in order.
+	ops := []func(key string, step int){
+		func(key string, step int) {
+			mgr.SetCooldown(key, time.Duration(step)*time.Millisecond, CooldownReason429)
+		},
+		func(key string, _ int) { mgr.IsInCooldown(key) },
+		func(key string, _ int) { mgr.GetRemainingCooldown(key) },
+		func(key string, _ int) { mgr.GetCooldownReason(key) },
+		func(key string, _ int) { mgr.ClearCooldown(key) },
+		func(_ string, _ int) { mgr.CleanupExpired() },
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+	for worker := 0; worker < numGoroutines; worker++ {
+		go func(id int) {
+			defer wg.Done()
+			key := "token" + string(rune('a'+id%10))
+			for step := 0; step < numOperations; step++ {
+				ops[step%6](key, step)
+			}
+		}(worker)
+	}
+	wg.Wait()
+}
+
+// TestCooldownReasonConstants pins the wire values of the reason constants.
+func TestCooldownReasonConstants(t *testing.T) {
+	if got := CooldownReason429; got != "rate_limit_exceeded" {
+		t.Errorf("unexpected CooldownReason429: %s", got)
+	}
+	if got := CooldownReasonSuspended; got != "account_suspended" {
+		t.Errorf("unexpected CooldownReasonSuspended: %s", got)
+	}
+	if got := CooldownReasonQuotaExhausted; got != "quota_exhausted" {
+		t.Errorf("unexpected CooldownReasonQuotaExhausted: %s", got)
+	}
+}
+
+// TestDefaultConstants pins the values of the cooldown duration constants.
+func TestDefaultConstants(t *testing.T) {
+	if got := DefaultShortCooldown; got != 1*time.Minute {
+		t.Errorf("unexpected DefaultShortCooldown: %v", got)
+	}
+	if got := MaxShortCooldown; got != 5*time.Minute {
+		t.Errorf("unexpected MaxShortCooldown: %v", got)
+	}
+	if got := LongCooldown; got != 24*time.Hour {
+		t.Errorf("unexpected LongCooldown: %v", got)
+	}
+}
+
+// TestSetCooldown_OverwritesPrevious checks that a second SetCooldown for the
+// same key replaces both the reason and the end time.
+func TestSetCooldown_OverwritesPrevious(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewCooldownManager()
+	mgr.SetCooldown(key, 1*time.Hour, CooldownReason429)
+	mgr.SetCooldown(key, 1*time.Minute, CooldownReasonSuspended)
+
+	if got := mgr.GetCooldownReason(key); got != CooldownReasonSuspended {
+		t.Errorf("expected reason to be overwritten to %s, got %s", CooldownReasonSuspended, got)
+	}
+	if left := mgr.GetRemainingCooldown(key); left > 1*time.Minute {
+		t.Errorf("expected remaining <= 1 minute, got %v", left)
+	}
+}
--- a/internal/auth/kiro/fingerprint.go
+++ b/internal/auth/kiro/fingerprint.go
@@ -0,0 +1,197 @@
+package kiro
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"math/rand"
+	"net/http"
+	"sync"
+	"time"
+)
+
+// Fingerprint holds the multi-dimensional fingerprint values generated for one token.
+type Fingerprint struct {
+	SDKVersion          string // 1.0.20-1.0.27
+	OSType              string // darwin/windows/linux
+	OSVersion           string // 10.0.22621
+	NodeVersion         string // 18.x/20.x/22.x
+	KiroVersion         string // 0.3.x-0.8.x
+	KiroHash            string // SHA256
+	AcceptLanguage      string
+	ScreenResolution    string // 1920x1080
+	ColorDepth          int    // 24
+	HardwareConcurrency int    // number of CPU cores
+	TimezoneOffset      int
+}
+
+// FingerprintManager creates and caches one Fingerprint per token key.
+// GetFingerprint only calls into rng while holding mu for writing.
+type FingerprintManager struct {
+	mu           sync.RWMutex
+	fingerprints map[string]*Fingerprint // tokenKey -> fingerprint
+	rng          *rand.Rand
+}
+
+// Candidate values that generateFingerprint picks from at random.
+// osVersions is keyed by the entries of osTypes.
+var (
+	sdkVersions = []string{
+		"1.0.20", "1.0.21", "1.0.22", "1.0.23",
+		"1.0.24", "1.0.25", "1.0.26", "1.0.27",
+	}
+	osTypes    = []string{"darwin", "windows", "linux"}
+	osVersions = map[string][]string{
+		"darwin":  {"14.0", "14.1", "14.2", "14.3", "14.4", "14.5", "15.0", "15.1"},
+		"windows": {"10.0.19041", "10.0.19042", "10.0.19043", "10.0.19044", "10.0.22621", "10.0.22631"},
+		"linux":   {"5.15.0", "6.1.0", "6.2.0", "6.5.0", "6.6.0", "6.8.0"},
+	}
+	nodeVersions = []string{
+		"18.17.0", "18.18.0", "18.19.0", "18.20.0",
+		"20.9.0", "20.10.0", "20.11.0", "20.12.0", "20.13.0",
+		"22.0.0", "22.1.0", "22.2.0", "22.3.0",
+	}
+	kiroVersions = []string{
+		"0.3.0", "0.3.1", "0.4.0", "0.4.1", "0.5.0", "0.5.1",
+		"0.6.0", "0.6.1", "0.7.0", "0.7.1", "0.8.0", "0.8.1",
+	}
+	acceptLanguages = []string{
+		"en-US,en;q=0.9",
+		"en-GB,en;q=0.9",
+		"zh-CN,zh;q=0.9,en;q=0.8",
+		"zh-TW,zh;q=0.9,en;q=0.8",
+		"ja-JP,ja;q=0.9,en;q=0.8",
+		"ko-KR,ko;q=0.9,en;q=0.8",
+		"de-DE,de;q=0.9,en;q=0.8",
+		"fr-FR,fr;q=0.9,en;q=0.8",
+	}
+	screenResolutions = []string{
+		"1920x1080", "2560x1440", "3840x2160",
+		"1366x768", "1440x900", "1680x1050",
+		"2560x1600", "3440x1440",
+	}
+	colorDepths           = []int{24, 32}
+	hardwareConcurrencies = []int{4, 6, 8, 10, 12, 16, 20, 24, 32}
+	timezoneOffsets       = []int{-480, -420, -360, -300, -240, 0, 60, 120, 480, 540}
+)
+
+// NewFingerprintManager returns a manager with an empty cache and a random
+// source seeded from the current time.
+func NewFingerprintManager() *FingerprintManager {
+	seed := time.Now().UnixNano()
+	fm := new(FingerprintManager)
+	fm.fingerprints = map[string]*Fingerprint{}
+	fm.rng = rand.New(rand.NewSource(seed))
+	return fm
+}
+
+// GetFingerprint returns the fingerprint cached for tokenKey, generating and
+// caching a new one on first use.
+func (fm *FingerprintManager) GetFingerprint(tokenKey string) *Fingerprint {
+	// Fast path: look up under the read lock only.
+	fm.mu.RLock()
+	cached, hit := fm.fingerprints[tokenKey]
+	fm.mu.RUnlock()
+	if hit {
+		return cached
+	}
+
+	fm.mu.Lock()
+	defer fm.mu.Unlock()
+
+	// Re-check: another caller may have stored one between the two locks.
+	if existing, found := fm.fingerprints[tokenKey]; found {
+		return existing
+	}
+
+	created := fm.generateFingerprint(tokenKey)
+	fm.fingerprints[tokenKey] = created
+	return created
+}
+
+// generateFingerprint builds a new fingerprint by drawing each field from its
+// candidate list. The OS version is drawn from the list of the chosen OS type.
+func (fm *FingerprintManager) generateFingerprint(tokenKey string) *Fingerprint {
+	fp := new(Fingerprint)
+
+	// The draws are kept in the same order as the random source is consumed.
+	fp.OSType = fm.randomChoice(osTypes)
+	fp.OSVersion = fm.randomChoice(osVersions[fp.OSType])
+	fp.KiroVersion = fm.randomChoice(kiroVersions)
+	fp.SDKVersion = fm.randomChoice(sdkVersions)
+	fp.NodeVersion = fm.randomChoice(nodeVersions)
+	fp.AcceptLanguage = fm.randomChoice(acceptLanguages)
+	fp.ScreenResolution = fm.randomChoice(screenResolutions)
+	fp.ColorDepth = fm.randomIntChoice(colorDepths)
+	fp.HardwareConcurrency = fm.randomIntChoice(hardwareConcurrencies)
+	fp.TimezoneOffset = fm.randomIntChoice(timezoneOffsets)
+
+	fp.KiroHash = fm.generateKiroHash(tokenKey, fp.KiroVersion, fp.OSType)
+	return fp
+}
+
+// generateKiroHash returns the hex-encoded SHA-256 of the token key, Kiro
+// version, OS type and the current time in nanoseconds.
+func (fm *FingerprintManager) generateKiroHash(tokenKey, kiroVersion, osType string) string {
+	nowNanos := time.Now().UnixNano()
+	material := []byte(fmt.Sprintf("%s:%s:%s:%d", tokenKey, kiroVersion, osType, nowNanos))
+	digest := sha256.Sum256(material)
+	return hex.EncodeToString(digest[:])
+}
+
+// randomChoice returns a randomly selected element of choices.
+func (fm *FingerprintManager) randomChoice(choices []string) string {
+	idx := fm.rng.Intn(len(choices))
+	return choices[idx]
+}
+
+// randomIntChoice returns a randomly selected element of choices.
+func (fm *FingerprintManager) randomIntChoice(choices []int) int {
+	idx := fm.rng.Intn(len(choices))
+	return choices[idx]
+}
+
+// ApplyToRequest writes every fingerprint field into the headers of req,
+// replacing any value already present under the same name.
+func (fp *Fingerprint) ApplyToRequest(req *http.Request) {
+	headers := [][2]string{
+		{"X-Kiro-SDK-Version", fp.SDKVersion},
+		{"X-Kiro-OS-Type", fp.OSType},
+		{"X-Kiro-OS-Version", fp.OSVersion},
+		{"X-Kiro-Node-Version", fp.NodeVersion},
+		{"X-Kiro-Version", fp.KiroVersion},
+		{"X-Kiro-Hash", fp.KiroHash},
+		{"Accept-Language", fp.AcceptLanguage},
+		{"X-Screen-Resolution", fp.ScreenResolution},
+		{"X-Color-Depth", fmt.Sprintf("%d", fp.ColorDepth)},
+		{"X-Hardware-Concurrency", fmt.Sprintf("%d", fp.HardwareConcurrency)},
+		{"X-Timezone-Offset", fmt.Sprintf("%d", fp.TimezoneOffset)},
+	}
+	for _, kv := range headers {
+		req.Header.Set(kv[0], kv[1])
+	}
+}
+
+// RemoveFingerprint drops the fingerprint cached for tokenKey, if any.
+func (fm *FingerprintManager) RemoveFingerprint(tokenKey string) {
+	fm.mu.Lock()
+	delete(fm.fingerprints, tokenKey)
+	fm.mu.Unlock()
+}
+
+// Count returns the number of fingerprints currently cached.
+func (fm *FingerprintManager) Count() int {
+	fm.mu.RLock()
+	total := len(fm.fingerprints)
+	fm.mu.RUnlock()
+	return total
+}
+
+// BuildUserAgent renders the User-Agent string (Kiro IDE style).
+// Format: aws-sdk-js/{SDKVersion} ua/2.1 os/{OSType}#{OSVersion} lang/js md/nodejs#{NodeVersion} api/codewhispererstreaming#{SDKVersion} m/E KiroIDE-{KiroVersion}-{KiroHash}
+func (fp *Fingerprint) BuildUserAgent() string {
+	const layout = "aws-sdk-js/%s ua/2.1 os/%s#%s lang/js md/nodejs#%s api/codewhispererstreaming#%s m/E KiroIDE-%s-%s"
+
+	// The SDK version appears twice: in the SDK token and in the API token.
+	args := []any{
+		fp.SDKVersion,
+		fp.OSType,
+		fp.OSVersion,
+		fp.NodeVersion,
+		fp.SDKVersion,
+		fp.KiroVersion,
+		fp.KiroHash,
+	}
+	return fmt.Sprintf(layout, args...)
+}
+
+// BuildAmzUserAgent renders the X-Amz-User-Agent string.
+// Format: aws-sdk-js/{SDKVersion} KiroIDE-{KiroVersion}-{KiroHash}
+func (fp *Fingerprint) BuildAmzUserAgent() string {
+	const layout = "aws-sdk-js/%s KiroIDE-%s-%s"
+	sdk, kiro, hash := fp.SDKVersion, fp.KiroVersion, fp.KiroHash
+	return fmt.Sprintf(layout, sdk, kiro, hash)
+}
--- a/internal/auth/kiro/fingerprint_test.go
+++ b/internal/auth/kiro/fingerprint_test.go
@@ -0,0 +1,227 @@
+package kiro
+
+import (
+	"net/http"
+	"sync"
+	"testing"
+)
+
+// TestNewFingerprintManager checks that the constructor allocates the cache
+// map and the random source.
+func TestNewFingerprintManager(t *testing.T) {
+	mgr := NewFingerprintManager()
+	if mgr == nil {
+		t.Fatal("expected non-nil FingerprintManager")
+	}
+
+	if hasMap := mgr.fingerprints != nil; !hasMap {
+		t.Error("expected non-nil fingerprints map")
+	}
+	if hasRNG := mgr.rng != nil; !hasRNG {
+		t.Error("expected non-nil rng")
+	}
+}
+
+// TestGetFingerprint_NewToken checks that a freshly generated fingerprint has
+// every string field filled in and non-zero ColorDepth and HardwareConcurrency.
+func TestGetFingerprint_NewToken(t *testing.T) {
+	fp := NewFingerprintManager().GetFingerprint("token1")
+	if fp == nil {
+		t.Fatal("expected non-nil Fingerprint")
+	}
+
+	stringFields := []struct {
+		value   string
+		failure string
+	}{
+		{fp.SDKVersion, "expected non-empty SDKVersion"},
+		{fp.OSType, "expected non-empty OSType"},
+		{fp.OSVersion, "expected non-empty OSVersion"},
+		{fp.NodeVersion, "expected non-empty NodeVersion"},
+		{fp.KiroVersion, "expected non-empty KiroVersion"},
+		{fp.KiroHash, "expected non-empty KiroHash"},
+		{fp.AcceptLanguage, "expected non-empty AcceptLanguage"},
+		{fp.ScreenResolution, "expected non-empty ScreenResolution"},
+	}
+	for _, f := range stringFields {
+		if f.value == "" {
+			t.Error(f.failure)
+		}
+	}
+
+	intFields := []struct {
+		value   int
+		failure string
+	}{
+		{fp.ColorDepth, "expected non-zero ColorDepth"},
+		{fp.HardwareConcurrency, "expected non-zero HardwareConcurrency"},
+	}
+	for _, f := range intFields {
+		if f.value == 0 {
+			t.Error(f.failure)
+		}
+	}
+}
+
+// TestGetFingerprint_SameTokenReturnsSameFingerprint checks that repeated
+// lookups for one key return the same cached pointer.
+func TestGetFingerprint_SameTokenReturnsSameFingerprint(t *testing.T) {
+	mgr := NewFingerprintManager()
+
+	first := mgr.GetFingerprint("token1")
+	if second := mgr.GetFingerprint("token1"); second != first {
+		t.Error("expected same fingerprint for same token")
+	}
+}
+
+// TestGetFingerprint_DifferentTokens checks that two keys get two distinct
+// fingerprints. Pointer inequality alone holds for any two separately
+// allocated fingerprints, so the per-token KiroHash is compared as well.
+func TestGetFingerprint_DifferentTokens(t *testing.T) {
+	fm := NewFingerprintManager()
+	fp1 := fm.GetFingerprint("token1")
+	fp2 := fm.GetFingerprint("token2")
+
+	if fp1 == nil || fp2 == nil {
+		t.Fatal("expected non-nil fingerprints")
+	}
+	if fp1 == fp2 {
+		t.Error("expected different fingerprints for different tokens")
+	}
+	if fp1.KiroHash == fp2.KiroHash {
+		t.Errorf("expected different KiroHash for different tokens, both are %s", fp1.KiroHash)
+	}
+}
+
+// TestRemoveFingerprint checks that removing a cached fingerprint brings the
+// count back to zero.
+func TestRemoveFingerprint(t *testing.T) {
+	const key = "token1"
+
+	mgr := NewFingerprintManager()
+	mgr.GetFingerprint(key)
+	if n := mgr.Count(); n != 1 {
+		t.Fatalf("expected count 1, got %d", n)
+	}
+
+	mgr.RemoveFingerprint(key)
+	if n := mgr.Count(); n != 0 {
+		t.Errorf("expected count 0, got %d", n)
+	}
+}
+
+// TestRemoveFingerprint_NonExistent checks that removing an unknown key is a no-op.
+func TestRemoveFingerprint_NonExistent(t *testing.T) {
+	mgr := NewFingerprintManager()
+	mgr.RemoveFingerprint("nonexistent")
+
+	if n := mgr.Count(); n != 0 {
+		t.Errorf("expected count 0, got %d", n)
+	}
+}
+
+// TestCount checks that Count follows the number of distinct keys requested.
+func TestCount(t *testing.T) {
+	mgr := NewFingerprintManager()
+	if n := mgr.Count(); n != 0 {
+		t.Errorf("expected count 0, got %d", n)
+	}
+
+	for _, key := range []string{"token1", "token2", "token3"} {
+		mgr.GetFingerprint(key)
+	}
+
+	if n := mgr.Count(); n != 3 {
+		t.Errorf("expected count 3, got %d", n)
+	}
+}
+
+// TestApplyToRequest checks every header that ApplyToRequest sets. String
+// fields must round-trip exactly; the three numeric headers are only checked
+// for presence because this file imports no integer formatter.
+func TestApplyToRequest(t *testing.T) {
+	fm := NewFingerprintManager()
+	fp := fm.GetFingerprint("token1")
+
+	req, err := http.NewRequest("GET", "http://example.com", nil)
+	if err != nil {
+		t.Fatalf("unexpected error creating request: %v", err)
+	}
+	fp.ApplyToRequest(req)
+
+	exact := []struct {
+		header string
+		want   string
+	}{
+		{"X-Kiro-SDK-Version", fp.SDKVersion},
+		{"X-Kiro-OS-Type", fp.OSType},
+		{"X-Kiro-OS-Version", fp.OSVersion},
+		{"X-Kiro-Node-Version", fp.NodeVersion},
+		{"X-Kiro-Version", fp.KiroVersion},
+		{"X-Kiro-Hash", fp.KiroHash},
+		{"Accept-Language", fp.AcceptLanguage},
+		{"X-Screen-Resolution", fp.ScreenResolution},
+	}
+	for _, tc := range exact {
+		if req.Header.Get(tc.header) != tc.want {
+			t.Error(tc.header + " header mismatch")
+		}
+	}
+
+	for _, header := range []string{"X-Color-Depth", "X-Hardware-Concurrency", "X-Timezone-Offset"} {
+		if req.Header.Get(header) == "" {
+			t.Error(header + " header missing")
+		}
+	}
+}
+
+// TestGetFingerprint_OSVersionMatchesOSType generates twenty fingerprints and
+// checks that each OS version comes from the list of its own OS type.
+func TestGetFingerprint_OSVersionMatchesOSType(t *testing.T) {
+	mgr := NewFingerprintManager()
+
+	listed := func(candidates []string, version string) bool {
+		for _, candidate := range candidates {
+			if candidate == version {
+				return true
+			}
+		}
+		return false
+	}
+
+	for n := 0; n < 20; n++ {
+		fp := mgr.GetFingerprint("token" + string(rune('a'+n)))
+		if !listed(osVersions[fp.OSType], fp.OSVersion) {
+			t.Errorf("OS version %s not valid for OS type %s", fp.OSVersion, fp.OSType)
+		}
+	}
+}
+
+// TestFingerprintManager_ConcurrentAccess exercises lookup, count, header
+// application and removal from many goroutines over 26 shared keys. It asserts
+// nothing itself; it exists to surface races and panics (run with -race).
+func TestFingerprintManager_ConcurrentAccess(t *testing.T) {
+	const (
+		numGoroutines = 100
+		numOperations = 100
+	)
+	mgr := NewFingerprintManager()
+
+	// One operation per slot; the worker cycles through them in order.
+	ops := []func(key string){
+		func(key string) { mgr.GetFingerprint(key) },
+		func(string) { mgr.Count() },
+		func(key string) {
+			fp := mgr.GetFingerprint(key)
+			req, _ := http.NewRequest("GET", "http://example.com", nil)
+			fp.ApplyToRequest(req)
+		},
+		func(key string) { mgr.RemoveFingerprint(key) },
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+	for worker := 0; worker < numGoroutines; worker++ {
+		go func(id int) {
+			defer wg.Done()
+			for step := 0; step < numOperations; step++ {
+				key := "token" + string(rune('a'+id%26))
+				ops[step%4](key)
+			}
+		}(worker)
+	}
+	wg.Wait()
+}
+
+// TestKiroHashUniqueness generates fingerprints for 100 distinct keys and
+// fails on any repeated KiroHash.
+func TestKiroHashUniqueness(t *testing.T) {
+	mgr := NewFingerprintManager()
+	seen := map[string]bool{}
+
+	for n := 0; n < 100; n++ {
+		hash := mgr.GetFingerprint("token" + string(rune(n))).KiroHash
+		if seen[hash] {
+			t.Errorf("duplicate KiroHash detected: %s", hash)
+		}
+		seen[hash] = true
+	}
+}
+
+// TestKiroHashFormat checks that KiroHash is 64 lowercase hexadecimal characters.
+func TestKiroHashFormat(t *testing.T) {
+	hash := NewFingerprintManager().GetFingerprint("token1").KiroHash
+
+	if n := len(hash); n != 64 {
+		t.Errorf("expected KiroHash length 64 (SHA256 hex), got %d", n)
+	}
+
+	for _, c := range hash {
+		isDigit := c >= '0' && c <= '9'
+		isLowerHex := c >= 'a' && c <= 'f'
+		if !isDigit && !isLowerHex {
+			t.Errorf("invalid hex character in KiroHash: %c", c)
+		}
+	}
+}
--- a/internal/auth/kiro/jitter.go
+++ b/internal/auth/kiro/jitter.go
@@ -0,0 +1,174 @@
+package kiro
+
+import (
+	"math/rand"
+	"sync"
+	"time"
+)
+
+// Jitter configuration constants used by the delay helpers in this file.
+const (
+	// JitterPercent is the default fraction of jitter to apply (0.30 = ±30%).
+	JitterPercent = 0.30
+
+	// Human-like delay ranges
+	ShortDelayMin  = 50 * time.Millisecond  // Minimum for rapid consecutive operations
+	ShortDelayMax  = 200 * time.Millisecond // Maximum for rapid consecutive operations
+	NormalDelayMin = 1 * time.Second        // Minimum for normal thinking time
+	NormalDelayMax = 3 * time.Second        // Maximum for normal thinking time
+	LongDelayMin   = 5 * time.Second        // Minimum for reading/resting
+	LongDelayMax   = 10 * time.Second       // Maximum for reading/resting
+
+	// Probability thresholds for human-like behavior.
+	// HumanLikeDelay compares a roll against the short and long thresholds and
+	// treats everything else as a normal delay.
+	ShortDelayProbability  = 0.20 // 20% chance of short delay (consecutive ops)
+	LongDelayProbability   = 0.05 // 5% chance of long delay (reading/resting)
+	NormalDelayProbability = 0.75 // 75% chance of normal delay (thinking)
+)
+
+// Package-level state shared by the jitter helpers.
+var (
+	// jitterRand is the generator used for all jitter calculations; it is
+	// created lazily by initJitterRand.
+	jitterRand *rand.Rand
+	// jitterRandOnce ensures jitterRand is created only once.
+	jitterRandOnce sync.Once
+	// jitterMu guards jitterRand and lastRequestTime.
+	jitterMu sync.Mutex
+	// lastRequestTime is when HumanLikeDelay was last called; the zero value
+	// means it has not been called since start-up or the last reset.
+	lastRequestTime time.Time
+)
+
+// initJitterRand lazily creates the random number generator used for jitter
+// calculations. The generator is seeded from the current time in nanoseconds,
+// so the sequence differs between runs and is not reproducible. sync.Once
+// makes repeated calls no-ops after the first.
+func initJitterRand() {
+	jitterRandOnce.Do(func() {
+		jitterRand = rand.New(rand.NewSource(time.Now().UnixNano()))
+	})
+}
+
+// RandomDelay returns a pseudo-random duration that is at least min and below
+// max, chosen in whole-millisecond steps above min.
+//
+// min is returned unchanged when min >= max, and also when the two bounds are
+// less than one whole millisecond apart (no step fits between them).
+// The shared generator is accessed under jitterMu.
+func RandomDelay(min, max time.Duration) time.Duration {
+	// rand.Int63n panics for a non-positive argument, which previously
+	// happened for ranges narrower than one millisecond, so reject those
+	// before drawing.
+	rangeMs := max.Milliseconds() - min.Milliseconds()
+	if min >= max || rangeMs <= 0 {
+		return min
+	}
+
+	initJitterRand()
+	jitterMu.Lock()
+	defer jitterMu.Unlock()
+
+	return min + time.Duration(jitterRand.Int63n(rangeMs))*time.Millisecond
+}
+
+// JitterDelay returns baseDelay shifted by a random amount of up to
+// ±jitterPercent of its value. A jitterPercent that is <= 0 or > 1 is replaced
+// by the package default JitterPercent. Negative results are clamped to 0.
+// For example, JitterDelay(1*time.Second, 0.30) yields roughly 700ms to 1300ms.
+func JitterDelay(baseDelay time.Duration, jitterPercent float64) time.Duration {
+	initJitterRand()
+
+	fraction := jitterPercent
+	if fraction <= 0 || fraction > 1 {
+		fraction = JitterPercent
+	}
+
+	base := float64(baseDelay)
+	// Widest deviation allowed on either side of the base delay.
+	spread := base * fraction
+
+	jitterMu.Lock()
+	unit := jitterRand.Float64()*2 - 1 // uniform in [-1, 1)
+	jitterMu.Unlock()
+
+	offset := unit * spread
+	jittered := time.Duration(base + offset)
+	if jittered < 0 {
+		return 0
+	}
+	return jittered
+}
+
+// JitterDelayDefault applies the default jitter (JitterPercent, i.e. ±30%) to
+// baseDelay by delegating to JitterDelay.
+func JitterDelayDefault(baseDelay time.Duration) time.Duration {
+	return JitterDelay(baseDelay, JitterPercent)
+}
+
+// HumanLikeDelay generates a delay that mimics human behavior patterns.
+// The delay is selected based on probability distribution:
+//   - 20% chance: Short delay (50-200ms) - simulates consecutive rapid operations
+//   - 75% chance: Normal delay (1-3s) - simulates thinking/reading time
+//   - 5% chance: Long delay (5-10s) - simulates breaks/reading longer content
+//
+// Returns the delay duration (caller should call time.Sleep with this value).
+func HumanLikeDelay() time.Duration {
+	initJitterRand()
+	jitterMu.Lock()
+	defer jitterMu.Unlock()
+
+	// Track time since last request for adaptive behavior
+	now := time.Now()
+	timeSinceLastRequest := now.Sub(lastRequestTime)
+	lastRequestTime = now
+
+	// If requests are very close together, use short delay
+	if timeSinceLastRequest < 500*time.Millisecond && timeSinceLastRequest > 0 {
+		rangeMs := ShortDelayMax.Milliseconds() - ShortDelayMin.Milliseconds()
+		randomMs := jitterRand.Int63n(rangeMs)
+		return ShortDelayMin + time.Duration(randomMs)*time.Millisecond
+	}
+
+	// Otherwise, use probability-based selection
+	roll := jitterRand.Float64()
+
+	var min, max time.Duration
+	switch {
+	case roll < ShortDelayProbability:
+		// Short delay - consecutive operations
+		min, max = ShortDelayMin, ShortDelayMax
+	case roll < ShortDelayProbability+LongDelayProbability:
+		// Long delay - reading/resting
+		min, max = LongDelayMin, LongDelayMax
+	default:
+		// Normal delay - thinking time
+		min, max = NormalDelayMin, NormalDelayMax
+	}
+
+	rangeMs := max.Milliseconds() - min.Milliseconds()
+	randomMs := jitterRand.Int63n(rangeMs)
+	return min + time.Duration(randomMs)*time.Millisecond
+}
+
+// ApplyHumanLikeDelay blocks the calling goroutine for a duration chosen by
+// HumanLikeDelay. Non-positive durations are skipped without sleeping.
+func ApplyHumanLikeDelay() {
+	if pause := HumanLikeDelay(); pause > 0 {
+		time.Sleep(pause)
+	}
+}
+
+// ExponentialBackoffWithJitter calculates a retry delay using exponential
+// backoff with jitter.
+//
+// The un-jittered delay is min(baseDelay * 2^attempt, maxDelay); negative
+// attempts are treated as 0. ±30% jitter (JitterPercent) is then applied, so
+// the returned value may exceed maxDelay by up to 30%. Jitter spreads out
+// retries when several clients fail at the same time.
+func ExponentialBackoffWithJitter(attempt int, baseDelay, maxDelay time.Duration) time.Duration {
+	if attempt < 0 {
+		attempt = 0
+	}
+
+	// Double step by step instead of computing baseDelay * (1 << attempt):
+	// the shift and the multiplication both overflow int64 for large attempt
+	// values, which used to wrap the backoff to zero or a negative value.
+	backoff := baseDelay
+	for i := 0; i < attempt && backoff > 0 && backoff < maxDelay; i++ {
+		if backoff > maxDelay/2 {
+			// One more doubling would pass maxDelay (or overflow); cap now.
+			backoff = maxDelay
+			break
+		}
+		backoff *= 2
+	}
+	if backoff > maxDelay {
+		backoff = maxDelay
+	}
+
+	// Add ±30% jitter
+	return JitterDelay(backoff, JitterPercent)
+}
+
+// ShouldSkipDelay reports whether the artificial delay should be skipped.
+// It currently returns isStreaming unchanged, so only streaming requests skip
+// the delay; it is kept as a function so further skip conditions can be added
+// in one place.
+func ShouldSkipDelay(isStreaming bool) bool {
+	return isStreaming
+}
+
+// ResetLastRequestTime resets the last request time tracker.
+// Useful for testing or when starting a new session.
+func ResetLastRequestTime() {
+	jitterMu.Lock()
+	defer jitterMu.Unlock()
+	lastRequestTime = time.Time{}
+}
--- a/internal/auth/kiro/metrics.go
+++ b/internal/auth/kiro/metrics.go
@@ -0,0 +1,187 @@
+package kiro
+
+import (
+	"math"
+	"sync"
+	"time"
+)
+
+// TokenMetrics holds performance metrics for a single token.
+// SuccessRate and AvgLatency are derived values that RecordRequest recomputes
+// from the unexported counters on every call.
+type TokenMetrics struct {
+	SuccessRate    float64   // Success rate (0.0 - 1.0)
+	AvgLatency     float64   // Average latency in milliseconds
+	QuotaRemaining float64   // Remaining quota (0.0 - 1.0)
+	LastUsed       time.Time // Last usage timestamp
+	FailCount      int       // Consecutive failure count (reset to 0 on success)
+	TotalRequests  int       // Total request count
+	successCount   int       // Internal: successful request count
+	totalLatency   float64   // Internal: cumulative latency in milliseconds
+}
+
+// TokenScorer manages per-token metrics and turns them into a score.
+// All access to the metrics map goes through mu.
+type TokenScorer struct {
+	mu      sync.RWMutex
+	metrics map[string]*TokenMetrics // keyed by token key
+
+	// Scoring weights applied by CalculateScore.
+	successRateWeight     float64
+	quotaWeight           float64
+	latencyWeight         float64
+	lastUsedWeight        float64
+	failPenaltyMultiplier float64 // score reduction per consecutive failure
+}
+
+// NewTokenScorer returns a TokenScorer with an empty metrics map and the
+// default scoring weights (success 0.4, quota 0.25, latency 0.2, recency 0.15)
+// plus a 0.1 penalty multiplier per consecutive failure.
+func NewTokenScorer() *TokenScorer {
+	scorer := new(TokenScorer)
+	scorer.metrics = make(map[string]*TokenMetrics)
+
+	scorer.successRateWeight = 0.4
+	scorer.quotaWeight = 0.25
+	scorer.latencyWeight = 0.2
+	scorer.lastUsedWeight = 0.15
+	scorer.failPenaltyMultiplier = 0.1
+
+	return scorer
+}
+
+// getOrCreateMetrics looks up the metrics entry for tokenKey, inserting a
+// fresh one (success rate 1.0, quota 1.0) when none exists. It does no
+// locking itself; the callers in this file hold s.mu for writing.
+func (s *TokenScorer) getOrCreateMetrics(tokenKey string) *TokenMetrics {
+	entry, found := s.metrics[tokenKey]
+	if !found {
+		entry = &TokenMetrics{SuccessRate: 1.0, QuotaRemaining: 1.0}
+		s.metrics[tokenKey] = entry
+	}
+	return entry
+}
+
+// RecordRequest updates the metrics of tokenKey with the outcome of one
+// request: it bumps the request count, stamps LastUsed, accumulates latency
+// (in milliseconds), tracks consecutive failures and refreshes the derived
+// SuccessRate and AvgLatency values.
+func (s *TokenScorer) RecordRequest(tokenKey string, success bool, latency time.Duration) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	entry := s.getOrCreateMetrics(tokenKey)
+
+	entry.TotalRequests++
+	entry.LastUsed = time.Now()
+	entry.totalLatency += float64(latency.Milliseconds())
+
+	switch {
+	case success:
+		entry.successCount++
+		entry.FailCount = 0 // a success ends the failure streak
+	default:
+		entry.FailCount++
+	}
+
+	// Recompute the derived values from the raw counters.
+	if entry.TotalRequests <= 0 {
+		return
+	}
+	total := float64(entry.TotalRequests)
+	entry.SuccessRate = float64(entry.successCount) / total
+	entry.AvgLatency = entry.totalLatency / total
+}
+
+// SetQuotaRemaining stores quota as the remaining quota of tokenKey, creating
+// the metrics entry if the token has not been seen yet. The value is stored
+// as given, without range checking.
+func (s *TokenScorer) SetQuotaRemaining(tokenKey string, quota float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.getOrCreateMetrics(tokenKey).QuotaRemaining = quota
+}
+
+// GetMetrics returns a snapshot of the metrics for tokenKey, or nil when the
+// token is unknown. The snapshot is a copy, so changing it does not affect the
+// scorer's internal state.
+func (s *TokenScorer) GetMetrics(tokenKey string) *TokenMetrics {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	entry, known := s.metrics[tokenKey]
+	if !known {
+		return nil
+	}
+	snapshot := *entry
+	return &snapshot
+}
+
+// CalculateScore returns the score of tokenKey; higher is better.
+//
+// Unknown tokens score 1.0. Otherwise the score is the weighted sum of four
+// components, each between 0 and 1 for in-range metrics:
+//   - success rate
+//   - remaining quota
+//   - latency: e^(-avgLatencyMs/1000), so 1000ms -> ~0.37 and 100ms -> ~0.90
+//   - idle time: 1 - e^(-secondsSinceUse/60), so 60s -> ~0.63 and 0s -> 0
+//
+// The sum is then scaled down by failPenaltyMultiplier per consecutive
+// failure, never going below zero.
+func (s *TokenScorer) CalculateScore(tokenKey string) float64 {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	entry, tracked := s.metrics[tokenKey]
+	if !tracked {
+		return 1.0 // New tokens get a high initial score
+	}
+
+	// Latency: lower average latency scores higher; no requests yet counts as best.
+	latencyScore := 1.0
+	if entry.TotalRequests != 0 {
+		latencyScore = math.Exp(-entry.AvgLatency / 1000.0)
+	}
+
+	// Recency: tokens that have rested longer score higher; never used counts as best.
+	lastUsedScore := 1.0
+	if !entry.LastUsed.IsZero() {
+		idleSeconds := time.Since(entry.LastUsed).Seconds()
+		lastUsedScore = 1.0 - math.Exp(-idleSeconds/60.0)
+	}
+
+	score := s.successRateWeight*entry.SuccessRate +
+		s.quotaWeight*entry.QuotaRemaining +
+		s.latencyWeight*latencyScore +
+		s.lastUsedWeight*lastUsedScore
+
+	// Consecutive failures shrink the score proportionally.
+	if entry.FailCount > 0 {
+		penalty := s.failPenaltyMultiplier * float64(entry.FailCount)
+		score *= math.Max(0, 1.0-penalty)
+	}
+
+	return score
+}
+
+// SelectBestToken returns the entry of tokens with the highest score.
+// It returns "" for an empty slice and the sole entry (without scoring it) for
+// a single-element slice. On ties the earliest token wins.
+func (s *TokenScorer) SelectBestToken(tokens []string) string {
+	switch len(tokens) {
+	case 0:
+		return ""
+	case 1:
+		return tokens[0]
+	}
+
+	winner, top := tokens[0], s.CalculateScore(tokens[0])
+	for i := 1; i < len(tokens); i++ {
+		if candidate := s.CalculateScore(tokens[i]); candidate > top {
+			winner, top = tokens[i], candidate
+		}
+	}
+	return winner
+}
+
+// ResetMetrics removes the stored metrics of tokenKey; removing an unknown
+// token is a no-op.
+func (s *TokenScorer) ResetMetrics(tokenKey string) {
+	s.mu.Lock()
+	delete(s.metrics, tokenKey)
+	s.mu.Unlock()
+}
+
+// ResetAllMetrics discards every stored metrics entry by swapping in a fresh,
+// empty map.
+func (s *TokenScorer) ResetAllMetrics() {
+	fresh := make(map[string]*TokenMetrics)
+
+	s.mu.Lock()
+	s.metrics = fresh
+	s.mu.Unlock()
+}
--- a/internal/auth/kiro/metrics_test.go
+++ b/internal/auth/kiro/metrics_test.go
@@ -0,0 +1,301 @@
+package kiro
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestNewTokenScorer checks that the constructor returns a usable scorer with
+// the expected default weights.
+func TestNewTokenScorer(t *testing.T) {
+	scorer := NewTokenScorer()
+	if scorer == nil {
+		t.Fatal("expected non-nil TokenScorer")
+	}
+	if scorer.metrics == nil {
+		t.Error("expected non-nil metrics map")
+	}
+	if got := scorer.successRateWeight; got != 0.4 {
+		t.Errorf("expected successRateWeight 0.4, got %f", got)
+	}
+	if got := scorer.quotaWeight; got != 0.25 {
+		t.Errorf("expected quotaWeight 0.25, got %f", got)
+	}
+}
+
+// TestRecordRequest_Success records one successful 100ms request and checks
+// every metric derived from it.
+func TestRecordRequest_Success(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+
+	got := scorer.GetMetrics("token1")
+	if got == nil {
+		t.Fatal("expected non-nil metrics")
+	}
+	if n := got.TotalRequests; n != 1 {
+		t.Errorf("expected TotalRequests 1, got %d", n)
+	}
+	if rate := got.SuccessRate; rate != 1.0 {
+		t.Errorf("expected SuccessRate 1.0, got %f", rate)
+	}
+	if fails := got.FailCount; fails != 0 {
+		t.Errorf("expected FailCount 0, got %d", fails)
+	}
+	if avg := got.AvgLatency; avg != 100 {
+		t.Errorf("expected AvgLatency 100, got %f", avg)
+	}
+}
+
+// TestRecordRequest_Failure records a single failed request and expects a
+// zero success rate with one consecutive failure.
+func TestRecordRequest_Failure(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", false, 200*time.Millisecond)
+
+	got := scorer.GetMetrics("token1")
+	if rate := got.SuccessRate; rate != 0.0 {
+		t.Errorf("expected SuccessRate 0.0, got %f", rate)
+	}
+	if fails := got.FailCount; fails != 1 {
+		t.Errorf("expected FailCount 1, got %d", fails)
+	}
+}
+
+// TestRecordRequest_MixedResults feeds three successes and one failure (the
+// failure is not last) and checks the totals, the success rate and that the
+// trailing success cleared the failure streak.
+func TestRecordRequest_MixedResults(t *testing.T) {
+	scorer := NewTokenScorer()
+	for _, ok := range []bool{true, true, false, true} {
+		scorer.RecordRequest("token1", ok, 100*time.Millisecond)
+	}
+
+	got := scorer.GetMetrics("token1")
+	if n := got.TotalRequests; n != 4 {
+		t.Errorf("expected TotalRequests 4, got %d", n)
+	}
+	if rate := got.SuccessRate; rate != 0.75 {
+		t.Errorf("expected SuccessRate 0.75, got %f", rate)
+	}
+	if fails := got.FailCount; fails != 0 {
+		t.Errorf("expected FailCount 0 (reset on success), got %d", fails)
+	}
+}
+
+// TestRecordRequest_ConsecutiveFailures checks that FailCount counts the run
+// of failures that follows the last success.
+func TestRecordRequest_ConsecutiveFailures(t *testing.T) {
+	scorer := NewTokenScorer()
+	for _, ok := range []bool{true, false, false, false} {
+		scorer.RecordRequest("token1", ok, 100*time.Millisecond)
+	}
+
+	if fails := scorer.GetMetrics("token1").FailCount; fails != 3 {
+		t.Errorf("expected FailCount 3, got %d", fails)
+	}
+}
+
+// TestSetQuotaRemaining checks that a quota set on a previously unseen token
+// is readable through GetMetrics.
+func TestSetQuotaRemaining(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.SetQuotaRemaining("token1", 0.5)
+
+	if quota := scorer.GetMetrics("token1").QuotaRemaining; quota != 0.5 {
+		t.Errorf("expected QuotaRemaining 0.5, got %f", quota)
+	}
+}
+
+// TestGetMetrics_NonExistent expects nil for a token that was never recorded.
+func TestGetMetrics_NonExistent(t *testing.T) {
+	if got := NewTokenScorer().GetMetrics("nonexistent"); got != nil {
+		t.Error("expected nil metrics for non-existent token")
+	}
+}
+
+// TestGetMetrics_ReturnsCopy mutates a returned snapshot and checks that the
+// change is not visible in a second snapshot.
+func TestGetMetrics_ReturnsCopy(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+
+	first := scorer.GetMetrics("token1")
+	first.TotalRequests = 999
+
+	if second := scorer.GetMetrics("token1"); second.TotalRequests == 999 {
+		t.Error("GetMetrics should return a copy")
+	}
+}
+
+// TestCalculateScore_NewToken expects the maximum score for an unknown token.
+func TestCalculateScore_NewToken(t *testing.T) {
+	if got := NewTokenScorer().CalculateScore("newtoken"); got != 1.0 {
+		t.Errorf("expected score 1.0 for new token, got %f", got)
+	}
+}
+
+// TestCalculateScore_PerfectToken scores a token with one fast success and
+// full quota, after a short pause so the recency component is non-zero, and
+// expects a score in [0.5, 1.0].
+func TestCalculateScore_PerfectToken(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", true, 50*time.Millisecond)
+	scorer.SetQuotaRemaining("token1", 1.0)
+
+	time.Sleep(100 * time.Millisecond)
+
+	if got := scorer.CalculateScore("token1"); got < 0.5 || got > 1.0 {
+		t.Errorf("expected high score for perfect token, got %f", got)
+	}
+}
+
+// TestCalculateScore_FailedToken scores a token with five slow failures and
+// little quota and expects the result not to exceed 0.5.
+func TestCalculateScore_FailedToken(t *testing.T) {
+	scorer := NewTokenScorer()
+	for attempt := 1; attempt <= 5; attempt++ {
+		scorer.RecordRequest("token1", false, 1000*time.Millisecond)
+	}
+	scorer.SetQuotaRemaining("token1", 0.1)
+
+	if got := scorer.CalculateScore("token1"); got > 0.5 {
+		t.Errorf("expected low score for failed token, got %f", got)
+	}
+}
+
+// TestCalculateScore_FailPenalty compares the score after one success with the
+// score after two subsequent failures and expects it to drop.
+func TestCalculateScore_FailPenalty(t *testing.T) {
+	scorer := NewTokenScorer()
+
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+	before := scorer.CalculateScore("token1")
+
+	for failure := 0; failure < 2; failure++ {
+		scorer.RecordRequest("token1", false, 100*time.Millisecond)
+	}
+	after := scorer.CalculateScore("token1")
+
+	if !(after < before) {
+		t.Errorf("expected lower score with consecutive failures: noFail=%f, withFail=%f", before, after)
+	}
+}
+
+// TestSelectBestToken_Empty expects the empty string when no tokens are given.
+func TestSelectBestToken_Empty(t *testing.T) {
+	if got := NewTokenScorer().SelectBestToken([]string{}); got != "" {
+		t.Errorf("expected empty string for empty tokens, got %s", got)
+	}
+}
+
+// TestSelectBestToken_SingleToken expects the only candidate to be returned.
+func TestSelectBestToken_SingleToken(t *testing.T) {
+	if got := NewTokenScorer().SelectBestToken([]string{"token1"}); got != "token1" {
+		t.Errorf("expected token1, got %s", got)
+	}
+}
+
+// TestSelectBestToken_MultipleTokens sets up one clearly bad token (slow
+// failures, low quota) and one clearly good token and expects the good one to
+// win the selection.
+func TestSelectBestToken_MultipleTokens(t *testing.T) {
+	scorer := NewTokenScorer()
+
+	// "bad": two slow failures and almost no quota left.
+	for failure := 0; failure < 2; failure++ {
+		scorer.RecordRequest("bad", false, 1000*time.Millisecond)
+	}
+	scorer.SetQuotaRemaining("bad", 0.1)
+
+	// "good": one fast success and plenty of quota.
+	scorer.RecordRequest("good", true, 50*time.Millisecond)
+	scorer.SetQuotaRemaining("good", 0.9)
+
+	time.Sleep(50 * time.Millisecond)
+
+	candidates := []string{"bad", "good"}
+	if got := scorer.SelectBestToken(candidates); got != "good" {
+		t.Errorf("expected good token to be selected, got %s", got)
+	}
+}
+
+// TestResetMetrics checks that a token's metrics are gone after ResetMetrics.
+func TestResetMetrics(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+
+	scorer.ResetMetrics("token1")
+
+	if got := scorer.GetMetrics("token1"); got != nil {
+		t.Error("expected nil metrics after reset")
+	}
+}
+
+// TestResetAllMetrics records a request for several tokens, resets the scorer
+// and checks that none of the tokens has metrics afterwards. Every recorded
+// token is verified, including token3, which was previously left unchecked.
+func TestResetAllMetrics(t *testing.T) {
+	s := NewTokenScorer()
+	tokens := []string{"token1", "token2", "token3"}
+	for _, token := range tokens {
+		s.RecordRequest(token, true, 100*time.Millisecond)
+	}
+
+	s.ResetAllMetrics()
+
+	for _, token := range tokens {
+		if s.GetMetrics(token) != nil {
+			t.Errorf("expected nil metrics for %s after reset all", token)
+		}
+	}
+}
+
+// TestTokenScorer_ConcurrentAccess exercises every public TokenScorer method
+// from many goroutines at once. It makes no assertions itself; it is meant to
+// surface panics and, when run with -race, data races.
+func TestTokenScorer_ConcurrentAccess(t *testing.T) {
+	s := NewTokenScorer()
+	const numGoroutines = 50
+	const numOperations = 100
+
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+
+	for i := 0; i < numGoroutines; i++ {
+		go func(id int) {
+			defer wg.Done()
+			// Ten shared keys so goroutines contend on the same entries.
+			tokenKey := "token" + string(rune('a'+id%10))
+			for j := 0; j < numOperations; j++ {
+				switch j % 6 {
+				case 0:
+					s.RecordRequest(tokenKey, j%2 == 0, time.Duration(j)*time.Millisecond)
+				case 1:
+					s.SetQuotaRemaining(tokenKey, float64(j%100)/100)
+				case 2:
+					s.GetMetrics(tokenKey)
+				case 3:
+					s.CalculateScore(tokenKey)
+				case 4:
+					s.SelectBestToken([]string{tokenKey, "token_x", "token_y"})
+				case 5:
+					// j is always odd here (j%6 == 5), so the old guard
+					// j%20 == 0 never fired and ResetMetrics was never
+					// exercised. j%30 == 5 selects j = 5, 35, 65, 95.
+					if j%30 == 5 {
+						s.ResetMetrics(tokenKey)
+					}
+				}
+			}
+		}(i)
+	}
+
+	wg.Wait()
+}
+
+// TestAvgLatencyCalculation records latencies of 100, 200 and 300ms and
+// expects their mean, 200ms.
+func TestAvgLatencyCalculation(t *testing.T) {
+	scorer := NewTokenScorer()
+	for _, ms := range []time.Duration{100, 200, 300} {
+		scorer.RecordRequest("token1", true, ms*time.Millisecond)
+	}
+
+	if avg := scorer.GetMetrics("token1").AvgLatency; avg != 200 {
+		t.Errorf("expected AvgLatency 200, got %f", avg)
+	}
+}
+
+// TestLastUsedUpdated checks that LastUsed falls between the start of the test
+// and the moment of verification.
+func TestLastUsedUpdated(t *testing.T) {
+	scorer := NewTokenScorer()
+	start := time.Now()
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+
+	lastUsed := scorer.GetMetrics("token1").LastUsed
+	if lastUsed.Before(start) {
+		t.Error("expected LastUsed to be after test start time")
+	}
+	if lastUsed.After(time.Now()) {
+		t.Error("expected LastUsed to be before or equal to now")
+	}
+}
+
+// TestDefaultQuotaForNewToken checks that a token first seen through
+// RecordRequest starts with full quota.
+func TestDefaultQuotaForNewToken(t *testing.T) {
+	scorer := NewTokenScorer()
+	scorer.RecordRequest("token1", true, 100*time.Millisecond)
+
+	if quota := scorer.GetMetrics("token1").QuotaRemaining; quota != 1.0 {
+		t.Errorf("expected default QuotaRemaining 1.0, got %f", quota)
+	}
+}
--- a/internal/auth/kiro/oauth.go
+++ b/internal/auth/kiro/oauth.go
@@ -227,6 +227,7 @@ func (o *KiroOAuth) exchangeCodeForToken(ctx context.Context, code, codeVerifier
 		ExpiresAt:    expiresAt.Format(time.RFC3339),
 		AuthMethod:   "social",
 		Provider:     "", // Caller should preserve original provider
+		Region:       "us-east-1",
 	}, nil
 }

@@ -285,6 +286,7 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
 		ExpiresAt:    expiresAt.Format(time.RFC3339),
 		AuthMethod:   "social",
 		Provider:     "", // Caller should preserve original provider
+		Region:       "us-east-1",
 	}, nil
 }

--- a/internal/auth/kiro/oauth_web.go
+++ b/internal/auth/kiro/oauth_web.go
@@ -0,0 +1,969 @@
+// Package kiro provides OAuth Web authentication for Kiro.
+package kiro
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"html/template"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// defaultSessionExpiry is the default lifetime of a web auth session.
+	defaultSessionExpiry = 10 * time.Minute
+	// pollIntervalSeconds is the minimum number of seconds between token
+	// polls; pollForToken raises shorter session intervals to this value.
+	pollIntervalSeconds  = 5
+)
+
+// authSessionStatus is the lifecycle state of a web authentication session.
+type authSessionStatus string
+
+const (
+	statusPending authSessionStatus = "pending" // waiting for the user to finish signing in
+	statusSuccess authSessionStatus = "success" // token obtained
+	statusFailed  authSessionStatus = "failed"  // timed out or token polling returned an error
+)
+
+// webAuthSession tracks one in-progress or finished browser authentication
+// attempt. Sessions are stored in OAuthWebHandler.sessions keyed by stateID;
+// status, error, completedAt, expiresAt and tokenData are updated while
+// holding the handler's mutex.
+type webAuthSession struct {
+	stateID         string
+	deviceCode      string
+	userCode        string
+	authURL         string
+	verificationURI string
+	expiresIn       int // seconds
+	interval        int // polling interval in seconds
+	status          authSessionStatus
+	startedAt       time.Time
+	completedAt     time.Time
+	expiresAt       time.Time
+	error           string
+	tokenData       *KiroTokenData
+	ssoClient       *SSOOIDCClient
+	clientID        string
+	clientSecret    string
+	region          string
+	cancelFunc      context.CancelFunc
+	authMethod      string // "google", "github", "builder-id", "idc"
+	startURL        string // Used for IDC
+	codeVerifier    string // Used for social auth PKCE
+	codeChallenge   string // Used for social auth PKCE
+}
+
+// OAuthWebHandler serves the browser-based Kiro OAuth flow and keeps track of
+// the authentication sessions it has started.
+type OAuthWebHandler struct {
+	cfg             *config.Config
+	sessions        map[string]*webAuthSession // keyed by state ID
+	mu              sync.RWMutex               // guards sessions and session state
+	onTokenObtained func(*KiroTokenData)       // optional callback, see SetTokenCallback
+}
+
+// NewOAuthWebHandler returns a handler bound to cfg with an empty session
+// table and no token callback.
+func NewOAuthWebHandler(cfg *config.Config) *OAuthWebHandler {
+	handler := new(OAuthWebHandler)
+	handler.cfg = cfg
+	handler.sessions = map[string]*webAuthSession{}
+	return handler
+}
+
+// SetTokenCallback registers callback to be invoked with the token data once
+// pollForToken has obtained a token. The field is assigned without taking
+// h.mu.
+// NOTE(review): pollForToken reads the callback from its own goroutine, so
+// this presumably has to be called before any auth flow starts — confirm.
+func (h *OAuthWebHandler) SetTokenCallback(callback func(*KiroTokenData)) {
+	h.onTokenObtained = callback
+}
+
+// RegisterRoutes mounts the Kiro OAuth web endpoints under /v0/oauth/kiro on
+// the given router.
+func (h *OAuthWebHandler) RegisterRoutes(router gin.IRouter) {
+	group := router.Group("/v0/oauth/kiro")
+
+	// Pages and callbacks reached from the browser.
+	group.GET("", h.handleSelect)
+	group.GET("/start", h.handleStart)
+	group.GET("/callback", h.handleCallback)
+	group.GET("/social/callback", h.handleSocialCallback)
+	group.GET("/status", h.handleStatus)
+
+	// Token management actions.
+	group.POST("/import", h.handleImportToken)
+	group.POST("/refresh", h.handleManualRefresh)
+}
+
+func generateStateID() (string, error) {
+	b := make([]byte, 16)
+	if _, err := rand.Read(b); err != nil {
+		return "", err
+	}
+	return base64.RawURLEncoding.EncodeToString(b), nil
+}
+
+// handleSelect serves GET /v0/oauth/kiro by rendering the method selection
+// page via renderSelectPage.
+func (h *OAuthWebHandler) handleSelect(c *gin.Context) {
+	h.renderSelectPage(c)
+}
+
+// handleStart dispatches on the "method" query parameter: an empty value
+// redirects back to the selection page, "builder-id" and "idc" start their
+// flows, "google"/"github" are rejected with an explanation, and anything
+// else is reported as an unknown method.
+func (h *OAuthWebHandler) handleStart(c *gin.Context) {
+	switch method := c.Query("method"); method {
+	case "":
+		c.Redirect(http.StatusFound, "/v0/oauth/kiro")
+	case "builder-id":
+		h.startBuilderIDAuth(c)
+	case "idc":
+		h.startIDCAuth(c)
+	case "google", "github":
+		// Google/GitHub social login is not supported for third-party apps
+		// due to AWS Cognito redirect_uri restrictions
+		h.renderError(c, "Google/GitHub login is not available for third-party applications. Please use AWS Builder ID or import your token from Kiro IDE.")
+	default:
+		h.renderError(c, fmt.Sprintf("Unknown authentication method: %s", method))
+	}
+}
+
+func (h *OAuthWebHandler) startSocialAuth(c *gin.Context, method string) {
+	stateID, err := generateStateID()
+	if err != nil {
+		h.renderError(c, "Failed to generate state parameter")
+		return
+	}
+
+	codeVerifier, codeChallenge, err := generatePKCE()
+	if err != nil {
+		h.renderError(c, "Failed to generate PKCE parameters")
+		return
+	}
+
+	socialClient := NewSocialAuthClient(h.cfg)
+	
+	var provider string
+	if method == "google" {
+		provider = string(ProviderGoogle)
+	} else {
+		provider = string(ProviderGitHub)
+	}
+
+	redirectURI := h.getSocialCallbackURL(c)
+	authURL := socialClient.buildLoginURL(provider, redirectURI, codeChallenge, stateID)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+
+	session := &webAuthSession{
+		stateID:       stateID,
+		authMethod:    method,
+		authURL:       authURL,
+		status:        statusPending,
+		startedAt:     time.Now(),
+		expiresIn:     600,
+		codeVerifier:  codeVerifier,
+		codeChallenge: codeChallenge,
+		region:        "us-east-1",
+		cancelFunc:    cancel,
+	}
+
+	h.mu.Lock()
+	h.sessions[stateID] = session
+	h.mu.Unlock()
+
+	go func() {
+		<-ctx.Done()
+		h.mu.Lock()
+		if session.status == statusPending {
+			session.status = statusFailed
+			session.error = "Authentication timed out"
+		}
+		h.mu.Unlock()
+	}()
+
+	c.Redirect(http.StatusFound, authURL)
+}
+
+// getSocialCallbackURL builds the absolute social-callback URL from the
+// request's Host. The scheme is https when the request arrived over TLS or
+// carries "X-Forwarded-Proto: https", and http otherwise.
+func (h *OAuthWebHandler) getSocialCallbackURL(c *gin.Context) string {
+	secure := c.Request.TLS != nil || c.GetHeader("X-Forwarded-Proto") == "https"
+
+	scheme := "http"
+	if secure {
+		scheme = "https"
+	}
+	return scheme + "://" + c.Request.Host + "/v0/oauth/kiro/social/callback"
+}
+
+// startBuilderIDAuth begins AWS Builder ID sign-in using the device
+// authorization flow with the fixed Builder ID start URL and default region.
+func (h *OAuthWebHandler) startBuilderIDAuth(c *gin.Context) {
+	h.startDeviceAuthSession(c, "builder-id", builderIDStartURL, defaultIDCRegion)
+}
+
+// startIDCAuth begins IAM Identity Center sign-in using the device
+// authorization flow. The "startUrl" query parameter is required; "region"
+// falls back to defaultIDCRegion when empty.
+func (h *OAuthWebHandler) startIDCAuth(c *gin.Context) {
+	startURL := c.Query("startUrl")
+	if startURL == "" {
+		h.renderError(c, "Missing startUrl parameter for IDC authentication")
+		return
+	}
+
+	region := c.Query("region")
+	if region == "" {
+		region = defaultIDCRegion
+	}
+
+	h.startDeviceAuthSession(c, "idc", startURL, region)
+}
+
+// startDeviceAuthSession is the flow shared by startBuilderIDAuth and
+// startIDCAuth: it registers an OIDC client, starts device authorization,
+// stores a pending session, launches background token polling and renders the
+// start page. Failures are reported through renderError.
+func (h *OAuthWebHandler) startDeviceAuthSession(c *gin.Context, authMethod, startURL, region string) {
+	stateID, err := generateStateID()
+	if err != nil {
+		h.renderError(c, "Failed to generate state parameter")
+		return
+	}
+
+	ssoClient := NewSSOOIDCClient(h.cfg)
+
+	regResp, err := ssoClient.RegisterClientWithRegion(c.Request.Context(), region)
+	if err != nil {
+		log.Errorf("OAuth Web: failed to register client: %v", err)
+		h.renderError(c, fmt.Sprintf("Failed to register client: %v", err))
+		return
+	}
+
+	authResp, err := ssoClient.StartDeviceAuthorizationWithIDC(
+		c.Request.Context(),
+		regResp.ClientID,
+		regResp.ClientSecret,
+		startURL,
+		region,
+	)
+	if err != nil {
+		log.Errorf("OAuth Web: failed to start device authorization: %v", err)
+		h.renderError(c, fmt.Sprintf("Failed to start device authorization: %v", err))
+		return
+	}
+
+	// A non-positive ExpiresIn would create an already-expired context and
+	// fail the session immediately, so fall back to the default lifetime.
+	lifetime := time.Duration(authResp.ExpiresIn) * time.Second
+	if lifetime <= 0 {
+		lifetime = defaultSessionExpiry
+	}
+	// The polling goroutine outlives this request, so its context is derived
+	// from Background rather than from the request context.
+	ctx, cancel := context.WithTimeout(context.Background(), lifetime)
+
+	session := &webAuthSession{
+		stateID:         stateID,
+		deviceCode:      authResp.DeviceCode,
+		userCode:        authResp.UserCode,
+		authURL:         authResp.VerificationURIComplete,
+		verificationURI: authResp.VerificationURI,
+		expiresIn:       authResp.ExpiresIn,
+		interval:        authResp.Interval,
+		status:          statusPending,
+		startedAt:       time.Now(),
+		ssoClient:       ssoClient,
+		clientID:        regResp.ClientID,
+		clientSecret:    regResp.ClientSecret,
+		region:          region,
+		authMethod:      authMethod,
+		startURL:        startURL,
+		cancelFunc:      cancel,
+	}
+
+	h.mu.Lock()
+	h.sessions[stateID] = session
+	h.mu.Unlock()
+
+	go h.pollForToken(ctx, session)
+
+	h.renderStartPage(c, session)
+}
+
+// pollForToken polls the SSO OIDC token endpoint for session until the user
+// completes authorization, a non-retryable error occurs, or ctx ends.
+//
+// The poll interval is the session's interval, raised to at least
+// pollIntervalSeconds, and grows by five seconds each time the server answers
+// with ErrSlowDown. On success the session is marked successful, the optional
+// token callback is invoked and the token is written to the auth directory.
+// The session's cancelFunc is always called on return.
+func (h *OAuthWebHandler) pollForToken(ctx context.Context, session *webAuthSession) {
+	defer session.cancelFunc()
+
+	interval := time.Duration(session.interval) * time.Second
+	if interval < time.Duration(pollIntervalSeconds)*time.Second {
+		interval = time.Duration(pollIntervalSeconds) * time.Second
+	}
+
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	// markTimedOut records a timeout unless the session already finished.
+	markTimedOut := func() {
+		h.mu.Lock()
+		defer h.mu.Unlock()
+		if session.status == statusPending {
+			session.status = statusFailed
+			session.error = "Authentication timed out"
+		}
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			markTimedOut()
+			return
+		case <-ticker.C:
+			tokenResp, err := session.ssoClient.CreateTokenWithRegion(
+				ctx,
+				session.clientID,
+				session.clientSecret,
+				session.deviceCode,
+				session.region,
+			)
+
+			if err != nil {
+				errStr := err.Error()
+				if errStr == ErrAuthorizationPending.Error() {
+					continue
+				}
+				if errStr == ErrSlowDown.Error() {
+					interval += 5 * time.Second
+					ticker.Reset(interval)
+					continue
+				}
+				// If the context ended while the request was in flight, the
+				// error is just the cancellation; report it as a timeout
+				// rather than as a raw transport error.
+				if ctx.Err() != nil {
+					markTimedOut()
+					return
+				}
+
+				h.mu.Lock()
+				session.status = statusFailed
+				session.error = errStr
+				session.completedAt = time.Now()
+				h.mu.Unlock()
+
+				log.Errorf("OAuth Web: token polling failed: %v", err)
+				return
+			}
+
+			expiresAt := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
+			profileArn := session.ssoClient.fetchProfileArn(ctx, tokenResp.AccessToken)
+			email := FetchUserEmailWithFallback(ctx, h.cfg, tokenResp.AccessToken)
+
+			tokenData := &KiroTokenData{
+				AccessToken:  tokenResp.AccessToken,
+				RefreshToken: tokenResp.RefreshToken,
+				ProfileArn:   profileArn,
+				ExpiresAt:    expiresAt.Format(time.RFC3339),
+				AuthMethod:   session.authMethod,
+				Provider:     "AWS",
+				ClientID:     session.clientID,
+				ClientSecret: session.clientSecret,
+				Email:        email,
+				Region:       session.region,
+				StartURL:     session.startURL,
+			}
+
+			h.mu.Lock()
+			session.status = statusSuccess
+			session.completedAt = time.Now()
+			session.expiresAt = expiresAt
+			session.tokenData = tokenData
+			h.mu.Unlock()
+
+			if h.onTokenObtained != nil {
+				h.onTokenObtained(tokenData)
+			}
+
+			// Save token to file
+			h.saveTokenToFile(tokenData)
+
+			log.Infof("OAuth Web: authentication successful for %s", email)
+			return
+		}
+	}
+}
+
+// saveTokenToFile writes tokenData to a file in the auth directory.
+// The directory is the configured AuthDir when it resolves to a non-empty
+// path, otherwise ~/.cli-proxy-api; it is created with mode 0700 if missing.
+// Errors are logged and not returned to the caller.
+func (h *OAuthWebHandler) saveTokenToFile(tokenData *KiroTokenData) {
+	// Prefer the configured auth directory when one is set.
+	var dir string
+	if h.cfg != nil && h.cfg.AuthDir != "" {
+		resolved, err := util.ResolveAuthDir(h.cfg.AuthDir)
+		if err != nil {
+			log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
+		}
+		dir = resolved
+	}
+
+	// Otherwise fall back to the default location under the home directory.
+	if dir == "" {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			log.Errorf("OAuth Web: failed to get home directory: %v", err)
+			return
+		}
+		dir = filepath.Join(home, ".cli-proxy-api")
+	}
+
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		log.Errorf("OAuth Web: failed to create auth directory: %v", err)
+		return
+	}
+
+	// The file name comes from the shared naming helper.
+	target := filepath.Join(dir, GenerateTokenFileName(tokenData))
+
+	// Convert to the on-disk storage format.
+	record := &KiroTokenStorage{
+		Type:         "kiro",
+		AccessToken:  tokenData.AccessToken,
+		RefreshToken: tokenData.RefreshToken,
+		ProfileArn:   tokenData.ProfileArn,
+		ExpiresAt:    tokenData.ExpiresAt,
+		AuthMethod:   tokenData.AuthMethod,
+		Provider:     tokenData.Provider,
+		LastRefresh:  time.Now().Format(time.RFC3339),
+		ClientID:     tokenData.ClientID,
+		ClientSecret: tokenData.ClientSecret,
+		Region:       tokenData.Region,
+		StartURL:     tokenData.StartURL,
+		Email:        tokenData.Email,
+	}
+
+	if err := record.SaveTokenToFile(target); err != nil {
+		log.Errorf("OAuth Web: failed to save token to file: %v", err)
+		return
+	}
+
+	log.Infof("OAuth Web: token saved to %s", target)
+}
+
+// ssoClient returns the SSO OIDC client stored on the given session.
+// The handler itself holds no client; the value comes from session.ssoClient.
+func (h *OAuthWebHandler) ssoClient(session *webAuthSession) *SSOOIDCClient {
+	return session.ssoClient
+}
+
+// handleCallback serves the OAuth callback page for a session identified by
+// the "state" query parameter.
+//
+// An "error" query parameter, a missing state, or an unknown session renders
+// the error page. Otherwise the page depends on the session status: success
+// renders the success page, failure renders the stored error, and any other
+// status redirects back to the start endpoint.
+func (h *OAuthWebHandler) handleCallback(c *gin.Context) {
+	stateID := c.Query("state")
+	errParam := c.Query("error")
+
+	if errParam != "" {
+		h.renderError(c, errParam)
+		return
+	}
+
+	if stateID == "" {
+		h.renderError(c, "Missing state parameter")
+		return
+	}
+
+	// Snapshot the mutable session fields while holding the read lock: they
+	// are written under h.mu elsewhere, so reading them after unlocking would
+	// be a data race.
+	h.mu.RLock()
+	session, exists := h.sessions[stateID]
+	if !exists {
+		h.mu.RUnlock()
+		h.renderError(c, "Invalid or expired session")
+		return
+	}
+	status, errMsg := session.status, session.error
+	h.mu.RUnlock()
+
+	switch status {
+	case statusSuccess:
+		h.renderSuccess(c, session)
+	case statusFailed:
+		h.renderError(c, errMsg)
+	default:
+		c.Redirect(http.StatusFound, "/v0/oauth/kiro/start")
+	}
+}
+
+func (h *OAuthWebHandler) handleSocialCallback(c *gin.Context) {
+	stateID := c.Query("state")
+	code := c.Query("code")
+	errParam := c.Query("error")
+
+	if errParam != "" {
+		h.renderError(c, errParam)
+		return
+	}
+
+	if stateID == "" {
+		h.renderError(c, "Missing state parameter")
+		return
+	}
+
+	if code == "" {
+		h.renderError(c, "Missing authorization code")
+		return
+	}
+
+	h.mu.RLock()
+	session, exists := h.sessions[stateID]
+	h.mu.RUnlock()
+
+	if !exists {
+		h.renderError(c, "Invalid or expired session")
+		return
+	}
+
+	if session.authMethod != "google" && session.authMethod != "github" {
+		h.renderError(c, "Invalid session type for social callback")
+		return
+	}
+
+	socialClient := NewSocialAuthClient(h.cfg)
+	redirectURI := h.getSocialCallbackURL(c)
+
+	tokenReq := &CreateTokenRequest{
+		Code:         code,
+		CodeVerifier: session.codeVerifier,
+		RedirectURI:  redirectURI,
+	}
+
+	tokenResp, err := socialClient.CreateToken(c.Request.Context(), tokenReq)
+	if err != nil {
+		log.Errorf("OAuth Web: social token exchange failed: %v", err)
+		h.mu.Lock()
+		session.status = statusFailed
+		session.error = fmt.Sprintf("Token exchange failed: %v", err)
+		session.completedAt = time.Now()
+		h.mu.Unlock()
+		h.renderError(c, session.error)
+		return
+	}
+
+	expiresIn := tokenResp.ExpiresIn
+	if expiresIn <= 0 {
+		expiresIn = 3600
+	}
+	expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second)
+
+	email := ExtractEmailFromJWT(tokenResp.AccessToken)
+
+	var provider string
+	if session.authMethod == "google" {
+		provider = string(ProviderGoogle)
+	} else {
+		provider = string(ProviderGitHub)
+	}
+
+	tokenData := &KiroTokenData{
+		AccessToken:  tokenResp.AccessToken,
+		RefreshToken: tokenResp.RefreshToken,
+		ProfileArn:   tokenResp.ProfileArn,
+		ExpiresAt:    expiresAt.Format(time.RFC3339),
+		AuthMethod:   session.authMethod,
+		Provider:     provider,
+		Email:        email,
+		Region:       "us-east-1",
+	}
+
+	h.mu.Lock()
+	session.status = statusSuccess
+	session.completedAt = time.Now()
+	session.expiresAt = expiresAt
+	session.tokenData = tokenData
+	h.mu.Unlock()
+
+	if session.cancelFunc != nil {
+		session.cancelFunc()
+	}
+
+	if h.onTokenObtained != nil {
+		h.onTokenObtained(tokenData)
+	}
+
+	// Save token to file
+	h.saveTokenToFile(tokenData)
+
+	log.Infof("OAuth Web: social authentication successful for %s via %s", email, provider)
+	h.renderSuccess(c, session)
+}
+
+// handleStatus reports the state of an authentication session as JSON.
+//
+// The session is selected by the "state" query parameter. A missing parameter
+// yields 400 and an unknown session 404. Otherwise the response carries
+// "status" plus status-specific fields: "remaining_seconds" while pending,
+// "completed_at" and "expires_at" on success, "error" and "failed_at" on
+// failure.
+func (h *OAuthWebHandler) handleStatus(c *gin.Context) {
+	stateID := c.Query("state")
+	if stateID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "missing state parameter"})
+		return
+	}
+
+	// Copy every field used below while holding the read lock. The session is
+	// updated under h.mu by the authentication flow, and this endpoint is
+	// polled while that flow is still running.
+	h.mu.RLock()
+	session, exists := h.sessions[stateID]
+	if !exists {
+		h.mu.RUnlock()
+		c.JSON(http.StatusNotFound, gin.H{"error": "session not found"})
+		return
+	}
+	status, errMsg := session.status, session.error
+	startedAt, completedAt, expiresAt := session.startedAt, session.completedAt, session.expiresAt
+	expiresIn := session.expiresIn
+	h.mu.RUnlock()
+
+	response := gin.H{
+		"status": string(status),
+	}
+
+	switch status {
+	case statusPending:
+		// Clamp at zero so an overdue session never reports a negative value.
+		remaining := float64(expiresIn) - time.Since(startedAt).Seconds()
+		if remaining < 0 {
+			remaining = 0
+		}
+		response["remaining_seconds"] = int(remaining)
+	case statusSuccess:
+		response["completed_at"] = completedAt.Format(time.RFC3339)
+		response["expires_at"] = expiresAt.Format(time.RFC3339)
+	case statusFailed:
+		response["error"] = errMsg
+		response["failed_at"] = completedAt.Format(time.RFC3339)
+	}
+
+	c.JSON(http.StatusOK, response)
+}
+
+// renderStartPage writes the start page for the given session as HTML.
+// The template receives the session's auth URL, user code, lifetime and state
+// ID. A parse failure answers 500; an execution failure is only logged.
+func (h *OAuthWebHandler) renderStartPage(c *gin.Context, session *webAuthSession) {
+	page, errParse := template.New("start").Parse(oauthWebStartPageHTML)
+	if errParse != nil {
+		log.Errorf("OAuth Web: failed to parse template: %v", errParse)
+		c.String(http.StatusInternalServerError, "Template error")
+		return
+	}
+
+	values := map[string]interface{}{
+		"AuthURL":   session.authURL,
+		"UserCode":  session.userCode,
+		"ExpiresIn": session.expiresIn,
+		"StateID":   session.stateID,
+	}
+
+	c.Header("Content-Type", "text/html; charset=utf-8")
+	errExec := page.Execute(c.Writer, values)
+	if errExec != nil {
+		log.Errorf("OAuth Web: failed to render template: %v", errExec)
+	}
+}
+
+// renderSelectPage writes the authentication-method selection page as HTML.
+// The template is executed without data. A parse failure answers 500; an
+// execution failure is only logged.
+func (h *OAuthWebHandler) renderSelectPage(c *gin.Context) {
+	page, errParse := template.New("select").Parse(oauthWebSelectPageHTML)
+	if errParse != nil {
+		log.Errorf("OAuth Web: failed to parse select template: %v", errParse)
+		c.String(http.StatusInternalServerError, "Template error")
+		return
+	}
+
+	c.Header("Content-Type", "text/html; charset=utf-8")
+	errExec := page.Execute(c.Writer, nil)
+	if errExec != nil {
+		log.Errorf("OAuth Web: failed to render select template: %v", errExec)
+	}
+}
+
+// renderError writes the error page with errMsg and a 400 status.
+// A parse failure answers 500; an execution failure is only logged.
+func (h *OAuthWebHandler) renderError(c *gin.Context, errMsg string) {
+	page, errParse := template.New("error").Parse(oauthWebErrorPageHTML)
+	if errParse != nil {
+		log.Errorf("OAuth Web: failed to parse error template: %v", errParse)
+		c.String(http.StatusInternalServerError, "Template error")
+		return
+	}
+
+	values := map[string]interface{}{"Error": errMsg}
+
+	c.Header("Content-Type", "text/html; charset=utf-8")
+	c.Status(http.StatusBadRequest)
+	errExec := page.Execute(c.Writer, values)
+	if errExec != nil {
+		log.Errorf("OAuth Web: failed to render error template: %v", errExec)
+	}
+}
+
+// renderSuccess writes the success page, showing the session's token expiry
+// formatted as RFC 3339. A parse failure answers 500; an execution failure is
+// only logged.
+func (h *OAuthWebHandler) renderSuccess(c *gin.Context, session *webAuthSession) {
+	page, errParse := template.New("success").Parse(oauthWebSuccessPageHTML)
+	if errParse != nil {
+		log.Errorf("OAuth Web: failed to parse success template: %v", errParse)
+		c.String(http.StatusInternalServerError, "Template error")
+		return
+	}
+
+	values := map[string]interface{}{
+		"ExpiresAt": session.expiresAt.Format(time.RFC3339),
+	}
+
+	c.Header("Content-Type", "text/html; charset=utf-8")
+	errExec := page.Execute(c.Writer, values)
+	if errExec != nil {
+		log.Errorf("OAuth Web: failed to render success template: %v", errExec)
+	}
+}
+
+// CleanupExpiredSessions removes stale sessions from the handler.
+//
+// Finished sessions (any status other than pending) are dropped 30 minutes
+// after completion. Pending sessions older than defaultSessionExpiry are
+// cancelled and dropped.
+func (h *OAuthWebHandler) CleanupExpiredSessions() {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	now := time.Now()
+	for id, session := range h.sessions {
+		if session.status != statusPending {
+			if now.Sub(session.completedAt) > 30*time.Minute {
+				delete(h.sessions, id)
+			}
+			continue
+		}
+
+		if now.Sub(session.startedAt) > defaultSessionExpiry {
+			// cancelFunc is nil-checked by other code that uses it, so guard
+			// here too: calling a nil func would panic.
+			if session.cancelFunc != nil {
+				session.cancelFunc()
+			}
+			delete(h.sessions, id)
+		}
+	}
+}
+
+// GetSession looks up the session registered under stateID.
+// The boolean result reports whether such a session exists.
+func (h *OAuthWebHandler) GetSession(stateID string) (*webAuthSession, bool) {
+	h.mu.RLock()
+	found, ok := h.sessions[stateID]
+	h.mu.RUnlock()
+	return found, ok
+}
+
+// ImportTokenRequest is the JSON request body for token import.
+type ImportTokenRequest struct {
+	// RefreshToken is the refresh token to import, read from the
+	// "refreshToken" JSON key.
+	RefreshToken string `json:"refreshToken"`
+}
+
+// handleImportToken imports a refresh token supplied in the JSON request body.
+//
+// The token is trimmed, checked for the expected prefix, and validated by
+// refreshing it through the social auth client. On success the resulting token
+// data is passed to onTokenObtained, saved to the auth directory, and the
+// generated file name is returned. Every failure answers 400 with an error.
+func (h *OAuthWebHandler) handleImportToken(c *gin.Context) {
+	// reject sends the 400 response shared by every failure path.
+	reject := func(msg string) {
+		c.JSON(http.StatusBadRequest, gin.H{
+			"success": false,
+			"error":   msg,
+		})
+	}
+
+	var body ImportTokenRequest
+	if errBind := c.ShouldBindJSON(&body); errBind != nil {
+		reject("Invalid request body")
+		return
+	}
+
+	token := strings.TrimSpace(body.RefreshToken)
+	switch {
+	case token == "":
+		reject("Refresh token is required")
+		return
+	case !strings.HasPrefix(token, "aorAAAAAG"):
+		reject("Invalid token format. Token should start with aorAAAAAG...")
+		return
+	}
+
+	// Refreshing proves the token is valid and yields an access token.
+	imported, errRefresh := NewSocialAuthClient(h.cfg).RefreshSocialToken(c.Request.Context(), token)
+	if errRefresh != nil {
+		log.Errorf("OAuth Web: token refresh failed during import: %v", errRefresh)
+		reject(fmt.Sprintf("Token validation failed: %v", errRefresh))
+		return
+	}
+
+	// Keep the submitted refresh token when the refresh returned none.
+	if imported.RefreshToken == "" {
+		imported.RefreshToken = token
+	}
+	imported.AuthMethod = "social"
+	imported.Provider = "imported"
+
+	if notify := h.onTokenObtained; notify != nil {
+		notify(imported)
+	}
+
+	h.saveTokenToFile(imported)
+
+	// Report the file name produced by the shared generator.
+	savedName := GenerateTokenFileName(imported)
+
+	log.Infof("OAuth Web: token imported successfully")
+	c.JSON(http.StatusOK, gin.H{
+		"success":  true,
+		"message":  "Token imported successfully",
+		"fileName": savedName,
+	})
+}
+
+// handleManualRefresh handles manual token refresh requests from the web UI.
+// This allows users to trigger a token refresh when needed, without waiting
+// for the automatic refresh cycle.
+//
+// Every "kiro-*.json" file in the auth directory is refreshed in turn. The
+// response is 400 when no token was refreshed and at least one file failed;
+// otherwise it is 200 with the refreshed count and per-file failures listed
+// under "warnings".
+func (h *OAuthWebHandler) handleManualRefresh(c *gin.Context) {
+	authDir := ""
+	if h.cfg != nil && h.cfg.AuthDir != "" {
+		var err error
+		authDir, err = util.ResolveAuthDir(h.cfg.AuthDir)
+		if err != nil {
+			log.Errorf("OAuth Web: failed to resolve auth directory: %v", err)
+		}
+	}
+
+	// Fall back to the default location under the user's home directory.
+	if authDir == "" {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{
+				"success": false,
+				"error":   "Failed to get home directory",
+			})
+			return
+		}
+		authDir = filepath.Join(home, ".cli-proxy-api")
+	}
+
+	files, err := os.ReadDir(authDir)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"success": false,
+			"error":   fmt.Sprintf("Failed to read auth directory: %v", err),
+		})
+		return
+	}
+
+	var refreshedCount int
+	// Named failures rather than errors to avoid shadowing the stdlib package.
+	var failures []string
+
+	for _, file := range files {
+		if file.IsDir() {
+			continue
+		}
+		name := file.Name()
+		if !strings.HasPrefix(name, "kiro-") || !strings.HasSuffix(name, ".json") {
+			continue
+		}
+
+		tokenData, err := h.refreshTokenFile(c.Request.Context(), filepath.Join(authDir, name))
+		if err != nil {
+			failures = append(failures, fmt.Sprintf("%s: %v", name, err))
+			continue
+		}
+
+		log.Infof("OAuth Web: manually refreshed token in %s, expires at %s", name, tokenData.ExpiresAt)
+		refreshedCount++
+
+		// Notify callback if set
+		if h.onTokenObtained != nil {
+			h.onTokenObtained(tokenData)
+		}
+	}
+
+	if refreshedCount == 0 && len(failures) > 0 {
+		c.JSON(http.StatusBadRequest, gin.H{
+			"success": false,
+			"error":   fmt.Sprintf("All refresh attempts failed: %v", failures),
+		})
+		return
+	}
+
+	response := gin.H{
+		"success":        true,
+		"message":        fmt.Sprintf("Refreshed %d token(s)", refreshedCount),
+		"refreshedCount": refreshedCount,
+	}
+	if len(failures) > 0 {
+		response["warnings"] = failures
+	}
+
+	c.JSON(http.StatusOK, response)
+}
+
+// refreshTokenFile refreshes the token stored in the file at filePath and
+// writes the updated token back through a temporary file and rename. It
+// returns the refreshed token data, or an error naming the step that failed.
+func (h *OAuthWebHandler) refreshTokenFile(ctx context.Context, filePath string) (*KiroTokenData, error) {
+	data, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("read error - %w", err)
+	}
+
+	var storage KiroTokenStorage
+	if err := json.Unmarshal(data, &storage); err != nil {
+		return nil, fmt.Errorf("parse error - %w", err)
+	}
+
+	if storage.RefreshToken == "" {
+		return nil, fmt.Errorf("no refresh token")
+	}
+
+	tokenData, err := h.refreshTokenData(ctx, &storage)
+	if err != nil {
+		return nil, fmt.Errorf("refresh failed - %w", err)
+	}
+
+	// Only overwrite the refresh token and profile ARN when the refresh
+	// returned new values; otherwise the stored ones stay.
+	storage.AccessToken = tokenData.AccessToken
+	if tokenData.RefreshToken != "" {
+		storage.RefreshToken = tokenData.RefreshToken
+	}
+	storage.ExpiresAt = tokenData.ExpiresAt
+	storage.LastRefresh = time.Now().Format(time.RFC3339)
+	if tokenData.ProfileArn != "" {
+		storage.ProfileArn = tokenData.ProfileArn
+	}
+
+	updatedData, err := json.MarshalIndent(storage, "", "  ")
+	if err != nil {
+		return nil, fmt.Errorf("marshal error - %w", err)
+	}
+
+	tmpFile := filePath + ".tmp"
+	if err := os.WriteFile(tmpFile, updatedData, 0600); err != nil {
+		// Best-effort cleanup of a possibly partial temp file; the write
+		// error is the one worth reporting.
+		_ = os.Remove(tmpFile)
+		return nil, fmt.Errorf("write error - %w", err)
+	}
+	if err := os.Rename(tmpFile, filePath); err != nil {
+		// Best-effort cleanup so no stale temp file holding token material
+		// is left beside the token file.
+		_ = os.Remove(tmpFile)
+		return nil, fmt.Errorf("rename error - %w", err)
+	}
+
+	return tokenData, nil
+}
+
+// refreshTokenData refreshes the token described by storage, choosing the
+// refresh method from the stored auth type:
+//   - "idc" with client credentials and a region: SSO OIDC refresh with region
+//   - "builder-id" with client credentials: SSO OIDC refresh
+//   - anything else: the Kiro OAuth refresh endpoint
+func (h *OAuthWebHandler) refreshTokenData(ctx context.Context, storage *KiroTokenStorage) (*KiroTokenData, error) {
+	ssoClient := NewSSOOIDCClient(h.cfg)
+	hasClientCreds := storage.ClientID != "" && storage.ClientSecret != ""
+
+	if hasClientCreds && storage.AuthMethod == "idc" && storage.Region != "" {
+		log.Debugf("OAuth Web: using SSO OIDC refresh for IDC (region=%s)", storage.Region)
+		return ssoClient.RefreshTokenWithRegion(ctx, storage.ClientID, storage.ClientSecret, storage.RefreshToken, storage.Region, storage.StartURL)
+	}
+
+	if hasClientCreds && storage.AuthMethod == "builder-id" {
+		log.Debugf("OAuth Web: using SSO OIDC refresh for AWS Builder ID")
+		return ssoClient.RefreshToken(ctx, storage.ClientID, storage.ClientSecret, storage.RefreshToken)
+	}
+
+	// Neither SSO variant matched: use Kiro's own OAuth refresh endpoint.
+	log.Debugf("OAuth Web: using Kiro OAuth refresh endpoint")
+	return NewKiroOAuth(h.cfg).RefreshToken(ctx, storage.RefreshToken)
+}
--- a/internal/auth/kiro/oauth_web_templates.go
+++ b/internal/auth/kiro/oauth_web_templates.go
@@ -0,0 +1,779 @@
+// Package kiro provides OAuth Web authentication templates.
+package kiro
+
+const (
+	oauthWebStartPageHTML = `<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>AWS SSO Authentication</title>
+    <style>
+        * { box-sizing: border-box; }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            margin: 0;
+            padding: 20px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+        }
+        .container {
+            max-width: 500px;
+            width: 100%;
+            background: #fff;
+            padding: 40px;
+            border-radius: 12px;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
+        }
+        h1 {
+            margin: 0 0 10px;
+            color: #333;
+            font-size: 24px;
+            text-align: center;
+        }
+        .subtitle {
+            text-align: center;
+            color: #666;
+            margin-bottom: 30px;
+        }
+        .step {
+            background: #f8f9fa;
+            padding: 20px;
+            border-radius: 8px;
+            margin-bottom: 15px;
+        }
+        .step-title {
+            display: flex;
+            align-items: center;
+            font-weight: 600;
+            color: #333;
+            margin-bottom: 10px;
+        }
+        .step-number {
+            width: 28px;
+            height: 28px;
+            background: #667eea;
+            color: white;
+            border-radius: 50%;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            font-size: 14px;
+            margin-right: 12px;
+        }
+        .user-code {
+            background: #e7f3ff;
+            border: 2px dashed #2196F3;
+            border-radius: 8px;
+            padding: 20px;
+            text-align: center;
+            margin-top: 10px;
+        }
+        .user-code-label {
+            font-size: 12px;
+            color: #666;
+            text-transform: uppercase;
+            letter-spacing: 1px;
+            margin-bottom: 8px;
+        }
+        .user-code-value {
+            font-size: 32px;
+            font-weight: bold;
+            font-family: monospace;
+            color: #2196F3;
+            letter-spacing: 4px;
+        }
+        .auth-btn {
+            display: block;
+            width: 100%;
+            padding: 15px;
+            background: #667eea;
+            color: white;
+            text-align: center;
+            text-decoration: none;
+            border-radius: 8px;
+            font-weight: 600;
+            font-size: 16px;
+            transition: all 0.3s;
+            border: none;
+            cursor: pointer;
+            margin-top: 20px;
+        }
+        .auth-btn:hover {
+            background: #5568d3;
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+        }
+        .status {
+            margin-top: 30px;
+            padding: 20px;
+            background: #f8f9fa;
+            border-radius: 8px;
+            text-align: center;
+        }
+        .status-pending { border-left: 4px solid #ffc107; }
+        .status-success { border-left: 4px solid #28a745; }
+        .status-failed { border-left: 4px solid #dc3545; }
+        .spinner {
+            border: 3px solid #f3f3f3;
+            border-top: 3px solid #667eea;
+            border-radius: 50%;
+            width: 40px;
+            height: 40px;
+            animation: spin 1s linear infinite;
+            margin: 0 auto 15px;
+        }
+        @keyframes spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .timer {
+            font-size: 24px;
+            font-weight: bold;
+            color: #667eea;
+            margin: 10px 0;
+        }
+        .timer.warning { color: #ffc107; }
+        .timer.danger { color: #dc3545; }
+        .status-message { color: #666; line-height: 1.6; }
+        .success-icon, .error-icon { font-size: 48px; margin-bottom: 15px; }
+        .info-box {
+            background: #e7f3ff;
+            border-left: 4px solid #2196F3;
+            padding: 15px;
+            margin-top: 20px;
+            border-radius: 4px;
+            font-size: 14px;
+            color: #666;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>🔐 AWS SSO Authentication</h1>
+        <p class="subtitle">Follow the steps below to complete authentication</p>
+        
+        <div class="step">
+            <div class="step-title">
+                <span class="step-number">1</span>
+                Click the button below to open the authorization page
+            </div>
+            <a href="{{.AuthURL}}" target="_blank" class="auth-btn" id="authBtn">
+                🚀 Open Authorization Page
+            </a>
+        </div>
+        
+        <div class="step">
+            <div class="step-title">
+                <span class="step-number">2</span>
+                Enter the verification code below
+            </div>
+            <div class="user-code">
+                <div class="user-code-label">Verification Code</div>
+                <div class="user-code-value">{{.UserCode}}</div>
+            </div>
+        </div>
+        
+        <div class="step">
+            <div class="step-title">
+                <span class="step-number">3</span>
+                Complete AWS SSO login
+            </div>
+            <p style="color: #666; font-size: 14px; margin-top: 10px;">
+                Use your AWS SSO account to login and authorize
+            </p>
+        </div>
+        
+        <div class="status status-pending" id="statusBox">
+            <div class="spinner" id="spinner"></div>
+            <div class="timer" id="timer">{{.ExpiresIn}}s</div>
+            <div class="status-message" id="statusMessage">
+                Waiting for authorization...
+            </div>
+        </div>
+        
+        <div class="info-box">
+            💡 <strong>Tip:</strong> The authorization page will open in a new tab. This page will automatically update once authorization is complete.
+        </div>
+    </div>
+    
+    <script>
+        let pollInterval;
+        let timerInterval;
+        let remainingSeconds = {{.ExpiresIn}};
+        const stateID = "{{.StateID}}";
+        
+        setTimeout(() => {
+            document.getElementById('authBtn').click();
+        }, 500);
+        
+        function pollStatus() {
+            fetch('/v0/oauth/kiro/status?state=' + stateID)
+                .then(response => response.json())
+                .then(data => {
+                    console.log('Status:', data);
+                    if (data.status === 'success') {
+                        clearInterval(pollInterval);
+                        clearInterval(timerInterval);
+                        showSuccess(data);
+                    } else if (data.status === 'failed') {
+                        clearInterval(pollInterval);
+                        clearInterval(timerInterval);
+                        showError(data);
+                    } else {
+                        remainingSeconds = data.remaining_seconds || 0;
+                    }
+                })
+                .catch(error => {
+                    console.error('Poll error:', error);
+                });
+        }
+        
+        function updateTimer() {
+            const timerEl = document.getElementById('timer');
+            const minutes = Math.floor(remainingSeconds / 60);
+            const seconds = remainingSeconds % 60;
+            timerEl.textContent = minutes + ':' + seconds.toString().padStart(2, '0');
+            
+            if (remainingSeconds < 60) {
+                timerEl.className = 'timer danger';
+            } else if (remainingSeconds < 180) {
+                timerEl.className = 'timer warning';
+            } else {
+                timerEl.className = 'timer';
+            }
+            
+            remainingSeconds--;
+            
+            if (remainingSeconds < 0) {
+                clearInterval(timerInterval);
+                clearInterval(pollInterval);
+                showError({ error: 'Authentication timed out. Please refresh and try again.' });
+            }
+        }
+        
+        function showSuccess(data) {
+            const statusBox = document.getElementById('statusBox');
+            statusBox.className = 'status status-success';
+            statusBox.innerHTML = '<div class="success-icon">✅</div>' +
+                '<div class="status-message">' +
+                '<strong>Authentication Successful!</strong><br>' +
+                'Token expires: ' + new Date(data.expires_at).toLocaleString() +
+                '</div>';
+        }
+        
+        function showError(data) {
+            const statusBox = document.getElementById('statusBox');
+            statusBox.className = 'status status-failed';
+            statusBox.innerHTML = '<div class="error-icon">❌</div>' +
+                '<div class="status-message">' +
+                '<strong>Authentication Failed</strong><br>' +
+                (data.error || 'Unknown error') +
+                '</div>' +
+                '<button class="auth-btn" onclick="location.reload()" style="margin-top: 15px;">' +
+                '🔄 Retry' +
+                '</button>';
+        }
+        
+        pollInterval = setInterval(pollStatus, 3000);
+        timerInterval = setInterval(updateTimer, 1000);
+        pollStatus();
+    </script>
+</body>
+</html>`
+
+	oauthWebErrorPageHTML = `<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Authentication Failed</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            max-width: 600px;
+            margin: 50px auto;
+            padding: 20px;
+            background: #f5f5f5;
+        }
+        .error {
+            background: #fff;
+            padding: 30px;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            border-left: 4px solid #dc3545;
+        }
+        h1 { color: #dc3545; margin-top: 0; }
+        .error-message { color: #666; line-height: 1.6; }
+        .retry-btn {
+            display: inline-block;
+            margin-top: 20px;
+            padding: 10px 20px;
+            background: #007bff;
+            color: white;
+            text-decoration: none;
+            border-radius: 4px;
+        }
+        .retry-btn:hover { background: #0056b3; }
+    </style>
+</head>
+<body>
+    <div class="error">
+        <h1>❌ Authentication Failed</h1>
+        <div class="error-message">
+            <p><strong>Error:</strong></p>
+            <p>{{.Error}}</p>
+        </div>
+        <a href="/v0/oauth/kiro/start" class="retry-btn">🔄 Retry</a>
+    </div>
+</body>
+</html>`
+
+	oauthWebSuccessPageHTML = `<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Authentication Successful</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            max-width: 600px;
+            margin: 50px auto;
+            padding: 20px;
+            background: #f5f5f5;
+        }
+        .success {
+            background: #fff;
+            padding: 30px;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            border-left: 4px solid #28a745;
+            text-align: center;
+        }
+        h1 { color: #28a745; margin-top: 0; }
+        .success-message { color: #666; line-height: 1.6; }
+        .icon { font-size: 48px; margin-bottom: 15px; }
+        .expires { font-size: 14px; color: #999; margin-top: 15px; }
+    </style>
+</head>
+<body>
+    <div class="success">
+        <div class="icon">✅</div>
+        <h1>Authentication Successful!</h1>
+        <div class="success-message">
+            <p>You can close this window.</p>
+        </div>
+        <div class="expires">Token expires: {{.ExpiresAt}}</div>
+    </div>
+</body>
+</html>`
+
+	oauthWebSelectPageHTML = `<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Select Authentication Method</title>
+    <style>
+        * { box-sizing: border-box; }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            margin: 0;
+            padding: 20px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+        }
+        .container {
+            max-width: 500px;
+            width: 100%;
+            background: #fff;
+            padding: 40px;
+            border-radius: 12px;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
+        }
+        h1 {
+            margin: 0 0 10px;
+            color: #333;
+            font-size: 24px;
+            text-align: center;
+        }
+        .subtitle {
+            text-align: center;
+            color: #666;
+            margin-bottom: 30px;
+        }
+        .auth-methods {
+            display: flex;
+            flex-direction: column;
+            gap: 15px;
+        }
+        .auth-btn {
+            display: flex;
+            align-items: center;
+            width: 100%;
+            padding: 15px 20px;
+            background: #667eea;
+            color: white;
+            text-decoration: none;
+            border-radius: 8px;
+            font-weight: 600;
+            font-size: 16px;
+            transition: all 0.3s;
+            border: none;
+            cursor: pointer;
+        }
+        .auth-btn:hover {
+            background: #5568d3;
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+        }
+        .auth-btn .icon {
+            font-size: 24px;
+            margin-right: 15px;
+            width: 32px;
+            text-align: center;
+        }
+        .auth-btn.google { background: #4285F4; }
+        .auth-btn.google:hover { background: #3367D6; }
+        .auth-btn.github { background: #24292e; }
+        .auth-btn.github:hover { background: #1a1e22; }
+        .auth-btn.aws { background: #FF9900; }
+        .auth-btn.aws:hover { background: #E68A00; }
+        .auth-btn.idc { background: #232F3E; }
+        .auth-btn.idc:hover { background: #1a242f; }
+        .idc-form {
+            background: #f8f9fa;
+            padding: 20px;
+            border-radius: 8px;
+            margin-top: 15px;
+            display: none;
+        }
+        .idc-form.show {
+            display: block;
+        }
+        .form-group {
+            margin-bottom: 15px;
+        }
+        .form-group label {
+            display: block;
+            font-weight: 600;
+            color: #333;
+            margin-bottom: 8px;
+            font-size: 14px;
+        }
+        .form-group input {
+            width: 100%;
+            padding: 12px;
+            border: 2px solid #e0e0e0;
+            border-radius: 6px;
+            font-size: 14px;
+            transition: border-color 0.3s;
+        }
+        .form-group input:focus {
+            outline: none;
+            border-color: #667eea;
+        }
+        .form-group .hint {
+            font-size: 12px;
+            color: #999;
+            margin-top: 5px;
+        }
+        .submit-btn {
+            display: block;
+            width: 100%;
+            padding: 15px;
+            background: #232F3E;
+            color: white;
+            text-align: center;
+            text-decoration: none;
+            border-radius: 8px;
+            font-weight: 600;
+            font-size: 16px;
+            transition: all 0.3s;
+            border: none;
+            cursor: pointer;
+        }
+        .submit-btn:hover {
+            background: #1a242f;
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(35, 47, 62, 0.4);
+        }
+        .divider {
+            display: flex;
+            align-items: center;
+            margin: 20px 0;
+        }
+        .divider::before,
+        .divider::after {
+            content: "";
+            flex: 1;
+            border-bottom: 1px solid #e0e0e0;
+        }
+        .divider span {
+            padding: 0 15px;
+            color: #999;
+            font-size: 14px;
+        }
+        .info-box {
+            background: #e7f3ff;
+            border-left: 4px solid #2196F3;
+            padding: 15px;
+            margin-top: 20px;
+            border-radius: 4px;
+            font-size: 14px;
+            color: #666;
+        }
+        .warning-box {
+            background: #fff3cd;
+            border-left: 4px solid #ffc107;
+            padding: 15px;
+            margin-top: 20px;
+            border-radius: 4px;
+            font-size: 14px;
+            color: #856404;
+        }
+        .auth-btn.manual { background: #6c757d; }
+        .auth-btn.manual:hover { background: #5a6268; }
+        .auth-btn.refresh { background: #17a2b8; }
+        .auth-btn.refresh:hover { background: #138496; }
+        .auth-btn.refresh:disabled { background: #7fb3bd; cursor: not-allowed; }
+        .manual-form {
+            background: #f8f9fa;
+            padding: 20px;
+            border-radius: 8px;
+            margin-top: 15px;
+            display: none;
+        }
+        .manual-form.show {
+            display: block;
+        }
+        .form-group textarea {
+            width: 100%;
+            padding: 12px;
+            border: 2px solid #e0e0e0;
+            border-radius: 6px;
+            font-size: 14px;
+            font-family: monospace;
+            transition: border-color 0.3s;
+            resize: vertical;
+            min-height: 80px;
+        }
+        .form-group textarea:focus {
+            outline: none;
+            border-color: #667eea;
+        }
+        .status-message {
+            padding: 15px;
+            border-radius: 6px;
+            margin-top: 15px;
+            display: none;
+        }
+        .status-message.success {
+            background: #d4edda;
+            color: #155724;
+            display: block;
+        }
+        .status-message.error {
+            background: #f8d7da;
+            color: #721c24;
+            display: block;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>🔐 Select Authentication Method</h1>
+        <p class="subtitle">Choose how you want to authenticate with Kiro</p>
+        
+        <div class="auth-methods">
+            <a href="/v0/oauth/kiro/start?method=builder-id" class="auth-btn aws">
+                <span class="icon">🔶</span>
+                AWS Builder ID (Recommended)
+            </a>
+            
+            <button type="button" class="auth-btn idc" onclick="toggleIdcForm()">
+                <span class="icon">🏢</span>
+                AWS Identity Center (IDC)
+            </button>
+            
+            <div class="divider"><span>or</span></div>
+            
+            <button type="button" class="auth-btn manual" onclick="toggleManualForm()">
+                <span class="icon">📋</span>
+                Import RefreshToken from Kiro IDE
+            </button>
+            
+            <button type="button" class="auth-btn refresh" onclick="manualRefresh()" id="refreshBtn">
+                <span class="icon">🔄</span>
+                Manual Refresh All Tokens
+            </button>
+            
+            <div class="status-message" id="refreshStatus"></div>
+        </div>
+        
+        <div class="idc-form" id="idcForm">
+            <form action="/v0/oauth/kiro/start" method="get">
+                <input type="hidden" name="method" value="idc">
+                
+                <div class="form-group">
+                    <label for="startUrl">Start URL</label>
+                    <input type="url" id="startUrl" name="startUrl" placeholder="https://your-org.awsapps.com/start" required>
+                    <div class="hint">Your AWS Identity Center Start URL</div>
+                </div>
+                
+                <div class="form-group">
+                    <label for="region">Region</label>
+                    <input type="text" id="region" name="region" value="us-east-1" placeholder="us-east-1">
+                    <div class="hint">AWS Region for your Identity Center</div>
+                </div>
+                
+                <button type="submit" class="submit-btn">
+                    🚀 Continue with IDC
+                </button>
+            </form>
+        </div>
+        
+        <div class="manual-form" id="manualForm">
+            <form id="importForm" onsubmit="submitImport(event)">
+                <div class="form-group">
+                    <label for="refreshToken">Refresh Token</label>
+                    <textarea id="refreshToken" name="refreshToken" placeholder="Paste your refreshToken here (starts with aorAAAAAG...)" required></textarea>
+                    <div class="hint">Copy from Kiro IDE: ~/.kiro/kiro-auth-token.json → refreshToken field</div>
+                </div>
+                
+                <button type="submit" class="submit-btn" id="importBtn">
+                    📥 Import Token
+                </button>
+                
+                <div class="status-message" id="importStatus"></div>
+            </form>
+        </div>
+        
+        <div class="warning-box">
+            ⚠️ <strong>Note:</strong> Google and GitHub login are not available for third-party applications due to AWS Cognito restrictions. Please use AWS Builder ID or import your token from Kiro IDE.
+        </div>
+        
+        <div class="info-box">
+            💡 <strong>How to get RefreshToken:</strong><br>
+            1. Open Kiro IDE and login with Google/GitHub<br>
+            2. Find the token file: <code>~/.kiro/kiro-auth-token.json</code><br>
+            3. Copy the <code>refreshToken</code> value and paste it above
+        </div>
+    </div>
+    
+    <script>
+        function toggleIdcForm() {
+            const idcForm = document.getElementById('idcForm');
+            const manualForm = document.getElementById('manualForm');
+            manualForm.classList.remove('show');
+            idcForm.classList.toggle('show');
+            if (idcForm.classList.contains('show')) {
+                document.getElementById('startUrl').focus();
+            }
+        }
+        
+        function toggleManualForm() {
+            const idcForm = document.getElementById('idcForm');
+            const manualForm = document.getElementById('manualForm');
+            idcForm.classList.remove('show');
+            manualForm.classList.toggle('show');
+            if (manualForm.classList.contains('show')) {
+                document.getElementById('refreshToken').focus();
+            }
+        }
+        
+        async function submitImport(event) {
+            event.preventDefault();
+            const refreshToken = document.getElementById('refreshToken').value.trim();
+            const statusEl = document.getElementById('importStatus');
+            const btn = document.getElementById('importBtn');
+            
+            if (!refreshToken) {
+                statusEl.className = 'status-message error';
+                statusEl.textContent = 'Please enter a refresh token';
+                return;
+            }
+            
+            if (!refreshToken.startsWith('aorAAAAAG')) {
+                statusEl.className = 'status-message error';
+                statusEl.textContent = 'Invalid token format. Token should start with aorAAAAAG...';
+                return;
+            }
+            
+            btn.disabled = true;
+            btn.textContent = '⏳ Importing...';
+            statusEl.className = 'status-message';
+            statusEl.style.display = 'none';
+            
+            try {
+                const response = await fetch('/v0/oauth/kiro/import', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ refreshToken: refreshToken })
+                });
+                
+                const data = await response.json();
+                
+                if (response.ok && data.success) {
+                    statusEl.className = 'status-message success';
+                    statusEl.textContent = '✅ Token imported successfully! File: ' + (data.fileName || 'kiro-token.json');
+                } else {
+                    statusEl.className = 'status-message error';
+                    statusEl.textContent = '❌ ' + (data.error || data.message || 'Import failed');
+                }
+            } catch (error) {
+                statusEl.className = 'status-message error';
+                statusEl.textContent = '❌ Network error: ' + error.message;
+            } finally {
+                btn.disabled = false;
+                btn.textContent = '📥 Import Token';
+            }
+        }
+        
+        async function manualRefresh() {
+            const btn = document.getElementById('refreshBtn');
+            const statusEl = document.getElementById('refreshStatus');
+            
+            btn.disabled = true;
+            btn.innerHTML = '<span class="icon">⏳</span> Refreshing...';
+            statusEl.className = 'status-message';
+            statusEl.style.display = 'none';
+            
+            try {
+                const response = await fetch('/v0/oauth/kiro/refresh', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' }
+                });
+                
+                const data = await response.json();
+                
+                if (response.ok && data.success) {
+                    statusEl.className = 'status-message success';
+                    let msg = '✅ ' + data.message;
+                    if (data.warnings && data.warnings.length > 0) {
+                        msg += ' (Warnings: ' + data.warnings.join('; ') + ')';
+                    }
+                    statusEl.textContent = msg;
+                } else {
+                    statusEl.className = 'status-message error';
+                    statusEl.textContent = '❌ ' + (data.error || data.message || 'Refresh failed');
+                }
+            } catch (error) {
+                statusEl.className = 'status-message error';
+                statusEl.textContent = '❌ Network error: ' + error.message;
+            } finally {
+                btn.disabled = false;
+                btn.innerHTML = '<span class="icon">🔄</span> Manual Refresh All Tokens';
+            }
+        }
+    </script>
+</body>
+</html>`
+)
--- a/internal/auth/kiro/rate_limiter.go
+++ b/internal/auth/kiro/rate_limiter.go
@@ -0,0 +1,316 @@
+package kiro
+
+import (
+	"math"
+	"math/rand"
+	"strings"
+	"sync"
+	"time"
+)
+
+const (
+	DefaultMinTokenInterval  = 10 * time.Second // lower bound of the random gap enforced between requests on one token
+	DefaultMaxTokenInterval  = 30 * time.Second // upper bound of that random gap, before jitter
+	DefaultDailyMaxRequests  = 500              // per-token request budget for one daily window
+	DefaultJitterPercent     = 0.3              // fraction of +/- jitter applied to intervals and backoff
+	DefaultBackoffBase       = 2 * time.Minute  // cooldown after the first consecutive failure
+	DefaultBackoffMax        = 60 * time.Minute // ceiling for the exponential backoff
+	DefaultBackoffMultiplier = 2.0              // growth factor per additional consecutive failure
+	DefaultSuspendCooldown   = 24 * time.Hour   // cooldown applied when a suspension-style error is detected
+)
+
+// TokenState holds the rate-limiting bookkeeping for a single token.
+type TokenState struct {
+	LastRequest    time.Time // when WaitForToken last admitted a request
+	RequestCount   int       // total requests admitted by WaitForToken
+	CooldownEnd    time.Time // the token is in cooldown until this instant; zero means none
+	FailCount      int       // consecutive failures; cleared by MarkTokenSuccess and ResetSuspension
+	DailyRequests  int       // requests admitted in the current daily window
+	DailyResetTime time.Time // instant after which DailyRequests is reset
+	IsSuspended    bool      // set by CheckAndMarkSuspended, cleared by ResetSuspension
+	SuspendedAt    time.Time // when the suspension was recorded
+	SuspendReason  string    // raw error message that triggered the suspension
+}
+
+// RateLimiter tracks per-token request pacing, failure backoff and suspension state.
+type RateLimiter struct {
+	mu                sync.RWMutex           // guards states
+	states            map[string]*TokenState // per-token state keyed by tokenKey
+	minTokenInterval  time.Duration          // lower bound of the random gap between requests
+	maxTokenInterval  time.Duration          // upper bound of the random gap between requests
+	dailyMaxRequests  int                    // per-token daily request budget
+	jitterPercent     float64                // fraction of +/- jitter applied to intervals and backoff
+	backoffBase       time.Duration          // backoff after the first consecutive failure
+	backoffMax        time.Duration          // ceiling for the exponential backoff
+	backoffMultiplier float64                // backoff growth factor per consecutive failure
+	suspendCooldown   time.Duration          // cooldown applied to suspended tokens
+	rng               *rand.Rand             // random source for intervals and jitter
+}
+
+// NewRateLimiter creates a rate limiter with the Default* settings, an empty state map and a time-seeded random source.
+func NewRateLimiter() *RateLimiter {
+	return &RateLimiter{
+		states:            make(map[string]*TokenState),
+		minTokenInterval:  DefaultMinTokenInterval,
+		maxTokenInterval:  DefaultMaxTokenInterval,
+		dailyMaxRequests:  DefaultDailyMaxRequests,
+		jitterPercent:     DefaultJitterPercent,
+		backoffBase:       DefaultBackoffBase,
+		backoffMax:        DefaultBackoffMax,
+		backoffMultiplier: DefaultBackoffMultiplier,
+		suspendCooldown:   DefaultSuspendCooldown,
+		rng:               rand.New(rand.NewSource(time.Now().UnixNano())),
+	}
+}
+
+// RateLimiterConfig carries optional overrides for NewRateLimiterWithConfig; fields left at zero keep their defaults.
+type RateLimiterConfig struct {
+	MinTokenInterval  time.Duration
+	MaxTokenInterval  time.Duration
+	DailyMaxRequests  int
+	JitterPercent     float64
+	BackoffBase       time.Duration
+	BackoffMax        time.Duration
+	BackoffMultiplier float64
+	SuspendCooldown   time.Duration
+}
+
+// NewRateLimiterWithConfig creates a rate limiter from cfg; only strictly positive fields override the defaults, so a zero value (e.g. JitterPercent: 0) cannot be used to switch a setting off.
+func NewRateLimiterWithConfig(cfg RateLimiterConfig) *RateLimiter {
+	rl := NewRateLimiter()
+	if cfg.MinTokenInterval > 0 {
+		rl.minTokenInterval = cfg.MinTokenInterval
+	}
+	if cfg.MaxTokenInterval > 0 {
+		rl.maxTokenInterval = cfg.MaxTokenInterval
+	}
+	if cfg.DailyMaxRequests > 0 {
+		rl.dailyMaxRequests = cfg.DailyMaxRequests
+	}
+	if cfg.JitterPercent > 0 {
+		rl.jitterPercent = cfg.JitterPercent
+	}
+	if cfg.BackoffBase > 0 {
+		rl.backoffBase = cfg.BackoffBase
+	}
+	if cfg.BackoffMax > 0 {
+		rl.backoffMax = cfg.BackoffMax
+	}
+	if cfg.BackoffMultiplier > 0 {
+		rl.backoffMultiplier = cfg.BackoffMultiplier
+	}
+	if cfg.SuspendCooldown > 0 {
+		rl.suspendCooldown = cfg.SuspendCooldown
+	}
+	return rl
+}
+
+// getOrCreateState returns the state tracked for tokenKey, registering a fresh one on first use.
+// It does no locking of its own; the callers in this file hold rl.mu for writing.
+func (rl *RateLimiter) getOrCreateState(tokenKey string) *TokenState {
+	if known, ok := rl.states[tokenKey]; ok {
+		return known
+	}
+	// A new token's daily window ends at the next 24h truncation boundary.
+	fresh := &TokenState{DailyResetTime: time.Now().Truncate(24 * time.Hour).Add(24 * time.Hour)}
+	rl.states[tokenKey] = fresh
+	return fresh
+}
+
+// resetDailyIfNeeded zeroes the daily counter once the current window has elapsed and
+// schedules the next reset at the following 24h truncation boundary.
+func (rl *RateLimiter) resetDailyIfNeeded(state *TokenState) {
+	if now := time.Now(); now.After(state.DailyResetTime) {
+		state.DailyResetTime = now.Truncate(24 * time.Hour).Add(24 * time.Hour)
+		state.DailyRequests = 0
+	}
+}
+
+// calculateInterval returns a random gap in [minTokenInterval, maxTokenInterval) with +/- jitterPercent jitter.
+// The spread is scaled by Float64 because rand.Int63n panics when max <= min (possible with a custom config).
+func (rl *RateLimiter) calculateInterval() time.Duration {
+	baseInterval := rl.minTokenInterval + time.Duration(rl.rng.Float64()*float64(rl.maxTokenInterval-rl.minTokenInterval))
+	return baseInterval + time.Duration(float64(baseInterval)*rl.jitterPercent*(rl.rng.Float64()*2-1))
+}
+
+// WaitForToken blocks until tokenKey may issue its next request (any cooldown has passed and a jittered random interval has elapsed since the previous request), then records the request.
+func (rl *RateLimiter) WaitForToken(tokenKey string) {
+	rl.mu.Lock()
+	state := rl.getOrCreateState(tokenKey)
+	rl.resetDailyIfNeeded(state)
+
+	now := time.Now()
+
+	// In cooldown: sleep until it ends with the lock released, then re-read the state.
+	if now.Before(state.CooldownEnd) {
+		waitTime := state.CooldownEnd.Sub(now)
+		rl.mu.Unlock()
+		time.Sleep(waitTime)
+		rl.mu.Lock()
+		state = rl.getOrCreateState(tokenKey)
+		now = time.Now()
+	}
+
+	// Enforce a freshly drawn random interval since this token's previous request.
+	interval := rl.calculateInterval()
+	nextAllowedTime := state.LastRequest.Add(interval)
+
+	if now.Before(nextAllowedTime) {
+		waitTime := nextAllowedTime.Sub(now)
+		rl.mu.Unlock()
+		time.Sleep(waitTime)
+		rl.mu.Lock()
+		state = rl.getOrCreateState(tokenKey)
+	}
+	// Record the request; the daily budget is counted here but not enforced by this method.
+	state.LastRequest = time.Now()
+	state.RequestCount++
+	state.DailyRequests++
+	rl.mu.Unlock()
+}
+
+// MarkTokenFailed records one more consecutive failure for tokenKey and puts the token
+// into an exponential-backoff cooldown sized by the updated failure count.
+func (rl *RateLimiter) MarkTokenFailed(tokenKey string) {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+	entry := rl.getOrCreateState(tokenKey)
+	entry.FailCount++
+	entry.CooldownEnd = time.Now().Add(rl.calculateBackoff(entry.FailCount))
+}
+
+// MarkTokenSuccess records a successful request for tokenKey: any cooldown is lifted
+// and the consecutive-failure count is cleared.
+func (rl *RateLimiter) MarkTokenSuccess(tokenKey string) {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+	entry := rl.getOrCreateState(tokenKey)
+	entry.CooldownEnd = time.Time{}
+	entry.FailCount = 0
+}
+
+// CheckAndMarkSuspended reports whether errorMsg contains a suspension keyword (case-insensitive substring match); on a match it marks tokenKey suspended, stores errorMsg as the reason and starts the suspend cooldown.
+func (rl *RateLimiter) CheckAndMarkSuspended(tokenKey string, errorMsg string) bool {
+	suspendKeywords := []string{
+		"suspended",
+		"banned",
+		"disabled",
+		"account has been",
+		"access denied",
+		"rate limit exceeded",
+		"too many requests",
+		"quota exceeded",
+	}
+
+	lowerMsg := strings.ToLower(errorMsg)
+	for _, keyword := range suspendKeywords {
+		if strings.Contains(lowerMsg, keyword) {
+			rl.mu.Lock()
+			defer rl.mu.Unlock()
+			// The function returns inside this branch, so the deferred unlock runs at most once.
+			state := rl.getOrCreateState(tokenKey)
+			state.IsSuspended = true
+			state.SuspendedAt = time.Now()
+			state.SuspendReason = errorMsg
+			state.CooldownEnd = time.Now().Add(rl.suspendCooldown)
+			return true
+		}
+	}
+	return false
+}
+
+// IsTokenAvailable reports whether tokenKey can be used right now without waiting.
+//
+// A token that has never been seen is available. A suspended token becomes available
+// again only once suspendCooldown has elapsed since SuspendedAt; its CooldownEnd and
+// daily budget are not consulted. Any other token must be out of cooldown and below
+// dailyMaxRequests.
+//
+// The daily counter may be reset as a side effect, so the whole check runs under the
+// write lock. Taking one lock for the entire call, instead of dropping the read lock
+// and re-acquiring locks around the reset, means every check sees the same state and
+// each Lock is paired with exactly one Unlock.
+func (rl *RateLimiter) IsTokenAvailable(tokenKey string) bool {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+
+	state, exists := rl.states[tokenKey]
+	if !exists {
+		return true
+	}
+
+	now := time.Now()
+
+	// Suspension takes precedence over every other check: the token is usable again
+	// only after the suspend cooldown, measured from SuspendedAt, has fully elapsed.
+	if state.IsSuspended {
+		return now.After(state.SuspendedAt.Add(rl.suspendCooldown))
+	}
+
+	// Still backing off after a failure (CooldownEnd is the zero time when no
+	// cooldown is active, which is never in the future).
+	if now.Before(state.CooldownEnd) {
+		return false
+	}
+
+	// Roll the daily window over if it has expired, then compare the counter
+	// against the budget.
+	rl.resetDailyIfNeeded(state)
+	return state.DailyRequests < rl.dailyMaxRequests
+}
+
+// calculateBackoff returns backoffBase * backoffMultiplier^(failCount-1) with +/- jitterPercent jitter, capped at backoffMax; it returns 0 for failCount <= 0.
+func (rl *RateLimiter) calculateBackoff(failCount int) time.Duration {
+	if failCount <= 0 {
+		return 0
+	}
+
+	backoff := float64(rl.backoffBase) * math.Pow(rl.backoffMultiplier, float64(failCount-1))
+
+	// Apply jitter.
+	jitter := backoff * rl.jitterPercent * (rl.rng.Float64()*2 - 1)
+	backoff += jitter
+	// Cap before converting: a float64 outside the int64 range converts to an implementation-dependent Duration (negative on some platforms); the negated test also catches NaN.
+	if !(backoff < float64(rl.backoffMax)) {
+		return rl.backoffMax
+	}
+	return time.Duration(backoff)
+}
+
+// GetTokenState returns a snapshot of the state tracked for tokenKey, or nil when the
+// token is unknown.
+//
+// The result is a copy, so callers may modify it without affecting the limiter.
+func (rl *RateLimiter) GetTokenState(tokenKey string) *TokenState {
+	rl.mu.RLock()
+	defer rl.mu.RUnlock()
+
+	if live, ok := rl.states[tokenKey]; ok {
+		snapshot := *live
+		return &snapshot
+	}
+	return nil
+}
+
+// ClearTokenState forgets all state tracked for tokenKey; clearing an unknown key is a no-op.
+func (rl *RateLimiter) ClearTokenState(tokenKey string) {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+	delete(rl.states, tokenKey)
+}
+
+// ResetSuspension lifts the suspension on tokenKey and clears its cooldown and failure count; unknown tokens are left untouched.
+func (rl *RateLimiter) ResetSuspension(tokenKey string) {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+	entry, found := rl.states[tokenKey]
+	if !found {
+		return
+	}
+	entry.FailCount = 0
+	entry.CooldownEnd = time.Time{}
+	entry.IsSuspended = false
+	entry.SuspendedAt = time.Time{}
+	entry.SuspendReason = ""
+}
--- a/internal/auth/kiro/rate_limiter_singleton.go
+++ b/internal/auth/kiro/rate_limiter_singleton.go
@@ -0,0 +1,46 @@
+package kiro
+
+import (
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+var (
+	globalRateLimiter     *RateLimiter // singleton, created lazily by GetGlobalRateLimiter
+	globalRateLimiterOnce sync.Once    // guards creation of globalRateLimiter
+
+	globalCooldownManager     *CooldownManager // singleton, created lazily by GetGlobalCooldownManager
+	globalCooldownManagerOnce sync.Once        // guards creation of globalCooldownManager
+	cooldownStopCh            chan struct{}    // closed by ShutdownRateLimiters to stop the cleanup routine
+)
+
+// GetGlobalRateLimiter returns the process-wide RateLimiter, creating it with default settings on first use.
+func GetGlobalRateLimiter() *RateLimiter {
+	globalRateLimiterOnce.Do(func() {
+		globalRateLimiter = NewRateLimiter()
+		log.Info("kiro: global RateLimiter initialized")
+	})
+	return globalRateLimiter
+}
+
+// GetGlobalCooldownManager returns the process-wide CooldownManager; the first call creates it and launches StartCleanupRoutine in a goroutine with a 5-minute argument and the stop channel.
+func GetGlobalCooldownManager() *CooldownManager {
+	globalCooldownManagerOnce.Do(func() {
+		globalCooldownManager = NewCooldownManager()
+		cooldownStopCh = make(chan struct{})
+		go globalCooldownManager.StartCleanupRoutine(5*time.Minute, cooldownStopCh)
+		log.Info("kiro: global CooldownManager initialized with cleanup routine")
+	})
+	return globalCooldownManager
+}
+
+// ShutdownRateLimiters stops the cooldown cleanup routine during application shutdown; repeated (non-concurrent) calls are no-ops.
+func ShutdownRateLimiters() {
+	if cooldownStopCh != nil {
+		close(cooldownStopCh)
+		cooldownStopCh = nil // closing an already-closed channel would panic
+		log.Info("kiro: rate limiter cleanup routine stopped")
+	}
+}
--- a/internal/auth/kiro/rate_limiter_test.go
+++ b/internal/auth/kiro/rate_limiter_test.go
@@ -0,0 +1,304 @@
+package kiro
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestNewRateLimiter(t *testing.T) {
+	rl := NewRateLimiter()
+	if rl == nil {
+		t.Fatal("expected non-nil RateLimiter")
+	}
+	if rl.states == nil {
+		t.Error("expected non-nil states map")
+	}
+	if got, want := rl.minTokenInterval, DefaultMinTokenInterval; got != want {
+		t.Errorf("expected minTokenInterval %v, got %v", want, got)
+	}
+	if got, want := rl.maxTokenInterval, DefaultMaxTokenInterval; got != want {
+		t.Errorf("expected maxTokenInterval %v, got %v", want, got)
+	}
+	if got, want := rl.dailyMaxRequests, DefaultDailyMaxRequests; got != want {
+		t.Errorf("expected dailyMaxRequests %d, got %d", want, got)
+	}
+}
+
+func TestNewRateLimiterWithConfig(t *testing.T) {
+	// Every field is overridden with a non-default value; the assertions below
+	// spot-check the two interval bounds and the daily budget.
+	rl := NewRateLimiterWithConfig(RateLimiterConfig{
+		MinTokenInterval:  5 * time.Second,
+		MaxTokenInterval:  15 * time.Second,
+		DailyMaxRequests:  100,
+		JitterPercent:     0.2,
+		BackoffBase:       1 * time.Minute,
+		BackoffMax:        30 * time.Minute,
+		BackoffMultiplier: 1.5,
+		SuspendCooldown:   12 * time.Hour,
+	})
+	if got := rl.minTokenInterval; got != 5*time.Second {
+		t.Errorf("expected minTokenInterval 5s, got %v", got)
+	}
+	if got := rl.maxTokenInterval; got != 15*time.Second {
+		t.Errorf("expected maxTokenInterval 15s, got %v", got)
+	}
+	if got := rl.dailyMaxRequests; got != 100 {
+		t.Errorf("expected dailyMaxRequests 100, got %d", got)
+	}
+}
+
+func TestNewRateLimiterWithConfig_PartialConfig(t *testing.T) {
+	// Only the minimum interval is set; every other field is zero and must
+	// therefore keep its default.
+	partial := RateLimiterConfig{MinTokenInterval: 5 * time.Second}
+	rl := NewRateLimiterWithConfig(partial)
+
+	if got := rl.minTokenInterval; got != 5*time.Second {
+		t.Errorf("expected minTokenInterval 5s, got %v", got)
+	}
+	if got := rl.maxTokenInterval; got != DefaultMaxTokenInterval {
+		t.Errorf("expected default maxTokenInterval, got %v", got)
+	}
+}
+
+func TestGetTokenState_NonExistent(t *testing.T) {
+	// A key that was never touched has no state to report.
+	rl := NewRateLimiter()
+	if got := rl.GetTokenState("nonexistent"); got != nil {
+		t.Error("expected nil state for non-existent token")
+	}
+}
+
+func TestIsTokenAvailable_NewToken(t *testing.T) {
+	// A token the limiter has never seen must be reported as available.
+	if !NewRateLimiter().IsTokenAvailable("newtoken") {
+		t.Error("expected new token to be available")
+	}
+}
+
+func TestMarkTokenFailed(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.MarkTokenFailed("token1")
+	// A single failure should create the state, count one failure and start a cooldown.
+	state := rl.GetTokenState("token1")
+	if state == nil {
+		t.Fatal("expected non-nil state")
+	}
+	if state.FailCount != 1 {
+		t.Errorf("expected FailCount 1, got %d", state.FailCount)
+	}
+	if state.CooldownEnd.IsZero() {
+		t.Error("expected non-zero CooldownEnd")
+	}
+}
+
+func TestMarkTokenSuccess(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.MarkTokenFailed("token1")
+	rl.MarkTokenFailed("token1")
+	rl.MarkTokenSuccess("token1")
+	// A success after two failures should clear both the failure count and the cooldown.
+	state := rl.GetTokenState("token1")
+	if state == nil {
+		t.Fatal("expected non-nil state")
+	}
+	if state.FailCount != 0 {
+		t.Errorf("expected FailCount 0, got %d", state.FailCount)
+	}
+	if !state.CooldownEnd.IsZero() {
+		t.Error("expected zero CooldownEnd after success")
+	}
+}
+
+func TestCheckAndMarkSuspended_Suspended(t *testing.T) {
+	rl := NewRateLimiter()
+
+	testCases := []string{
+		"Account has been suspended",
+		"You are banned from this service",
+		"Account disabled",
+		"Access denied permanently",
+		"Rate limit exceeded",
+		"Too many requests",
+		"Quota exceeded for today",
+	}
+
+	for i, msg := range testCases {
+		tokenKey := "token" + string(rune('a'+i))
+		if !rl.CheckAndMarkSuspended(tokenKey, msg) {
+			t.Errorf("expected suspension detected for: %s", msg)
+		}
+		// GetTokenState returns nil when detection failed; guard so the test reports instead of panicking.
+		if state := rl.GetTokenState(tokenKey); state == nil || !state.IsSuspended {
+			t.Errorf("expected IsSuspended true for: %s", msg)
+		}
+	}
+}
+
+func TestCheckAndMarkSuspended_NotSuspended(t *testing.T) {
+	rl := NewRateLimiter()
+	// None of these messages contains a suspension keyword.
+	normalErrors := []string{
+		"connection timeout",
+		"internal server error",
+		"bad request",
+		"invalid token format",
+	}
+
+	for i, msg := range normalErrors {
+		tokenKey := "token" + string(rune('a'+i))
+		if rl.CheckAndMarkSuspended(tokenKey, msg) {
+			t.Errorf("unexpected suspension for: %s", msg)
+		}
+	}
+}
+
+func TestIsTokenAvailable_Suspended(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.CheckAndMarkSuspended("token1", "Account suspended")
+	// The message contains the keyword "suspended", so the token was just marked suspended.
+	if rl.IsTokenAvailable("token1") {
+		t.Error("expected suspended token to be unavailable")
+	}
+}
+
+func TestClearTokenState(t *testing.T) {
+	// Create state through a failure, then make sure clearing removes it entirely.
+	rl := NewRateLimiter()
+	rl.MarkTokenFailed("token1")
+	rl.ClearTokenState("token1")
+
+	if got := rl.GetTokenState("token1"); got != nil {
+		t.Error("expected nil state after clear")
+	}
+}
+
+func TestResetSuspension(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.CheckAndMarkSuspended("token1", "Account suspended")
+	rl.ResetSuspension("token1")
+	// Resetting should clear the suspension flag and leave the failure count at zero.
+	state := rl.GetTokenState("token1")
+	if state.IsSuspended {
+		t.Error("expected IsSuspended false after reset")
+	}
+	if state.FailCount != 0 {
+		t.Errorf("expected FailCount 0, got %d", state.FailCount)
+	}
+}
+
+func TestResetSuspension_NonExistent(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.ResetSuspension("nonexistent") // no assertions: the test only fails if this call panics
+}
+
+func TestCalculateBackoff_ZeroFailCount(t *testing.T) {
+	// With no recorded failures there is nothing to back off from, so the
+	// computed delay must be exactly zero.
+	if got := NewRateLimiter().calculateBackoff(0); got != 0 {
+		t.Errorf("expected 0 backoff for 0 fails, got %v", got)
+	}
+}
+
+func TestCalculateBackoff_Exponential(t *testing.T) {
+	cfg := RateLimiterConfig{
+		BackoffBase:       1 * time.Minute,
+		BackoffMax:        60 * time.Minute,
+		BackoffMultiplier: 2.0,
+		JitterPercent:     0.3,
+	}
+	rl := NewRateLimiterWithConfig(cfg)
+	// First failure: 1min base with +/-30% jitter is 42s..78s, inside the asserted 40s..80s.
+	backoff1 := rl.calculateBackoff(1)
+	if backoff1 < 40*time.Second || backoff1 > 80*time.Second {
+		t.Errorf("expected ~1min (with jitter) for fail 1, got %v", backoff1)
+	}
+	// Second failure: 2min with +/-30% jitter is 84s..156s, inside the asserted 80s..160s.
+	backoff2 := rl.calculateBackoff(2)
+	if backoff2 < 80*time.Second || backoff2 > 160*time.Second {
+		t.Errorf("expected ~2min (with jitter) for fail 2, got %v", backoff2)
+	}
+}
+
+func TestCalculateBackoff_MaxCap(t *testing.T) {
+	cfg := RateLimiterConfig{
+		BackoffBase:       1 * time.Minute,
+		BackoffMax:        10 * time.Minute,
+		BackoffMultiplier: 2.0,
+		JitterPercent:     0,
+	}
+	rl := NewRateLimiterWithConfig(cfg)
+	// NOTE: JitterPercent: 0 is ignored by NewRateLimiterWithConfig (only values > 0 override), so the default jitter still applies here.
+	backoff := rl.calculateBackoff(10)
+	if backoff > 10*time.Minute {
+		t.Errorf("expected backoff capped at 10min, got %v", backoff)
+	}
+}
+
+func TestGetTokenState_ReturnsCopy(t *testing.T) {
+	rl := NewRateLimiter()
+	rl.MarkTokenFailed("token1")
+	// Mutate the returned value; the limiter's own state must be unaffected.
+	state1 := rl.GetTokenState("token1")
+	state1.FailCount = 999
+
+	state2 := rl.GetTokenState("token1")
+	if state2.FailCount == 999 {
+		t.Error("GetTokenState should return a copy")
+	}
+}
+
+func TestRateLimiter_ConcurrentAccess(t *testing.T) {
+	rl := NewRateLimiter()
+	const numGoroutines = 50
+	const numOperations = 50
+	// 50 goroutines share 10 token keys. There are no assertions: the test only fails on a panic, a deadlock or a race-detector report.
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+
+	for i := 0; i < numGoroutines; i++ {
+		go func(id int) {
+			defer wg.Done()
+			tokenKey := "token" + string(rune('a'+id%10))
+			for j := 0; j < numOperations; j++ {
+				switch j % 6 {
+				case 0:
+					rl.IsTokenAvailable(tokenKey)
+				case 1:
+					rl.MarkTokenFailed(tokenKey)
+				case 2:
+					rl.MarkTokenSuccess(tokenKey)
+				case 3:
+					rl.GetTokenState(tokenKey)
+				case 4:
+					rl.CheckAndMarkSuspended(tokenKey, "test error") // matches no suspension keyword
+				case 5:
+					rl.ResetSuspension(tokenKey)
+				}
+			}
+		}(i)
+	}
+
+	wg.Wait()
+}
+
+func TestCalculateInterval_WithinRange(t *testing.T) {
+	cfg := RateLimiterConfig{
+		MinTokenInterval: 10 * time.Second,
+		MaxTokenInterval: 30 * time.Second,
+		JitterPercent:    0.3,
+	}
+	rl := NewRateLimiterWithConfig(cfg)
+	// The base interval lies in [10s, 30s); +/-30% jitter widens that to at most 7s..39s, within the asserted bounds.
+	minAllowed := 7 * time.Second
+	maxAllowed := 40 * time.Second
+
+	for i := 0; i < 100; i++ {
+		interval := rl.calculateInterval()
+		if interval < minAllowed || interval > maxAllowed {
+			t.Errorf("interval %v outside expected range [%v, %v]", interval, minAllowed, maxAllowed)
+		}
+	}
+}
--- a/internal/auth/kiro/refresh_manager.go
+++ b/internal/auth/kiro/refresh_manager.go
@@ -0,0 +1,180 @@
+package kiro
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+// RefreshManager is the singleton manager that owns the background token refresher.
+type RefreshManager struct {
+	mu               sync.Mutex
+	refresher        *BackgroundRefresher
+	ctx              context.Context
+	cancel           context.CancelFunc
+	started          bool
+	onTokenRefreshed func(tokenID string, tokenData *KiroTokenData) // callback invoked after a successful refresh
+}
+
+var (
+	globalRefreshManager *RefreshManager // process-wide instance, created lazily by GetRefreshManager
+	managerOnce          sync.Once       // guards creation of globalRefreshManager
+)
+
+// GetRefreshManager returns the global refresh manager, creating an empty (uninitialized) one on first use.
+func GetRefreshManager() *RefreshManager {
+	managerOnce.Do(func() {
+		globalRefreshManager = &RefreshManager{}
+	})
+	return globalRefreshManager
+}
+
+// Initialize builds the background refresher without starting it; it is a no-op while the manager is started and currently always returns nil.
+// baseDir: directory holding the token files; an empty value skips initialization.
+// cfg: application configuration, handed to the refresher via WithConfig.
+func (m *RefreshManager) Initialize(baseDir string, cfg *config.Config) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if m.started {
+		log.Debug("refresh manager: already initialized")
+		return nil
+	}
+
+	if baseDir == "" {
+		log.Warn("refresh manager: base directory not provided, skipping initialization")
+		return nil
+	}
+
+	resolvedBaseDir, err := util.ResolveAuthDir(baseDir)
+	if err != nil {
+		log.Warnf("refresh manager: failed to resolve auth directory %s: %v", baseDir, err)
+	}
+	if resolvedBaseDir != "" {
+		baseDir = resolvedBaseDir
+	}
+
+	// Create the token repository.
+	repo := NewFileTokenRepository(baseDir)
+
+	// Create the background refresher with its options.
+	opts := []RefresherOption{
+		WithInterval(time.Minute), // check once per minute
+		WithBatchSize(50),         // at most 50 tokens per batch
+		WithConcurrency(10),       // at most 10 concurrent refreshes
+		WithConfig(cfg),           // set up the OAuth and SSO clients
+	}
+
+	// Pass the callback on to the BackgroundRefresher if one is already registered.
+	if m.onTokenRefreshed != nil {
+		opts = append(opts, WithOnTokenRefreshed(m.onTokenRefreshed))
+	}
+
+	m.refresher = NewBackgroundRefresher(repo, opts...)
+
+	log.Infof("refresh manager: initialized with base directory %s", baseDir)
+	return nil
+}
+
+// Start launches the background refresher with a fresh cancellable context; it is a no-op if already started or not yet initialized.
+func (m *RefreshManager) Start() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if m.started {
+		log.Debug("refresh manager: already started")
+		return
+	}
+
+	if m.refresher == nil {
+		log.Warn("refresh manager: not initialized, cannot start")
+		return
+	}
+
+	m.ctx, m.cancel = context.WithCancel(context.Background())
+	m.refresher.Start(m.ctx)
+	m.started = true
+
+	log.Info("refresh manager: background refresh started")
+}
+
+// Stop cancels the refresher's context, stops the refresher and marks the manager as not started; it is a no-op if not started.
+func (m *RefreshManager) Stop() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if !m.started {
+		return
+	}
+
+	if m.cancel != nil {
+		m.cancel()
+	}
+
+	if m.refresher != nil {
+		m.refresher.Stop()
+	}
+
+	m.started = false
+	log.Info("refresh manager: background refresh stopped")
+}
+
+// IsRunning reports whether the background refresh is currently started.
+func (m *RefreshManager) IsRunning() bool {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.started
+}
+
+// UpdateBaseDir points the refresher's file-based token repository at baseDir (for runtime configuration changes); it does nothing without a refresher or when the repository is not a *FileTokenRepository.
+func (m *RefreshManager) UpdateBaseDir(baseDir string) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if m.refresher == nil || m.refresher.tokenRepo == nil {
+		return
+	}
+	if fileRepo, ok := m.refresher.tokenRepo.(*FileTokenRepository); ok {
+		fileRepo.SetBaseDir(baseDir)
+		log.Infof("refresh manager: updated base directory to %s", baseDir)
+	}
+}
+
+// SetOnTokenRefreshed registers the callback invoked after a token is refreshed successfully.
+// It may be called at any time; the callback can be replaced at runtime.
+// callback: receives the tokenID (file name) and the new token data.
+func (m *RefreshManager) SetOnTokenRefreshed(callback func(tokenID string, tokenData *KiroTokenData)) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	m.onTokenRefreshed = callback
+
+	// If the refresher already exists, update its callback under the refresher's own mutex.
+	if m.refresher != nil {
+		m.refresher.callbackMu.Lock()
+		m.refresher.onTokenRefreshed = callback
+		m.refresher.callbackMu.Unlock()
+	}
+
+	log.Debug("refresh manager: token refresh callback registered")
+}
+
+// InitializeAndStart is a convenience helper that initializes the global refresh manager and then starts it; an initialization error is logged and aborts the start.
+func InitializeAndStart(baseDir string, cfg *config.Config) {
+	manager := GetRefreshManager()
+	if err := manager.Initialize(baseDir, cfg); err != nil {
+		log.Errorf("refresh manager: initialization failed: %v", err)
+		return
+	}
+	manager.Start()
+}
+
+// StopGlobalRefreshManager stops the global refresh manager if one has been created.
+func StopGlobalRefreshManager() {
+	if mgr := globalRefreshManager; mgr != nil {
+		mgr.Stop()
+	}
+}
--- a/internal/auth/kiro/social_auth.go
+++ b/internal/auth/kiro/social_auth.go
@@ -9,7 +9,9 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"html"
 	"io"
+	"net"
 	"net/http"
 	"net/url"
 	"os"
@@ -31,6 +33,9 @@ const (

 	// OAuth timeout
 	socialAuthTimeout = 10 * time.Minute
+
+	// Default callback port for social auth HTTP server
+	socialAuthCallbackPort = 9876
 )

 // SocialProvider represents the social login provider.
@@ -67,6 +72,13 @@ type RefreshTokenRequest struct {
 	RefreshToken string `json:"refreshToken"`
 }

+// WebCallbackResult contains the OAuth callback result from HTTP server.
+type WebCallbackResult struct {
+	Code  string
+	State string
+	Error string
+}
+
 // SocialAuthClient handles social authentication with Kiro.
 type SocialAuthClient struct {
 	httpClient      *http.Client
@@ -87,6 +99,83 @@ func NewSocialAuthClient(cfg *config.Config) *SocialAuthClient {
 	}
 }

+// startWebCallbackServer starts a local HTTP server on localhost that receives the OAuth callback
+// (used instead of the kiro:// protocol handler to avoid redirect_mismatch errors). It returns the redirect URI and a channel that yields the first callback result.
+func (c *SocialAuthClient) startWebCallbackServer(ctx context.Context, expectedState string) (string, <-chan WebCallbackResult, error) {
+	// Prefer the fixed default port; if listening fails, fall back to a dynamic port (RFC 8252 allows dynamic ports for native apps).
+	listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", socialAuthCallbackPort))
+	if err != nil {
+		log.Warnf("kiro social auth: default port %d is busy, falling back to dynamic port", socialAuthCallbackPort)
+		listener, err = net.Listen("tcp", "localhost:0")
+		if err != nil {
+			return "", nil, fmt.Errorf("failed to start callback server: %w", err)
+		}
+	}
+
+	port := listener.Addr().(*net.TCPAddr).Port
+	redirectURI := fmt.Sprintf("http://localhost:%d/oauth/callback", port)
+	resultChan := make(chan WebCallbackResult, 1)  // written by the HTTP handler
+	deliverChan := make(chan WebCallbackResult, 1) // read by the caller
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/oauth/callback", func(w http.ResponseWriter, r *http.Request) {
+		code := r.URL.Query().Get("code")
+		state := r.URL.Query().Get("state")
+		errParam := r.URL.Query().Get("error")
+
+		if errParam != "" {
+			w.Header().Set("Content-Type", "text/html; charset=utf-8")
+			w.WriteHeader(http.StatusBadRequest)
+			fmt.Fprintf(w, `<!DOCTYPE html>
+<html><head><title>Login Failed</title></head>
+<body><h1>Login Failed</h1><p>%s</p><p>You can close this window.</p></body></html>`, html.EscapeString(errParam))
+			resultChan <- WebCallbackResult{Error: errParam}
+			return
+		}
+
+		if state != expectedState {
+			w.Header().Set("Content-Type", "text/html; charset=utf-8")
+			w.WriteHeader(http.StatusBadRequest)
+			fmt.Fprint(w, `<!DOCTYPE html>
+<html><head><title>Login Failed</title></head>
+<body><h1>Login Failed</h1><p>Invalid state parameter</p><p>You can close this window.</p></body></html>`)
+			resultChan <- WebCallbackResult{Error: "state mismatch"}
+			return
+		}
+
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
+		fmt.Fprint(w, `<!DOCTYPE html>
+<html><head><title>Login Successful</title></head>
+<body><h1>Login Successful!</h1><p>You can close this window and return to the terminal.</p>
+<script>window.close();</script></body></html>`)
+		resultChan <- WebCallbackResult{Code: code, State: state}
+	})
+
+	server := &http.Server{
+		Handler:           mux,
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+
+	go func() {
+		if err := server.Serve(listener); err != nil && err != http.ErrServerClosed {
+			log.Debugf("kiro social auth callback server error: %v", err)
+		}
+	}()
+
+	// Sole reader of resultChan: forward the first result to the caller's channel (so it is never swallowed here), then shut the server down.
+	go func() {
+		select {
+		case <-ctx.Done():
+		case <-time.After(socialAuthTimeout):
+		case res := <-resultChan:
+			deliverChan <- res
+		}
+		_ = server.Shutdown(context.Background())
+	}()
+
+	return redirectURI, deliverChan, nil
+}
+
 // generatePKCE generates PKCE code verifier and challenge.
 func generatePKCE() (verifier, challenge string, err error) {
 	// Generate 32 bytes of random data for verifier
@@ -217,10 +306,12 @@ func (c *SocialAuthClient) RefreshSocialToken(ctx context.Context, refreshToken
 		ExpiresAt:    expiresAt.Format(time.RFC3339),
 		AuthMethod:   "social",
 		Provider:     "", // Caller should preserve original provider
+		Region:       "us-east-1",
 	}, nil
 }

-// LoginWithSocial performs OAuth login with Google.
+// LoginWithSocial performs OAuth login with Google or GitHub.
+// Uses local HTTP callback server instead of custom protocol handler to avoid redirect_mismatch errors.
 func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialProvider) (*KiroTokenData, error) {
 	providerName := string(provider)

@@ -228,28 +319,10 @@ func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialP
 	fmt.Printf("║         Kiro Authentication (%s)                    ║\n", providerName)
 	fmt.Println("╚══════════════════════════════════════════════════════════╝")

-	// Step 1: Setup protocol handler
+	// Step 1: Start local HTTP callback server (instead of kiro:// protocol handler)
+	// This avoids redirect_mismatch errors with AWS Cognito
 	fmt.Println("\nSetting up authentication...")

-	// Start the local callback server
-	handlerPort, err := c.protocolHandler.Start(ctx)
-	if err != nil {
-		return nil, fmt.Errorf("failed to start callback server: %w", err)
-	}
-	defer c.protocolHandler.Stop()
-
-	// Ensure protocol handler is installed and set as default
-	if err := SetupProtocolHandlerIfNeeded(handlerPort); err != nil {
-		fmt.Println("\n⚠ Protocol handler setup failed. Trying alternative method...")
-		fmt.Println("  If you see a browser 'Open with' dialog, select your default browser.")
-		fmt.Println("  For manual setup instructions, run: cliproxy kiro --help-protocol")
-		log.Debugf("kiro: protocol handler setup error: %v", err)
-		// Continue anyway - user might have set it up manually or select browser manually
-	} else {
-		// Force set our handler as default (prevents "Open with" dialog)
-		forceDefaultProtocolHandler()
-	}
-
 	// Step 2: Generate PKCE codes
 	codeVerifier, codeChallenge, err := generatePKCE()
 	if err != nil {
@@ -262,8 +335,15 @@ func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialP
 		return nil, fmt.Errorf("failed to generate state: %w", err)
 	}

-	// Step 4: Build the login URL (Kiro uses GET request with query params)
-	authURL := c.buildLoginURL(providerName, KiroRedirectURI, codeChallenge, state)
+	// Step 4: Start local HTTP callback server
+	redirectURI, resultChan, err := c.startWebCallbackServer(ctx, state)
+	if err != nil {
+		return nil, fmt.Errorf("failed to start callback server: %w", err)
+	}
+	log.Debugf("kiro social auth: callback server started at %s", redirectURI)
+
+	// Step 5: Build the login URL using HTTP redirect URI
+	authURL := c.buildLoginURL(providerName, redirectURI, codeChallenge, state)

 	// Set incognito mode based on config (defaults to true for Kiro, can be overridden with --no-incognito)
 	// Incognito mode enables multi-account support by bypassing cached sessions
@@ -279,7 +359,7 @@ func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialP
 		log.Debug("kiro: using incognito mode for multi-account support (default)")
 	}

-	// Step 5: Open browser for user authentication
+	// Step 6: Open browser for user authentication
 	fmt.Println("\n════════════════════════════════════════════════════════════")
 	fmt.Printf("  Opening browser for %s authentication...\n", providerName)
 	fmt.Println("════════════════════════════════════════════════════════════")
@@ -295,80 +375,78 @@ func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialP

 	fmt.Println("\n  Waiting for authentication callback...")

-	// Step 6: Wait for callback
-	callback, err := c.protocolHandler.WaitForCallback(ctx)
-	if err != nil {
-		return nil, fmt.Errorf("failed to receive callback: %w", err)
-	}
-
-	if callback.Error != "" {
-		return nil, fmt.Errorf("authentication error: %s", callback.Error)
-	}
-
-	if callback.State != state {
-		// Log state values for debugging, but don't expose in user-facing error
-		log.Debugf("kiro: OAuth state mismatch - expected %s, got %s", state, callback.State)
-		return nil, fmt.Errorf("OAuth state validation failed - please try again")
-	}
-
-	if callback.Code == "" {
-		return nil, fmt.Errorf("no authorization code received")
-	}
-
-	fmt.Println("\n✓ Authorization received!")
-
-	// Step 7: Exchange code for tokens
-	fmt.Println("Exchanging code for tokens...")
-
-	tokenReq := &CreateTokenRequest{
-		Code:         callback.Code,
-		CodeVerifier: codeVerifier,
-		RedirectURI:  KiroRedirectURI,
-	}
-
-	tokenResp, err := c.CreateToken(ctx, tokenReq)
-	if err != nil {
-		return nil, fmt.Errorf("failed to exchange code for tokens: %w", err)
-	}
-
-	fmt.Println("\n✓ Authentication successful!")
-
-	// Close the browser window
-	if err := browser.CloseBrowser(); err != nil {
-		log.Debugf("Failed to close browser: %v", err)
-	}
-
-	// Validate ExpiresIn - use default 1 hour if invalid
-	expiresIn := tokenResp.ExpiresIn
-	if expiresIn <= 0 {
-		expiresIn = 3600
-	}
-	expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second)
-
-	// Try to extract email from JWT access token first
-	email := ExtractEmailFromJWT(tokenResp.AccessToken)
-	
-	// If no email in JWT, ask user for account label (only in interactive mode)
-	if email == "" && isInteractiveTerminal() {
-		fmt.Print("\n  Enter account label for file naming (optional, press Enter to skip): ")
-		reader := bufio.NewReader(os.Stdin)
-		var err error
-		email, err = reader.ReadString('\n')
-		if err != nil {
-			log.Debugf("Failed to read account label: %v", err)
+	// Step 7: Wait for callback from HTTP server
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	case <-time.After(socialAuthTimeout):
+		return nil, fmt.Errorf("authentication timed out")
+	case callback := <-resultChan:
+		if callback.Error != "" {
+			return nil, fmt.Errorf("authentication error: %s", callback.Error)
 		}
-		email = strings.TrimSpace(email)
-	}

-	return &KiroTokenData{
-		AccessToken:  tokenResp.AccessToken,
-		RefreshToken: tokenResp.RefreshToken,
-		ProfileArn:   tokenResp.ProfileArn,
-		ExpiresAt:    expiresAt.Format(time.RFC3339),
-		AuthMethod:   "social",
-		Provider:     providerName,
-		Email:        email, // JWT email or user-provided label
-	}, nil
+		// State is already validated by the callback server
+		if callback.Code == "" {
+			return nil, fmt.Errorf("no authorization code received")
+		}
+
+		fmt.Println("\n✓ Authorization received!")
+
+		// Step 8: Exchange code for tokens
+		fmt.Println("Exchanging code for tokens...")
+
+		tokenReq := &CreateTokenRequest{
+			Code:         callback.Code,
+			CodeVerifier: codeVerifier,
+			RedirectURI:  redirectURI, // Use HTTP redirect URI, not kiro:// protocol
+		}
+
+		tokenResp, err := c.CreateToken(ctx, tokenReq)
+		if err != nil {
+			return nil, fmt.Errorf("failed to exchange code for tokens: %w", err)
+		}
+
+		fmt.Println("\n✓ Authentication successful!")
+
+		// Close the browser window
+		if err := browser.CloseBrowser(); err != nil {
+			log.Debugf("Failed to close browser: %v", err)
+		}
+
+		// Validate ExpiresIn - use default 1 hour if invalid
+		expiresIn := tokenResp.ExpiresIn
+		if expiresIn <= 0 {
+			expiresIn = 3600
+		}
+		expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second)
+
+		// Try to extract email from JWT access token first
+		email := ExtractEmailFromJWT(tokenResp.AccessToken)
+
+		// If no email in JWT, ask user for account label (only in interactive mode)
+		if email == "" && isInteractiveTerminal() {
+			fmt.Print("\n  Enter account label for file naming (optional, press Enter to skip): ")
+			reader := bufio.NewReader(os.Stdin)
+			var err error
+			email, err = reader.ReadString('\n')
+			if err != nil {
+				log.Debugf("Failed to read account label: %v", err)
+			}
+			email = strings.TrimSpace(email)
+		}
+
+		return &KiroTokenData{
+			AccessToken:  tokenResp.AccessToken,
+			RefreshToken: tokenResp.RefreshToken,
+			ProfileArn:   tokenResp.ProfileArn,
+			ExpiresAt:    expiresAt.Format(time.RFC3339),
+			AuthMethod:   "social",
+			Provider:     providerName,
+			Email:        email, // JWT email or user-provided label
+			Region:       "us-east-1",
+		}, nil
+	}
 }

 // LoginWithGoogle performs OAuth login with Google.
--- a/internal/auth/kiro/sso_oidc.go
+++ b/internal/auth/kiro/sso_oidc.go
@@ -735,6 +735,7 @@ func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret
 		Provider:     "AWS",
 		ClientID:     clientID,
 		ClientSecret: clientSecret,
+		Region:       defaultIDCRegion,
 	}, nil
 }

@@ -850,16 +851,17 @@ func (c *SSOOIDCClient) LoginWithBuilderID(ctx context.Context) (*KiroTokenData,
 				ClientID:     regResp.ClientID,
 				ClientSecret: regResp.ClientSecret,
 				Email:        email,
+				Region:       defaultIDCRegion,
 			}, nil
-		}
-	}
+			}
+			}

-	// Close browser on timeout for better UX
-	if err := browser.CloseBrowser(); err != nil {
-		log.Debugf("Failed to close browser on timeout: %v", err)
-	}
-	return nil, fmt.Errorf("authorization timed out")
-}
+			// Close browser on timeout for better UX
+			if err := browser.CloseBrowser(); err != nil {
+			log.Debugf("Failed to close browser on timeout: %v", err)
+			}
+			return nil, fmt.Errorf("authorization timed out")
+			}

 // FetchUserEmail retrieves the user's email from AWS SSO OIDC userinfo endpoint.
 // Falls back to JWT parsing if userinfo fails.
@@ -1366,6 +1368,7 @@ func (c *SSOOIDCClient) LoginWithBuilderIDAuthCode(ctx context.Context) (*KiroTo
 			ClientID:     regResp.ClientID,
 			ClientSecret: regResp.ClientSecret,
 			Email:        email,
+			Region:       defaultIDCRegion,
 		}, nil
 	}
 }
--- a/internal/auth/kiro/token.go
+++ b/internal/auth/kiro/token.go
@@ -9,6 +9,8 @@ import (

 // KiroTokenStorage holds the persistent token data for Kiro authentication.
 type KiroTokenStorage struct {
+	// Type is the provider type for management UI recognition (must be "kiro")
+	Type string `json:"type"`
 	// AccessToken is the OAuth2 access token for API access
 	AccessToken string `json:"access_token"`
 	// RefreshToken is used to obtain new access tokens
@@ -23,6 +25,16 @@ type KiroTokenStorage struct {
 	Provider string `json:"provider"`
 	// LastRefresh is the timestamp of the last token refresh
 	LastRefresh string `json:"last_refresh"`
+	// ClientID is the OAuth client ID (required for token refresh)
+	ClientID string `json:"client_id,omitempty"`
+	// ClientSecret is the OAuth client secret (required for token refresh)
+	ClientSecret string `json:"client_secret,omitempty"`
+	// Region is the AWS region
+	Region string `json:"region,omitempty"`
+	// StartURL is the AWS Identity Center start URL (for IDC auth)
+	StartURL string `json:"start_url,omitempty"`
+	// Email is the user's email address
+	Email string `json:"email,omitempty"`
 }

 // SaveTokenToFile persists the token storage to the specified file path.
@@ -68,5 +80,10 @@ func (s *KiroTokenStorage) ToTokenData() *KiroTokenData {
 		ExpiresAt:    s.ExpiresAt,
 		AuthMethod:   s.AuthMethod,
 		Provider:     s.Provider,
+		ClientID:     s.ClientID,
+		ClientSecret: s.ClientSecret,
+		Region:       s.Region,
+		StartURL:     s.StartURL,
+		Email:        s.Email,
 	}
 }
--- a/internal/auth/kiro/token_repository.go
+++ b/internal/auth/kiro/token_repository.go
@@ -0,0 +1,274 @@
+package kiro
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// FileTokenRepository implements the TokenRepository interface on top of the file system.
+type FileTokenRepository struct {
+	mu      sync.RWMutex
+	baseDir string
+}
+
+// NewFileTokenRepository creates a new file-backed token repository rooted at baseDir.
+func NewFileTokenRepository(baseDir string) *FileTokenRepository {
+	return &FileTokenRepository{
+		baseDir: baseDir,
+	}
+}
+
+// SetBaseDir sets the base directory; surrounding whitespace is trimmed.
+func (r *FileTokenRepository) SetBaseDir(dir string) {
+	r.mu.Lock()
+	r.baseDir = strings.TrimSpace(dir)
+	r.mu.Unlock()
+}
+
+// FindOldestUnverified returns tokens that need refreshing, sorted by last verification time (oldest first) and capped at limit when limit > 0.
+func (r *FileTokenRepository) FindOldestUnverified(limit int) []*Token {
+	r.mu.RLock()
+	baseDir := r.baseDir
+	r.mu.RUnlock()
+
+	if baseDir == "" {
+		log.Debug("token repository: base directory not configured")
+		return nil
+	}
+
+	var tokens []*Token
+
+	err := filepath.WalkDir(baseDir, func(path string, d fs.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return nil // ignore the error and keep walking
+		}
+		if d.IsDir() {
+			return nil
+		}
+		if !strings.HasSuffix(strings.ToLower(d.Name()), ".json") {
+			return nil
+		}
+
+		// Only handle kiro-related token files
+		if !strings.HasPrefix(d.Name(), "kiro-") {
+			return nil
+		}
+
+		token, err := r.readTokenFile(path)
+		if err != nil {
+			log.Debugf("token repository: failed to read token file %s: %v", path, err)
+			return nil
+		}
+
+		if token != nil && token.RefreshToken != "" {
+			// A token needs refreshing when it has no expiry or expires in under 5 minutes
+			if token.ExpiresAt.IsZero() || time.Until(token.ExpiresAt) < 5*time.Minute {
+				tokens = append(tokens, token)
+			}
+		}
+
+		return nil
+	})
+
+	if err != nil {
+		log.Warnf("token repository: error walking directory: %v", err)
+	}
+
+	// Sort by last verification time (oldest first)
+	sort.Slice(tokens, func(i, j int) bool {
+		return tokens[i].LastVerified.Before(tokens[j].LastVerified)
+	})
+
+	// Cap the number of results
+	if limit > 0 && len(tokens) > limit {
+		tokens = tokens[:limit]
+	}
+
+	return tokens
+}
+
+// UpdateToken merges the token's fields into its JSON file under the base directory and persists the result.
+func (r *FileTokenRepository) UpdateToken(token *Token) error {
+	if token == nil {
+		return fmt.Errorf("token repository: token is nil")
+	}
+
+	r.mu.RLock()
+	baseDir := r.baseDir
+	r.mu.RUnlock()
+
+	if baseDir == "" {
+		return fmt.Errorf("token repository: base directory not configured")
+	}
+
+	// Build the file path. NOTE(review): readTokenFile sets ID to the file's base name, so a token found in a subdirectory would be rewritten at the top level of baseDir — confirm.
+	filePath := filepath.Join(baseDir, token.ID)
+	if !strings.HasSuffix(filePath, ".json") {
+		filePath += ".json"
+	}
+
+	// Read the existing file content so other fields are kept (read and parse errors are ignored)
+	existingData := make(map[string]any)
+	if data, err := os.ReadFile(filePath); err == nil {
+		_ = json.Unmarshal(data, &existingData)
+	}
+
+	// Update fields
+	existingData["access_token"] = token.AccessToken
+	existingData["refresh_token"] = token.RefreshToken
+	existingData["last_refresh"] = time.Now().Format(time.RFC3339)
+
+	if !token.ExpiresAt.IsZero() {
+		existingData["expires_at"] = token.ExpiresAt.Format(time.RFC3339)
+	}
+
+	// Keep the existing key fields unless the token carries a non-empty value
+	if token.ClientID != "" {
+		existingData["client_id"] = token.ClientID
+	}
+	if token.ClientSecret != "" {
+		existingData["client_secret"] = token.ClientSecret
+	}
+	if token.AuthMethod != "" {
+		existingData["auth_method"] = token.AuthMethod
+	}
+	if token.Region != "" {
+		existingData["region"] = token.Region
+	}
+	if token.StartURL != "" {
+		existingData["start_url"] = token.StartURL
+	}
+
+	// Serialize and write to the file
+	raw, err := json.MarshalIndent(existingData, "", "  ")
+	if err != nil {
+		return fmt.Errorf("token repository: marshal failed: %w", err)
+	}
+
+	// Atomic-style write: write a temporary file first, then rename it over the target
+	tmpPath := filePath + ".tmp"
+	if err := os.WriteFile(tmpPath, raw, 0o600); err != nil {
+		return fmt.Errorf("token repository: write temp file failed: %w", err)
+	}
+	if err := os.Rename(tmpPath, filePath); err != nil {
+		_ = os.Remove(tmpPath)
+		return fmt.Errorf("token repository: rename failed: %w", err)
+	}
+
+	log.Debugf("token repository: updated token %s", token.ID)
+	return nil
+}
+
+// readTokenFile reads a token from a file; it returns (nil, nil) when the file is not a kiro token with auth_method idc or builder-id.
+func (r *FileTokenRepository) readTokenFile(path string) (*Token, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+
+	var metadata map[string]any
+	if err := json.Unmarshal(data, &metadata); err != nil {
+		return nil, err
+	}
+
+	// Check whether this is a kiro token
+	tokenType, _ := metadata["type"].(string)
+	if tokenType != "kiro" {
+		return nil, nil
+	}
+
+	// Check auth_method (case-insensitive comparison to handle "IdC", "IDC", "idc", etc.)
+	authMethod, _ := metadata["auth_method"].(string)
+	authMethod = strings.ToLower(authMethod)
+	if authMethod != "idc" && authMethod != "builder-id" {
+		return nil, nil // only IDC and Builder ID tokens are handled
+	}
+
+	token := &Token{
+		ID:         filepath.Base(path),
+		AuthMethod: authMethod,
+	}
+
+	// Parse the individual fields
+	if v, ok := metadata["access_token"].(string); ok {
+		token.AccessToken = v
+	}
+	if v, ok := metadata["refresh_token"].(string); ok {
+		token.RefreshToken = v
+	}
+	if v, ok := metadata["client_id"].(string); ok {
+		token.ClientID = v
+	}
+	if v, ok := metadata["client_secret"].(string); ok {
+		token.ClientSecret = v
+	}
+	if v, ok := metadata["region"].(string); ok {
+		token.Region = v
+	}
+	if v, ok := metadata["start_url"].(string); ok {
+		token.StartURL = v
+	}
+	if v, ok := metadata["provider"].(string); ok {
+		token.Provider = v
+	}
+
+	// Parse the time fields (values that are not RFC 3339 are left as the zero time)
+	if v, ok := metadata["expires_at"].(string); ok {
+		if t, err := time.Parse(time.RFC3339, v); err == nil {
+			token.ExpiresAt = t
+		}
+	}
+	if v, ok := metadata["last_refresh"].(string); ok {
+		if t, err := time.Parse(time.RFC3339, v); err == nil {
+			token.LastVerified = t
+		}
+	}
+
+	return token, nil
+}
+
+// ListKiroTokens lists every Kiro token under the base directory that readTokenFile accepts (for debugging); ctx is not used by this implementation.
+func (r *FileTokenRepository) ListKiroTokens(ctx context.Context) ([]*Token, error) {
+	r.mu.RLock()
+	baseDir := r.baseDir
+	r.mu.RUnlock()
+
+	if baseDir == "" {
+		return nil, fmt.Errorf("token repository: base directory not configured")
+	}
+
+	var tokens []*Token
+
+	err := filepath.WalkDir(baseDir, func(path string, d fs.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return nil
+		}
+		if d.IsDir() {
+			return nil
+		}
+		if !strings.HasPrefix(d.Name(), "kiro-") || !strings.HasSuffix(d.Name(), ".json") {
+			return nil
+		}
+
+		token, err := r.readTokenFile(path)
+		if err != nil {
+			return nil
+		}
+		if token != nil {
+			tokens = append(tokens, token)
+		}
+		return nil
+	})
+
+	return tokens, err
+}
--- a/internal/auth/kiro/usage_checker.go
+++ b/internal/auth/kiro/usage_checker.go
@@ -0,0 +1,243 @@
+// Package kiro provides authentication functionality for AWS CodeWhisperer (Kiro) API.
+// This file implements usage quota checking and monitoring.
+package kiro
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+)
+
+// UsageQuotaResponse is the JSON response body that CheckUsage decodes for a usage quota query.
+type UsageQuotaResponse struct {
+	UsageBreakdownList []UsageBreakdownExtended `json:"usageBreakdownList"`
+	SubscriptionInfo   *SubscriptionInfo        `json:"subscriptionInfo,omitempty"`
+	NextDateReset      float64                  `json:"nextDateReset,omitempty"`
+}
+
+// UsageBreakdownExtended holds the usage limit and current usage for one resource type, plus optional free-trial data.
+// Note: UsageBreakdown is already defined in codewhisperer_client.go
+type UsageBreakdownExtended struct {
+	ResourceType              string                 `json:"resourceType"`
+	UsageLimitWithPrecision   float64                `json:"usageLimitWithPrecision"`
+	CurrentUsageWithPrecision float64                `json:"currentUsageWithPrecision"`
+	FreeTrialInfo             *FreeTrialInfoExtended `json:"freeTrialInfo,omitempty"`
+}
+
+// FreeTrialInfoExtended holds the free-trial status together with its own usage limit and current usage.
+type FreeTrialInfoExtended struct {
+	FreeTrialStatus           string  `json:"freeTrialStatus"`
+	UsageLimitWithPrecision   float64 `json:"usageLimitWithPrecision"`
+	CurrentUsageWithPrecision float64 `json:"currentUsageWithPrecision"`
+}
+
+// QuotaStatus is the quota summary that GetQuotaStatus produces for a token.
+type QuotaStatus struct {
+	TotalLimit     float64
+	CurrentUsage   float64
+	RemainingQuota float64
+	IsExhausted    bool
+	ResourceType   string
+	NextReset      time.Time
+}
+
+// UsageChecker checks token quota usage by sending requests to endpoint through httpClient.
+type UsageChecker struct {
+	httpClient *http.Client
+	endpoint   string
+}
+
+// NewUsageChecker creates a UsageChecker whose 30-second-timeout HTTP client is passed through util.SetProxy with cfg.SDKConfig; cfg must be non-nil.
+func NewUsageChecker(cfg *config.Config) *UsageChecker {
+	return &UsageChecker{
+		httpClient: util.SetProxy(&cfg.SDKConfig, &http.Client{Timeout: 30 * time.Second}),
+		endpoint:   awsKiroEndpoint,
+	}
+}
+
+// NewUsageCheckerWithClient creates a UsageChecker that uses the given HTTP client as-is and the default endpoint.
+func NewUsageCheckerWithClient(client *http.Client) *UsageChecker {
+	return &UsageChecker{
+		httpClient: client,
+		endpoint:   awsKiroEndpoint,
+	}
+}
+
+// CheckUsage POSTs a usage query for tokenData and decodes the reply; it returns an error for nil token data, an empty access token, or a non-200 status.
+func (c *UsageChecker) CheckUsage(ctx context.Context, tokenData *KiroTokenData) (*UsageQuotaResponse, error) {
+	if tokenData == nil {
+		return nil, fmt.Errorf("token data is nil")
+	}
+
+	if tokenData.AccessToken == "" {
+		return nil, fmt.Errorf("access token is empty")
+	}
+
+	payload := map[string]interface{}{
+		"origin":       "AI_EDITOR",
+		"profileArn":   tokenData.ProfileArn,
+		"resourceType": "AGENTIC_REQUEST",
+	}
+
+	jsonBody, err := json.Marshal(payload)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.endpoint, strings.NewReader(string(jsonBody)))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/x-amz-json-1.0")
+	req.Header.Set("x-amz-target", targetGetUsage)
+	req.Header.Set("Authorization", "Bearer "+tokenData.AccessToken)
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
+	}
+
+	var result UsageQuotaResponse
+	if err := json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse usage response: %w", err)
+	}
+
+	return &result, nil
+}
+
+// CheckUsageByAccessToken wraps CheckUsage for callers that hold only an access token and a profile ARN.
+func (c *UsageChecker) CheckUsageByAccessToken(ctx context.Context, accessToken, profileArn string) (*UsageQuotaResponse, error) {
+	tokenData := &KiroTokenData{
+		AccessToken: accessToken,
+		ProfileArn:  profileArn,
+	}
+	return c.CheckUsage(ctx, tokenData)
+}
+
+// GetRemainingQuota sums the positive remaining amounts (regular and free trial) over all breakdowns; nil or empty usage yields 0.
+func GetRemainingQuota(usage *UsageQuotaResponse) float64 {
+	if usage == nil || len(usage.UsageBreakdownList) == 0 {
+		return 0
+	}
+
+	var totalRemaining float64
+	for _, breakdown := range usage.UsageBreakdownList {
+		remaining := breakdown.UsageLimitWithPrecision - breakdown.CurrentUsageWithPrecision
+		if remaining > 0 {
+			totalRemaining += remaining
+		}
+
+		if breakdown.FreeTrialInfo != nil {
+			freeRemaining := breakdown.FreeTrialInfo.UsageLimitWithPrecision - breakdown.FreeTrialInfo.CurrentUsageWithPrecision
+			if freeRemaining > 0 {
+				totalRemaining += freeRemaining
+			}
+		}
+	}
+
+	return totalRemaining
+}
+
+// IsQuotaExhausted reports true when usage is nil or empty, or when no breakdown (regular or free trial) has current usage below its limit.
+func IsQuotaExhausted(usage *UsageQuotaResponse) bool {
+	if usage == nil || len(usage.UsageBreakdownList) == 0 {
+		return true
+	}
+
+	for _, breakdown := range usage.UsageBreakdownList {
+		if breakdown.CurrentUsageWithPrecision < breakdown.UsageLimitWithPrecision {
+			return false
+		}
+
+		if breakdown.FreeTrialInfo != nil {
+			if breakdown.FreeTrialInfo.CurrentUsageWithPrecision < breakdown.FreeTrialInfo.UsageLimitWithPrecision {
+				return false
+			}
+		}
+	}
+
+	return true
+}
+
+// GetQuotaStatus fetches usage and summarizes only the first breakdown (plus its free trial); the regular remaining amount is not clamped at zero, and NextDateReset is treated as milliseconds.
+func (c *UsageChecker) GetQuotaStatus(ctx context.Context, tokenData *KiroTokenData) (*QuotaStatus, error) {
+	usage, err := c.CheckUsage(ctx, tokenData)
+	if err != nil {
+		return nil, err
+	}
+
+	status := &QuotaStatus{
+		IsExhausted: IsQuotaExhausted(usage),
+	}
+
+	if len(usage.UsageBreakdownList) > 0 {
+		breakdown := usage.UsageBreakdownList[0]
+		status.TotalLimit = breakdown.UsageLimitWithPrecision
+		status.CurrentUsage = breakdown.CurrentUsageWithPrecision
+		status.RemainingQuota = breakdown.UsageLimitWithPrecision - breakdown.CurrentUsageWithPrecision
+		status.ResourceType = breakdown.ResourceType
+
+		if breakdown.FreeTrialInfo != nil {
+			status.TotalLimit += breakdown.FreeTrialInfo.UsageLimitWithPrecision
+			status.CurrentUsage += breakdown.FreeTrialInfo.CurrentUsageWithPrecision
+			freeRemaining := breakdown.FreeTrialInfo.UsageLimitWithPrecision - breakdown.FreeTrialInfo.CurrentUsageWithPrecision
+			if freeRemaining > 0 {
+				status.RemainingQuota += freeRemaining
+			}
+		}
+	}
+
+	if usage.NextDateReset > 0 {
+		status.NextReset = time.Unix(int64(usage.NextDateReset/1000), 0)
+	}
+
+	return status, nil
+}
+
+// CalculateAvailableCount returns the available request count; it simply delegates to GetRemainingQuota.
+func CalculateAvailableCount(usage *UsageQuotaResponse) float64 {
+	return GetRemainingQuota(usage)
+}
+
+// GetUsagePercentage returns total usage over total limit (regular plus free trial) as a percentage; it returns 100 when usage is nil or empty or the total limit is 0.
+func GetUsagePercentage(usage *UsageQuotaResponse) float64 {
+	if usage == nil || len(usage.UsageBreakdownList) == 0 {
+		return 100.0
+	}
+
+	var totalLimit, totalUsage float64
+	for _, breakdown := range usage.UsageBreakdownList {
+		totalLimit += breakdown.UsageLimitWithPrecision
+		totalUsage += breakdown.CurrentUsageWithPrecision
+
+		if breakdown.FreeTrialInfo != nil {
+			totalLimit += breakdown.FreeTrialInfo.UsageLimitWithPrecision
+			totalUsage += breakdown.FreeTrialInfo.CurrentUsageWithPrecision
+		}
+	}
+
+	if totalLimit == 0 {
+		return 100.0
+	}
+
+	return (totalUsage / totalLimit) * 100
+}
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -3,6 +3,7 @@ package cache
 import (
 	"crypto/sha256"
 	"encoding/hex"
+	"strings"
 	"sync"
 	"time"
 )
@@ -23,18 +24,18 @@ const (
 	// MinValidSignatureLen is the minimum length for a signature to be considered valid
 	MinValidSignatureLen = 50

-	// SessionCleanupInterval controls how often stale sessions are purged
-	SessionCleanupInterval = 10 * time.Minute
+	// CacheCleanupInterval controls how often stale entries are purged
+	CacheCleanupInterval = 10 * time.Minute
 )

-// signatureCache stores signatures by sessionId -> textHash -> SignatureEntry
+// signatureCache stores signatures by model group -> textHash -> SignatureEntry
 var signatureCache sync.Map

-// sessionCleanupOnce ensures the background cleanup goroutine starts only once
-var sessionCleanupOnce sync.Once
+// cacheCleanupOnce ensures the background cleanup goroutine starts only once
+var cacheCleanupOnce sync.Once

-// sessionCache is the inner map type
-type sessionCache struct {
+// groupCache is the inner map type
+type groupCache struct {
 	mu      sync.RWMutex
 	entries map[string]SignatureEntry
 }
@@ -45,36 +46,36 @@ func hashText(text string) string {
 	return hex.EncodeToString(h[:])[:SignatureTextHashLen]
 }

-// getOrCreateSession gets or creates a session cache
-func getOrCreateSession(sessionID string) *sessionCache {
+// getOrCreateGroupCache gets or creates a cache bucket for a model group
+func getOrCreateGroupCache(groupKey string) *groupCache {
 	// Start background cleanup on first access
-	sessionCleanupOnce.Do(startSessionCleanup)
+	cacheCleanupOnce.Do(startCacheCleanup)

-	if val, ok := signatureCache.Load(sessionID); ok {
-		return val.(*sessionCache)
+	if val, ok := signatureCache.Load(groupKey); ok {
+		return val.(*groupCache)
 	}
-	sc := &sessionCache{entries: make(map[string]SignatureEntry)}
-	actual, _ := signatureCache.LoadOrStore(sessionID, sc)
-	return actual.(*sessionCache)
+	sc := &groupCache{entries: make(map[string]SignatureEntry)}
+	actual, _ := signatureCache.LoadOrStore(groupKey, sc)
+	return actual.(*groupCache)
 }

-// startSessionCleanup launches a background goroutine that periodically
-// removes sessions where all entries have expired.
-func startSessionCleanup() {
+// startCacheCleanup launches a background goroutine that periodically
+// removes caches where all entries have expired.
+func startCacheCleanup() {
 	go func() {
-		ticker := time.NewTicker(SessionCleanupInterval)
+		ticker := time.NewTicker(CacheCleanupInterval)
 		defer ticker.Stop()
 		for range ticker.C {
-			purgeExpiredSessions()
+			purgeExpiredCaches()
 		}
 	}()
 }

-// purgeExpiredSessions removes sessions with no valid (non-expired) entries.
-func purgeExpiredSessions() {
+// purgeExpiredCaches removes caches with no valid (non-expired) entries.
+func purgeExpiredCaches() {
 	now := time.Now()
 	signatureCache.Range(func(key, value any) bool {
-		sc := value.(*sessionCache)
+		sc := value.(*groupCache)
 		sc.mu.Lock()
 		// Remove expired entries
 		for k, entry := range sc.entries {
@@ -84,7 +85,7 @@ func purgeExpiredSessions() {
 		}
 		isEmpty := len(sc.entries) == 0
 		sc.mu.Unlock()
-		// Remove session if empty
+		// Remove cache bucket if empty
 		if isEmpty {
 			signatureCache.Delete(key)
 		}
@@ -92,19 +93,19 @@ func purgeExpiredSessions() {
 	})
 }

-// CacheSignature stores a thinking signature for a given session and text.
+// CacheSignature stores a thinking signature for a given model group and text.
 // Used for Claude models that require signed thinking blocks in multi-turn conversations.
-func CacheSignature(sessionID, text, signature string) {
-	if sessionID == "" || text == "" || signature == "" {
+func CacheSignature(modelName, text, signature string) {
+	if text == "" || signature == "" {
 		return
 	}
 	if len(signature) < MinValidSignatureLen {
 		return
 	}

-	sc := getOrCreateSession(sessionID)
+	groupKey := GetModelGroup(modelName)
 	textHash := hashText(text)
-
+	sc := getOrCreateGroupCache(groupKey)
 	sc.mu.Lock()
 	defer sc.mu.Unlock()

@@ -114,18 +115,25 @@ func CacheSignature(sessionID, text, signature string) {
 	}
 }

-// GetCachedSignature retrieves a cached signature for a given session and text.
+// GetCachedSignature retrieves a cached signature for a given model group and text.
 // Returns empty string if not found or expired.
-func GetCachedSignature(sessionID, text string) string {
-	if sessionID == "" || text == "" {
-		return ""
-	}
+func GetCachedSignature(modelName, text string) string {
+	groupKey := GetModelGroup(modelName)

-	val, ok := signatureCache.Load(sessionID)
-	if !ok {
+	if text == "" {
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}
-	sc := val.(*sessionCache)
+	val, ok := signatureCache.Load(groupKey)
+	if !ok {
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
+		return ""
+	}
+	sc := val.(*groupCache)

 	textHash := hashText(text)

@@ -135,11 +143,17 @@ func GetCachedSignature(sessionID, text string) string {
 	entry, exists := sc.entries[textHash]
 	if !exists {
 		sc.mu.Unlock()
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}
 	if now.Sub(entry.Timestamp) > SignatureCacheTTL {
 		delete(sc.entries, textHash)
 		sc.mu.Unlock()
+		if groupKey == "gemini" {
+			return "skip_thought_signature_validator"
+		}
 		return ""
 	}

@@ -151,19 +165,31 @@ func GetCachedSignature(sessionID, text string) string {
 	return entry.Signature
 }

-// ClearSignatureCache clears signature cache for a specific session or all sessions.
-func ClearSignatureCache(sessionID string) {
-	if sessionID != "" {
-		signatureCache.Delete(sessionID)
-	} else {
+// ClearSignatureCache clears signature cache for a specific model group or all groups.
+func ClearSignatureCache(modelName string) {
+	if modelName == "" {
 		signatureCache.Range(func(key, _ any) bool {
 			signatureCache.Delete(key)
 			return true
 		})
+		return
 	}
+	groupKey := GetModelGroup(modelName)
+	signatureCache.Delete(groupKey)
 }

 // HasValidSignature checks if a signature is valid (non-empty and long enough)
-func HasValidSignature(signature string) bool {
-	return signature != "" && len(signature) >= MinValidSignatureLen
+func HasValidSignature(modelName, signature string) bool {
+	return (signature != "" && len(signature) >= MinValidSignatureLen) || (signature == "skip_thought_signature_validator" && GetModelGroup(modelName) == "gemini")
+}
+
+// GetModelGroup maps a model name to its cache bucket: "gpt", "claude", or "gemini"
+// (first substring match wins, checked in that order); other names are returned as-is.
+func GetModelGroup(modelName string) string {
+	for _, family := range []string{"gpt", "claude", "gemini"} {
+		if strings.Contains(modelName, family) {
+			return family
+		}
+	}
+	return modelName
 }
--- a/internal/cache/signature_cache_test.go
+++ b/internal/cache/signature_cache_test.go
@@ -5,38 +5,40 @@ import (
 	"time"
 )

+const testModelName = "claude-sonnet-4-5"
+
 func TestCacheSignature_BasicStorageAndRetrieval(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "test-session-1"
 	text := "This is some thinking text content"
 	signature := "abc123validSignature1234567890123456789012345678901234567890"

 	// Store signature
-	CacheSignature(sessionID, text, signature)
+	CacheSignature(testModelName, text, signature)

 	// Retrieve signature
-	retrieved := GetCachedSignature(sessionID, text)
+	retrieved := GetCachedSignature(testModelName, text)
 	if retrieved != signature {
 		t.Errorf("Expected signature '%s', got '%s'", signature, retrieved)
 	}
 }

-func TestCacheSignature_DifferentSessions(t *testing.T) {
+func TestCacheSignature_DifferentModelGroups(t *testing.T) {
 	ClearSignatureCache("")

-	text := "Same text in different sessions"
+	text := "Same text across models"
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature("session-a", text, sig1)
-	CacheSignature("session-b", text, sig2)
+	geminiModel := "gemini-3-pro-preview"
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(geminiModel, text, sig2)

-	if GetCachedSignature("session-a", text) != sig1 {
-		t.Error("Session-a signature mismatch")
+	if GetCachedSignature(testModelName, text) != sig1 {
+		t.Error("Claude signature mismatch")
 	}
-	if GetCachedSignature("session-b", text) != sig2 {
-		t.Error("Session-b signature mismatch")
+	if GetCachedSignature(geminiModel, text) != sig2 {
+		t.Error("Gemini signature mismatch")
 	}
 }

@@ -44,13 +46,13 @@ func TestCacheSignature_NotFound(t *testing.T) {
 	ClearSignatureCache("")

 	// Non-existent session
-	if got := GetCachedSignature("nonexistent", "some text"); got != "" {
+	if got := GetCachedSignature(testModelName, "some text"); got != "" {
 		t.Errorf("Expected empty string for nonexistent session, got '%s'", got)
 	}

 	// Existing session but different text
-	CacheSignature("session-x", "text-a", "sigA12345678901234567890123456789012345678901234567890")
-	if got := GetCachedSignature("session-x", "text-b"); got != "" {
+	CacheSignature(testModelName, "text-a", "sigA12345678901234567890123456789012345678901234567890")
+	if got := GetCachedSignature(testModelName, "text-b"); got != "" {
 		t.Errorf("Expected empty string for different text, got '%s'", got)
 	}
 }
@@ -59,12 +61,11 @@ func TestCacheSignature_EmptyInputs(t *testing.T) {
 	ClearSignatureCache("")

 	// All empty/invalid inputs should be no-ops
-	CacheSignature("", "text", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("session", "", "sig12345678901234567890123456789012345678901234567890")
-	CacheSignature("session", "text", "")
-	CacheSignature("session", "text", "short") // Too short
+	CacheSignature(testModelName, "", "sig12345678901234567890123456789012345678901234567890")
+	CacheSignature(testModelName, "text", "")
+	CacheSignature(testModelName, "text", "short") // Too short

-	if got := GetCachedSignature("session", "text"); got != "" {
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
 		t.Errorf("Expected empty after invalid cache attempts, got '%s'", got)
 	}
 }
@@ -72,31 +73,27 @@ func TestCacheSignature_EmptyInputs(t *testing.T) {
 func TestCacheSignature_ShortSignatureRejected(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "test-short-sig"
 	text := "Some text"
 	shortSig := "abc123" // Less than 50 chars

-	CacheSignature(sessionID, text, shortSig)
+	CacheSignature(testModelName, text, shortSig)

-	if got := GetCachedSignature(sessionID, text); got != "" {
+	if got := GetCachedSignature(testModelName, text); got != "" {
 		t.Errorf("Short signature should be rejected, got '%s'", got)
 	}
 }

-func TestClearSignatureCache_SpecificSession(t *testing.T) {
+func TestClearSignatureCache_ModelGroup(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("session-1", "text", sig)
-	CacheSignature("session-2", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("session-1")

-	if got := GetCachedSignature("session-1", "text"); got != "" {
-		t.Error("session-1 should be cleared")
-	}
-	if got := GetCachedSignature("session-2", "text"); got != sig {
-		t.Error("session-2 should still exist")
+	if got := GetCachedSignature(testModelName, "text"); got != sig {
+		t.Error("signature should remain when clearing unknown session")
 	}
 }

@@ -104,35 +101,37 @@ func TestClearSignatureCache_AllSessions(t *testing.T) {
 	ClearSignatureCache("")

 	sig := "validSig1234567890123456789012345678901234567890123456"
-	CacheSignature("session-1", "text", sig)
-	CacheSignature("session-2", "text", sig)
+	CacheSignature(testModelName, "text", sig)
+	CacheSignature(testModelName, "text-2", sig)

 	ClearSignatureCache("")

-	if got := GetCachedSignature("session-1", "text"); got != "" {
-		t.Error("session-1 should be cleared")
+	if got := GetCachedSignature(testModelName, "text"); got != "" {
+		t.Error("text should be cleared")
 	}
-	if got := GetCachedSignature("session-2", "text"); got != "" {
-		t.Error("session-2 should be cleared")
+	if got := GetCachedSignature(testModelName, "text-2"); got != "" {
+		t.Error("text-2 should be cleared")
 	}
 }

 func TestHasValidSignature(t *testing.T) {
 	tests := []struct {
 		name      string
+		modelName string
 		signature string
 		expected  bool
 	}{
-		{"valid long signature", "abc123validSignature1234567890123456789012345678901234567890", true},
-		{"exactly 50 chars", "12345678901234567890123456789012345678901234567890", true},
-		{"49 chars - invalid", "1234567890123456789012345678901234567890123456789", false},
-		{"empty string", "", false},
-		{"short signature", "abc", false},
+		{"valid long signature", testModelName, "abc123validSignature1234567890123456789012345678901234567890", true},
+		{"exactly 50 chars", testModelName, "12345678901234567890123456789012345678901234567890", true},
+		{"49 chars - invalid", testModelName, "1234567890123456789012345678901234567890123456789", false},
+		{"empty string", testModelName, "", false},
+		{"short signature", testModelName, "abc", false},
+		{"gemini sentinel", "gemini-3-pro-preview", "skip_thought_signature_validator", true},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := HasValidSignature(tt.signature)
+			result := HasValidSignature(tt.modelName, tt.signature)
 			if result != tt.expected {
 				t.Errorf("HasValidSignature(%q) = %v, expected %v", tt.signature, result, tt.expected)
 			}
@@ -143,21 +142,19 @@ func TestHasValidSignature(t *testing.T) {
 func TestCacheSignature_TextHashCollisionResistance(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "hash-test-session"
-
 	// Different texts should produce different hashes
 	text1 := "First thinking text"
 	text2 := "Second thinking text"
 	sig1 := "signature1_1234567890123456789012345678901234567890123456"
 	sig2 := "signature2_1234567890123456789012345678901234567890123456"

-	CacheSignature(sessionID, text1, sig1)
-	CacheSignature(sessionID, text2, sig2)
+	CacheSignature(testModelName, text1, sig1)
+	CacheSignature(testModelName, text2, sig2)

-	if GetCachedSignature(sessionID, text1) != sig1 {
+	if GetCachedSignature(testModelName, text1) != sig1 {
 		t.Error("text1 signature mismatch")
 	}
-	if GetCachedSignature(sessionID, text2) != sig2 {
+	if GetCachedSignature(testModelName, text2) != sig2 {
 		t.Error("text2 signature mismatch")
 	}
 }
@@ -165,13 +162,12 @@ func TestCacheSignature_TextHashCollisionResistance(t *testing.T) {
 func TestCacheSignature_UnicodeText(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "unicode-session"
 	text := "한글 텍스트와 이모지 🎉 그리고 特殊文字"
 	sig := "unicodeSig123456789012345678901234567890123456789012345"

-	CacheSignature(sessionID, text, sig)
+	CacheSignature(testModelName, text, sig)

-	if got := GetCachedSignature(sessionID, text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Unicode text signature retrieval failed, got '%s'", got)
 	}
 }
@@ -179,15 +175,14 @@ func TestCacheSignature_UnicodeText(t *testing.T) {
 func TestCacheSignature_Overwrite(t *testing.T) {
 	ClearSignatureCache("")

-	sessionID := "overwrite-session"
 	text := "Same text"
 	sig1 := "firstSignature12345678901234567890123456789012345678901"
 	sig2 := "secondSignature1234567890123456789012345678901234567890"

-	CacheSignature(sessionID, text, sig1)
-	CacheSignature(sessionID, text, sig2) // Overwrite
+	CacheSignature(testModelName, text, sig1)
+	CacheSignature(testModelName, text, sig2) // Overwrite

-	if got := GetCachedSignature(sessionID, text); got != sig2 {
+	if got := GetCachedSignature(testModelName, text); got != sig2 {
 		t.Errorf("Expected overwritten signature '%s', got '%s'", sig2, got)
 	}
 }
@@ -199,14 +194,13 @@ func TestCacheSignature_ExpirationLogic(t *testing.T) {

 	// This test verifies the expiration check exists
 	// In a real scenario, we'd mock time.Now()
-	sessionID := "expiration-test"
 	text := "text"
 	sig := "validSig1234567890123456789012345678901234567890123456"

-	CacheSignature(sessionID, text, sig)
+	CacheSignature(testModelName, text, sig)

 	// Fresh entry should be retrievable
-	if got := GetCachedSignature(sessionID, text); got != sig {
+	if got := GetCachedSignature(testModelName, text); got != sig {
 		t.Errorf("Fresh entry should be retrievable, got '%s'", got)
 	}

--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -118,6 +118,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	}

 	activatedProjects := make([]string, 0, len(projectSelections))
+	seenProjects := make(map[string]bool)
 	for _, candidateID := range projectSelections {
 		log.Infof("Activating project %s", candidateID)
 		if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
@@ -134,6 +135,13 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 		if finalID == "" {
 			finalID = candidateID
 		}
+
+		// Skip duplicates
+		if seenProjects[finalID] {
+			log.Infof("Project %s already activated, skipping", finalID)
+			continue
+		}
+		seenProjects[finalID] = true
 		activatedProjects = append(activatedProjects, finalID)
 	}

@@ -261,8 +269,39 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 			finalProjectID := projectID
 			if responseProjectID != "" {
 				if explicitProject && !strings.EqualFold(responseProjectID, projectID) {
-					log.Warnf("Gemini onboarding returned project %s instead of requested %s; using response project ID.", responseProjectID, projectID)
-					finalProjectID = responseProjectID
+					// Check if this is a free user (gen-lang-client projects or free/legacy tier)
+					isFreeUser := strings.HasPrefix(projectID, "gen-lang-client-") ||
+						strings.EqualFold(tierID, "FREE") ||
+						strings.EqualFold(tierID, "LEGACY")
+
+					if isFreeUser {
+						// Interactive prompt for free users
+						fmt.Printf("\nGoogle returned a different project ID:\n")
+						fmt.Printf("  Requested (frontend): %s\n", projectID)
+						fmt.Printf("  Returned (backend):   %s\n\n", responseProjectID)
+						fmt.Printf("  Backend project IDs have access to preview models (gemini-3-*).\n")
+						fmt.Printf("  This is normal for free tier users.\n\n")
+						fmt.Printf("Which project ID would you like to use?\n")
+						fmt.Printf("  [1] Backend (recommended): %s\n", responseProjectID)
+						fmt.Printf("  [2] Frontend: %s\n\n", projectID)
+						fmt.Printf("Enter choice [1]: ")
+
+						reader := bufio.NewReader(os.Stdin)
+						choice, _ := reader.ReadString('\n')
+						choice = strings.TrimSpace(choice)
+
+						if choice == "2" {
+							log.Infof("Using frontend project ID: %s", projectID)
+							fmt.Println(". Warning: Frontend project IDs may not have access to preview models.")
+							finalProjectID = projectID
+						} else {
+							log.Infof("Using backend project ID: %s (recommended)", responseProjectID)
+							finalProjectID = responseProjectID
+						}
+					} else {
+						// Pro users: keep requested project ID (original behavior)
+						log.Warnf("Gemini onboarding returned project %s instead of requested %s; keeping requested project ID.", responseProjectID, projectID)
+					}
 				} else {
 					finalProjectID = responseProjectID
 				}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -261,6 +261,25 @@ type PayloadModelRule struct {
 	Protocol string `yaml:"protocol" json:"protocol"`
 }

+// CloakConfig configures request cloaking, which disguises API requests from non-Claude-Code
+// clients so they appear to originate from the official Claude Code CLI (set via ClaudeKey.Cloak).
+type CloakConfig struct {
+	// Mode selects when cloaking is applied: "auto" (default), "always", or "never".
+	// - "auto": cloak only when the client is not Claude Code (decided from the User-Agent)
+	// - "always": apply cloaking regardless of the client
+	// - "never": do not apply cloaking
+	Mode string `yaml:"mode,omitempty" json:"mode,omitempty"`
+
+	// StrictMode selects how user-supplied system prompts are treated while cloaking.
+	// - false (default): the Claude Code prompt is prepended to the user's system messages
+	// - true: all user system messages are stripped, leaving only the Claude Code prompt
+	StrictMode bool `yaml:"strict-mode,omitempty" json:"strict-mode,omitempty"`
+
+	// SensitiveWords lists words to obfuscate by inserting zero-width characters,
+	// which can help requests get past certain content filters.
+	SensitiveWords []string `yaml:"sensitive-words,omitempty" json:"sensitive-words,omitempty"`
+}
+
 // ClaudeKey represents the configuration for a Claude API key,
 // including the API key itself and an optional base URL for the API endpoint.
 type ClaudeKey struct {
@@ -289,6 +308,9 @@ type ClaudeKey struct {

 	// ExcludedModels lists model IDs that should be excluded for this provider.
 	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
+
+	// Cloak configures request cloaking for non-Claude-Code clients.
+	Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
 }

 func (k ClaudeKey) GetAPIKey() string  { return k.APIKey }
@@ -964,6 +986,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
 	removeLegacyGenerativeLanguageKeys(original.Content[0])

 	pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
+	pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-model-alias")

 	// Merge generated into original in-place, preserving comments/order of existing nodes.
 	mergeMappingPreserve(original.Content[0], generated.Content[0])
@@ -1454,6 +1477,16 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
 	}
 	srcIdx := findMapKeyIndex(srcRoot, key)
 	if srcIdx < 0 {
+		// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
+		//
+		// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
+		// oauth-model-alias key is missing, migration will add the default antigravity aliases.
+		// When users delete the last channel from oauth-model-alias via the management API,
+		// we want that deletion to persist across hot reloads and restarts.
+		if key == "oauth-model-alias" {
+			dstRoot.Content[dstIdx+1] = &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
+			return
+		}
 		removeMapKey(dstRoot, key)
 		return
 	}
--- a/internal/logging/gin_logger.go
+++ b/internal/logging/gin_logger.go
@@ -4,6 +4,7 @@
 package logging

 import (
+	"errors"
 	"fmt"
 	"net/http"
 	"runtime/debug"
@@ -112,6 +113,11 @@ func isAIAPIPath(path string) bool {
 //   - gin.HandlerFunc: A middleware handler for panic recovery
 func GinLogrusRecovery() gin.HandlerFunc {
 	return gin.CustomRecovery(func(c *gin.Context, recovered interface{}) {
+		if err, ok := recovered.(error); ok && errors.Is(err, http.ErrAbortHandler) {
+			// Let net/http handle ErrAbortHandler so the connection is aborted without noisy stack logs.
+			panic(http.ErrAbortHandler)
+		}
+
 		log.WithFields(log.Fields{
 			"panic": recovered,
 			"stack": string(debug.Stack()),
--- a/internal/logging/gin_logger_test.go
+++ b/internal/logging/gin_logger_test.go
@@ -0,0 +1,60 @@
+package logging
+
+import (
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+)
+
+// TestGinLogrusRecoveryRepanicsErrAbortHandler verifies that the recovery middleware
+// re-raises http.ErrAbortHandler (the exact sentinel) instead of swallowing it.
+func TestGinLogrusRecoveryRepanicsErrAbortHandler(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	router := gin.New()
+	router.Use(GinLogrusRecovery())
+	router.GET("/abort", func(*gin.Context) { panic(http.ErrAbortHandler) })
+
+	req := httptest.NewRequest(http.MethodGet, "/abort", nil)
+	rec := httptest.NewRecorder()
+
+	// The panic must escape ServeHTTP; classify whatever comes out.
+	defer func() {
+		switch got := recover().(type) {
+		case nil:
+			t.Fatalf("expected panic, got nil")
+		case error:
+			if !errors.Is(got, http.ErrAbortHandler) {
+				t.Fatalf("expected ErrAbortHandler, got %v", got)
+			}
+			if got != http.ErrAbortHandler {
+				t.Fatalf("expected exact ErrAbortHandler sentinel, got %v", got)
+			}
+		default:
+			t.Fatalf("expected error panic, got %T", got)
+		}
+	}()
+
+	router.ServeHTTP(rec, req)
+}
+
+// TestGinLogrusRecoveryHandlesRegularPanic asserts that an ordinary (non-abort) panic
+// is absorbed by the recovery middleware and the recorded status is 500.
+func TestGinLogrusRecoveryHandlesRegularPanic(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	router := gin.New()
+	router.Use(GinLogrusRecovery())
+	router.GET("/panic", func(*gin.Context) { panic("boom") })
+
+	req := httptest.NewRequest(http.MethodGet, "/panic", nil)
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, req)
+
+	if got := rec.Code; got != http.StatusInternalServerError {
+		t.Fatalf("expected 500, got %d", got)
+	}
+}
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -30,7 +30,7 @@ var (
 type LogFormatter struct{}

 // logFieldOrder defines the display order for common log fields.
-var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_value", "min", "max", "clamped_to", "error"}
+var logFieldOrder = []string{"provider", "model", "mode", "budget", "level", "original_mode", "original_value", "min", "max", "clamped_to", "error"}

 // Format renders a single log entry with custom formatting.
 func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
@@ -122,6 +122,24 @@ func isDirWritable(dir string) bool {
 	return true
 }

+// ResolveLogDirectory picks the directory that application logs are written to.
+// A non-empty util.WritablePath() wins ("<base>/logs"); otherwise "logs" is used,
+// unless it is not writable and cfg provides a non-blank AuthDir ("<AuthDir>/logs").
+func ResolveLogDirectory(cfg *config.Config) string {
+	const defaultDir = "logs"
+	if base := util.WritablePath(); base != "" {
+		return filepath.Join(base, defaultDir)
+	}
+	if cfg == nil || isDirWritable(defaultDir) {
+		return defaultDir
+	}
+	// Relative "logs" is unusable: fall back to the auth directory when one is set.
+	if authDir := strings.TrimSpace(cfg.AuthDir); authDir != "" {
+		return filepath.Join(authDir, defaultDir)
+	}
+	return defaultDir
+}
+
 // ConfigureLogOutput switches the global log destination between rotating files and stdout.
 // When logsMaxTotalSizeMB > 0, a background cleaner removes the oldest log files in the logs directory
 // until the total size is within the limit.
@@ -131,12 +149,7 @@ func ConfigureLogOutput(cfg *config.Config) error {
 	writerMu.Lock()
 	defer writerMu.Unlock()

-	logDir := "logs"
-	if base := util.WritablePath(); base != "" {
-		logDir = filepath.Join(base, "logs")
-	} else if !isDirWritable(logDir) {
-		logDir = filepath.Join(cfg.AuthDir, "logs")
-	}
+	logDir := ResolveLogDirectory(cfg)

 	protectedPath := ""
 	if cfg.LoggingToFile {
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -44,10 +44,12 @@ type RequestLogger interface {
 	//   - apiRequest: The API request data
 	//   - apiResponse: The API response data
 	//   - requestID: Optional request ID for log file naming
+	//   - requestTimestamp: When the request was received
+	//   - apiResponseTimestamp: When the API response was received
 	//
 	// Returns:
 	//   - error: An error if logging fails, nil otherwise
-	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error
+	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error

 	// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
 	//
@@ -109,6 +111,12 @@ type StreamingLogWriter interface {
 	//   - error: An error if writing fails, nil otherwise
 	WriteAPIResponse(apiResponse []byte) error

+	// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
+	//
+	// Parameters:
+	//   - timestamp: The time when first response chunk was received
+	SetFirstChunkTimestamp(timestamp time.Time)
+
 	// Close finalizes the log file and cleans up resources.
 	//
 	// Returns:
@@ -180,20 +188,22 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
 //   - apiRequest: The API request data
 //   - apiResponse: The API response data
 //   - requestID: Optional request ID for log file naming
+//   - requestTimestamp: When the request was received
+//   - apiResponseTimestamp: When the API response was received
 //
 // Returns:
 //   - error: An error if logging fails, nil otherwise
-func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
+func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
 }

 // LogRequestWithOptions logs a request with optional forced logging behavior.
 // The force flag allows writing error logs even when regular request logging is disabled.
-func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
 }

-func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
 	if !l.enabled && !force {
 		return nil
 	}
@@ -247,6 +257,8 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
 		responseHeaders,
 		responseToWrite,
 		decompressErr,
+		requestTimestamp,
+		apiResponseTimestamp,
 	)
 	if errClose := logFile.Close(); errClose != nil {
 		log.WithError(errClose).Warn("failed to close request log file")
@@ -499,17 +511,22 @@ func (l *FileRequestLogger) writeNonStreamingLog(
 	responseHeaders map[string][]string,
 	response []byte,
 	decompressErr error,
+	requestTimestamp time.Time,
+	apiResponseTimestamp time.Time,
 ) error {
-	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, time.Now()); errWrite != nil {
+	if requestTimestamp.IsZero() {
+		requestTimestamp = time.Now()
+	}
+	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest); errWrite != nil {
+	if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
 		return errWrite
 	}
 	if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse); errWrite != nil {
+	if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
 		return errWrite
 	}
 	return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
@@ -583,7 +600,7 @@ func writeRequestInfoWithBody(
 	return nil
 }

-func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte) error {
+func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
 	if len(payload) == 0 {
 		return nil
 	}
@@ -601,6 +618,11 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
 		if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
 			return errWrite
 		}
+		if !timestamp.IsZero() {
+			if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
+				return errWrite
+			}
+		}
 		if _, errWrite := w.Write(payload); errWrite != nil {
 			return errWrite
 		}
@@ -974,6 +996,9 @@ type FileStreamingLogWriter struct {

 	// apiResponse stores the upstream API response data.
 	apiResponse []byte
+
+	// apiResponseTimestamp captures when the API response was received.
+	apiResponseTimestamp time.Time
 }

 // WriteChunkAsync writes a response chunk asynchronously (non-blocking).
@@ -1053,6 +1078,12 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
 	return nil
 }

+func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
+	if !timestamp.IsZero() {
+		w.apiResponseTimestamp = timestamp
+	}
+}
+
 // Close finalizes the log file and cleans up resources.
 // It writes all buffered data to the file in the correct order:
 // API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
@@ -1140,10 +1171,10 @@ func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
 	if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest); errWrite != nil {
+	if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
 		return errWrite
 	}
-	if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse); errWrite != nil {
+	if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse, w.apiResponseTimestamp); errWrite != nil {
 		return errWrite
 	}

@@ -1220,6 +1251,8 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
 	return nil
 }

+func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}
+
 // Close is a no-op implementation that does nothing and always returns nil.
 //
 // Returns:
--- a/internal/registry/kiro_model_converter.go
+++ b/internal/registry/kiro_model_converter.go
@@ -0,0 +1,303 @@
+// Package registry provides Kiro model conversion utilities.
+// This file handles converting dynamic Kiro API model lists to the internal ModelInfo format,
+// and merging with static metadata for thinking support and other capabilities.
+package registry
+
+import (
+	"strings"
+	"time"
+)
+
+// KiroAPIModel represents a single model entry from a Kiro API response.
+// This is a local copy to avoid import cycles with the kiro package.
+// The structure mirrors kiro.KiroModel for easy data conversion.
+type KiroAPIModel struct {
+	// ModelID is the raw identifier from the API (e.g., "claude-sonnet-4.5"); entries with an empty ID are skipped by ConvertKiroAPIModels
+	ModelID string
+	// ModelName is the human-readable name; when empty, a display name is derived from the normalized ID
+	ModelName string
+	// Description is the model description, copied as-is into ModelInfo.Description
+	Description string
+	// RateMultiplier is the credit multiplier for this model (not read by the converters in this file)
+	RateMultiplier float64
+	// RateUnit is the unit for rate calculation (e.g., "credit"; not read by the converters in this file)
+	RateUnit string
+	// MaxInputTokens is the maximum input token limit; non-positive values fall back to DefaultKiroContextLength
+	MaxInputTokens int
+}
+
+// DefaultKiroThinkingSupport defines the default thinking configuration for Kiro models.
+// Every model produced by ConvertKiroAPIModels receives its own clone of this budget range.
+var DefaultKiroThinkingSupport = &ThinkingSupport{
+	Min:            1024,  // Minimum thinking budget tokens
+	Max:            32000, // Maximum thinking budget tokens
+	ZeroAllowed:    true,  // Allow disabling thinking with 0
+	DynamicAllowed: true,  // Allow dynamic thinking budget (-1)
+}
+
+// DefaultKiroContextLength is the context window size, in tokens, used when the API reports no positive MaxInputTokens.
+const DefaultKiroContextLength = 200000
+
+// DefaultKiroMaxCompletionTokens is the max completion token limit assigned to every converted Kiro model.
+const DefaultKiroMaxCompletionTokens = 64000
+
+// ConvertKiroAPIModels converts Kiro API models to internal ModelInfo format.
+// It performs the following transformations:
+//   - Normalizes model ID (e.g., claude-sonnet-4.5 → kiro-claude-sonnet-4-5)
+//   - Adds a private copy of the default thinking support metadata
+//   - Uses MaxInputTokens as the context length (default if not positive)
+//   - Always applies the default max completion tokens
+//
+// Nil entries and entries whose ID is empty or whitespace-only are skipped.
+//
+// Parameters:
+//   - kiroModels: List of models from Kiro API response
+//
+// Returns:
+//   - []*ModelInfo: Converted model information list (nil for empty input)
+func ConvertKiroAPIModels(kiroModels []*KiroAPIModel) []*ModelInfo {
+	if len(kiroModels) == 0 {
+		return nil
+	}
+
+	// Stamp every model in this batch with the same creation time
+	now := time.Now().Unix()
+	result := make([]*ModelInfo, 0, len(kiroModels))
+
+	for _, km := range kiroModels {
+		// Skip nil entries and blank IDs. The normalizer trims whitespace, so a
+		// whitespace-only ID would otherwise be registered as the bogus "kiro-".
+		if km == nil || strings.TrimSpace(km.ModelID) == "" {
+			continue
+		}
+
+		// Normalize the model ID to kiro-* format
+		normalizedID := normalizeKiroModelID(km.ModelID)
+
+		// Create ModelInfo with converted data
+		info := &ModelInfo{
+			ID:          normalizedID,
+			Object:      "model",
+			Created:     now,
+			OwnedBy:     "aws",
+			Type:        "kiro",
+			DisplayName: generateKiroDisplayName(km.ModelName, normalizedID),
+			Description: km.Description,
+			// Use MaxInputTokens from API if available, otherwise use default
+			ContextLength:       getContextLength(km.MaxInputTokens),
+			MaxCompletionTokens: DefaultKiroMaxCompletionTokens,
+			// All Kiro models support thinking
+			Thinking: cloneThinkingSupport(DefaultKiroThinkingSupport),
+		}
+
+		result = append(result, info)
+	}
+
+	return result
+}
+
+// GenerateAgenticVariants expands a model list with "-agentic" counterparts.
+// Each non-nil base model is kept, immediately followed by its agentic variant
+// unless the ID already ends in "-agentic" or is the special "kiro-auto".
+// Agentic variants are optimized for coding agents with chunked writes.
+//
+// Parameters:
+//   - models: Base models to generate variants for
+//
+// Returns:
+//   - []*ModelInfo: Base models interleaved with their agentic variants (nil for empty input)
+func GenerateAgenticVariants(models []*ModelInfo) []*ModelInfo {
+	if len(models) == 0 {
+		return nil
+	}
+
+	// Worst case every base model gains a variant, so reserve twice the input size
+	expanded := make([]*ModelInfo, 0, len(models)*2)
+
+	for _, base := range models {
+		if base == nil {
+			continue
+		}
+
+		// The base model always precedes its variant in the output
+		expanded = append(expanded, base)
+
+		// No variant for IDs that are already agentic or for the special
+		// kiro-auto model
+		alreadyAgentic := strings.HasSuffix(base.ID, "-agentic")
+		if alreadyAgentic || base.ID == "kiro-auto" {
+			continue
+		}
+
+		// Build the variant from the base model's fields; Thinking is cloned so
+		// the two entries do not share a ThinkingSupport value
+		variantID := base.ID + "-agentic"
+		variantName := base.DisplayName + " (Agentic)"
+		expanded = append(expanded, &ModelInfo{
+			ID:                  variantID,
+			Object:              base.Object,
+			Created:             base.Created,
+			OwnedBy:             base.OwnedBy,
+			Type:                base.Type,
+			DisplayName:         variantName,
+			Description:         generateAgenticDescription(base.Description),
+			ContextLength:       base.ContextLength,
+			MaxCompletionTokens: base.MaxCompletionTokens,
+			Thinking:            cloneThinkingSupport(base.Thinking),
+		})
+	}
+
+	return expanded
+}
+
+// MergeWithStaticMetadata combines dynamically discovered models with static ones.
+// When both lists contain the same ID, the static entry replaces the dynamic one
+// as a whole, allowing manual overrides while keeping dynamic discovery.
+//
+// Output order: dynamic models first (first occurrence of each ID), followed
+// by static models whose IDs were not in the dynamic list. Nil entries and
+// entries with an empty ID are dropped.
+//
+// Parameters:
+//   - dynamicModels: Models from Kiro API (converted to ModelInfo)
+//   - staticModels: Predefined model metadata (from GetKiroModels())
+//
+// Returns:
+//   - []*ModelInfo: Merged model list with static entries taking priority
+func MergeWithStaticMetadata(dynamicModels, staticModels []*ModelInfo) []*ModelInfo {
+	if len(dynamicModels)+len(staticModels) == 0 {
+		return nil
+	}
+
+	// Index static entries by ID; a later duplicate overwrites an earlier one
+	staticByID := make(map[string]*ModelInfo, len(staticModels))
+	for _, entry := range staticModels {
+		if entry == nil || entry.ID == "" {
+			continue
+		}
+		staticByID[entry.ID] = entry
+	}
+
+	// emitted tracks IDs already placed in the output
+	emitted := make(map[string]struct{})
+	merged := make([]*ModelInfo, 0, len(dynamicModels)+len(staticModels))
+
+	// Pass 1: walk the dynamic list, swapping in the static entry when one exists
+	for _, dyn := range dynamicModels {
+		if dyn == nil || dyn.ID == "" {
+			continue
+		}
+		if _, dup := emitted[dyn.ID]; dup {
+			continue
+		}
+		emitted[dyn.ID] = struct{}{}
+
+		chosen := dyn
+		if override, ok := staticByID[dyn.ID]; ok {
+			chosen = override
+		}
+		merged = append(merged, chosen)
+	}
+
+	// Pass 2: append static entries that the dynamic list did not mention
+	for _, entry := range staticModels {
+		if entry == nil || entry.ID == "" {
+			continue
+		}
+		if _, dup := emitted[entry.ID]; dup {
+			continue
+		}
+		emitted[entry.ID] = struct{}{}
+		merged = append(merged, entry)
+	}
+
+	return merged
+}
+
+// normalizeKiroModelID converts Kiro API model IDs to internal format.
+// Transformation rules:
+//   - Trims surrounding whitespace; empty or whitespace-only input yields ""
+//   - Replaces dots with hyphens (e.g., 4.5 → 4-5)
+//   - Adds "kiro-" prefix if not present (so "auto" → "kiro-auto")
+//
+// Examples:
+//   - "claude-sonnet-4.5" → "kiro-claude-sonnet-4-5"
+//   - "claude-opus-4.5" → "kiro-claude-opus-4-5"
+//   - "auto" → "kiro-auto"
+//   - "kiro-claude-sonnet-4-5" → "kiro-claude-sonnet-4-5" (unchanged)
+//   - "   " → "" (never a bare "kiro-")
+func normalizeKiroModelID(modelID string) string {
+	// Trim before the emptiness check so whitespace-only input is rejected
+	modelID = strings.TrimSpace(modelID)
+	if modelID == "" {
+		return ""
+	}
+
+	// Replace dots with hyphens (e.g., 4.5 → 4-5)
+	normalized := strings.ReplaceAll(modelID, ".", "-")
+
+	// Add kiro- prefix if not present
+	if !strings.HasPrefix(normalized, "kiro-") {
+		normalized = "kiro-" + normalized
+	}
+
+	return normalized
+}
+
+// generateKiroDisplayName creates a human-readable display name.
+// Uses the API-provided model name if available, otherwise generates from ID.
+func generateKiroDisplayName(modelName, normalizedID string) string {
+	if modelName != "" {
+		return "Kiro " + modelName
+	}
+
+	// Drop the kiro- prefix, then title-case each hyphen-separated word
+	words := strings.Split(strings.TrimPrefix(normalizedID, "kiro-"), "-")
+	for i, word := range words {
+		// Upper-case the first rune, not the first byte, so a multi-byte
+		// leading character is not split into invalid UTF-8
+		if runes := []rune(word); len(runes) > 0 {
+			words[i] = strings.ToUpper(string(runes[:1])) + string(runes[1:])
+		}
+	}
+	return "Kiro " + strings.Join(words, " ")
+}
+
+// generateAgenticDescription builds the description text for an -agentic variant.
+func generateAgenticDescription(baseDescription string) string {
+	if baseDescription != "" {
+		return baseDescription + " (Agentic mode: chunked writes)"
+	}
+	return "Optimized for coding agents with chunked writes"
+}
+
+// getContextLength picks maxInputTokens when positive, else DefaultKiroContextLength.
+func getContextLength(maxInputTokens int) int {
+	if maxInputTokens <= 0 {
+		return DefaultKiroContextLength
+	}
+	return maxInputTokens
+}
+
+// cloneThinkingSupport returns an independent copy of ts, or nil when ts is nil.
+// Levels gets its own backing array; an empty Levels slice is left nil in the copy.
+func cloneThinkingSupport(ts *ThinkingSupport) *ThinkingSupport {
+	if ts == nil {
+		return nil
+	}
+
+	// Duplicate Levels up front so the copy never aliases the source slice
+	var levels []string
+	if n := len(ts.Levels); n > 0 {
+		levels = make([]string, n)
+		copy(levels, ts.Levels)
+	}
+
+	return &ThinkingSupport{
+		Min:            ts.Min,
+		Max:            ts.Max,
+		ZeroAllowed:    ts.ZeroAllowed,
+		DynamicAllowed: ts.DynamicAllowed,
+		Levels:         levels,
+	}
+}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -1,785 +1,69 @@
-// Package registry provides model definitions for various AI service providers.
-// This file contains static model definitions that can be used by clients
-// when registering their supported models.
+// Package registry provides model definitions and lookup helpers for various AI providers.
+// Static model metadata is stored in model_definitions_static_data.go.
 package registry

-// GetClaudeModels returns the standard Claude model definitions
-func GetClaudeModels() []*ModelInfo {
-	return []*ModelInfo{
+import (
+	"sort"
+	"strings"
+)

-		{
-			ID:                  "claude-haiku-4-5-20251001",
-			Object:              "model",
-			Created:             1759276800, // 2025-10-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Haiku",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			// Thinking: not supported for Haiku models
-		},
-		{
-			ID:                  "claude-sonnet-4-5-20250929",
-			Object:              "model",
-			Created:             1759104000, // 2025-09-29
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-5-20251101",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus",
-			Description:         "Premium model combining maximum intelligence with practical performance",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-1-20250805",
-			Object:              "model",
-			Created:             1722945600, // 2025-08-05
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.1 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Opus",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-sonnet-4-20250514",
-			Object:              "model",
-			Created:             1715644800, // 2025-05-14
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-3-7-sonnet-20250219",
-			Object:              "model",
-			Created:             1708300800, // 2025-02-19
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.7 Sonnet",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-3-5-haiku-20241022",
-			Object:              "model",
-			Created:             1729555200, // 2024-10-22
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 3.5 Haiku",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-			// Thinking: not supported for Haiku models
-		},
-	}
-}
-
-// GetGeminiModels returns the standard Gemini model definitions
-func GetGeminiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Gemini 3 Flash Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-	}
-}
-
-func GetGeminiVertexModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                         "gemini-3-pro-image-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-image-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Image Preview",
-			Description:                "Gemini 3 Pro Image Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-	}
-}
-
-// GetGeminiCLIModels returns the standard Gemini model definitions
-func GetGeminiCLIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-	}
-}
-
-// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
-func GetAIStudioModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                         "gemini-2.5-pro",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-pro",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Pro",
-			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash",
-			Version:                    "001",
-			DisplayName:                "Gemini 2.5 Flash",
-			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-lite",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-lite",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Lite",
-			Description:                "Our smallest and most cost effective model, built for at scale usage.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-pro-preview",
-			Object:                     "model",
-			Created:                    1737158400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-3-flash-preview",
-			Object:                     "model",
-			Created:                    1765929600,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3-flash-preview",
-			Version:                    "3.0",
-			DisplayName:                "Gemini 3 Flash Preview",
-			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-pro-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-pro-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Pro Latest",
-			Description:                "Latest release of Gemini Pro",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-latest",
-			Object:                     "model",
-			Created:                    1750118400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash Latest",
-			Description:                "Latest release of Gemini Flash",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-flash-lite-latest",
-			Object:                     "model",
-			Created:                    1753142400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-flash-lite-latest",
-			Version:                    "2.5",
-			DisplayName:                "Gemini Flash-Lite Latest",
-			Description:                "Latest release of Gemini Flash-Lite",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                         "gemini-2.5-flash-image-preview",
-			Object:                     "model",
-			Created:                    1756166400,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image-preview",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image Preview",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-		{
-			ID:                         "gemini-2.5-flash-image",
-			Object:                     "model",
-			Created:                    1759363200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-2.5-flash-image",
-			Version:                    "2.5",
-			DisplayName:                "Gemini 2.5 Flash Image",
-			Description:                "State-of-the-art image generation and editing model.",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           8192,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			// image models don't support thinkingConfig; leave Thinking nil
-		},
-	}
-}
-
-// GetOpenAIModels returns the standard OpenAI model definitions
-func GetOpenAIModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "gpt-5",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex-mini",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Mini",
-			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max",
-			Description:         "Stable version of GPT 5.1 Codex Max",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2",
-			Description:         "Stable version of GPT 5.2",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2-codex",
-			Object:              "model",
-			Created:             1765440000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.2",
-			DisplayName:         "GPT 5.2 Codex",
-			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-	}
-}
-
-// GetQwenModels returns the standard Qwen model definitions
-func GetQwenModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "qwen3-coder-plus",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Plus",
-			Description:         "Advanced code generation and understanding model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 8192,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "qwen3-coder-flash",
-			Object:              "model",
-			Created:             1753228800,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Coder Flash",
-			Description:         "Fast code generation model",
-			ContextLength:       8192,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-		{
-			ID:                  "vision-model",
-			Object:              "model",
-			Created:             1758672000,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.0",
-			DisplayName:         "Qwen3 Vision Model",
-			Description:         "Vision model model",
-			ContextLength:       32768,
-			MaxCompletionTokens: 2048,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
-	}
-}
-
-// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
-// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
-// Uses level-based configuration so standard normalization flows apply before conversion.
-var iFlowThinkingSupport = &ThinkingSupport{
-	Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
-}
-
-// GetIFlowModels returns supported models for iFlow OAuth accounts.
-func GetIFlowModels() []*ModelInfo {
-	entries := []struct {
-		ID          string
-		DisplayName string
-		Description string
-		Created     int64
-		Thinking    *ThinkingSupport
-	}{
-		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
-		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
-		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
-		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
-		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
-		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
-		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
-		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
-		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
-		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
-		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
-		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
-		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
-		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
-		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
-		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
-		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
-	}
-	models := make([]*ModelInfo, 0, len(entries))
-	for _, entry := range entries {
-		models = append(models, &ModelInfo{
-			ID:          entry.ID,
-			Object:      "model",
-			Created:     entry.Created,
-			OwnedBy:     "iflow",
-			Type:        "iflow",
-			DisplayName: entry.DisplayName,
-			Description: entry.Description,
-			Thinking:    entry.Thinking,
+// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
+// The channel name is trimmed and lower-cased before matching; nil is returned when the channel is unknown.
+//
+// Supported channels:
+//   - claude
+//   - gemini
+//   - vertex
+//   - gemini-cli
+//   - aistudio
+//   - codex
+//   - qwen
+//   - iflow
+//   - antigravity (returns static overrides only)
+func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
+	key := strings.ToLower(strings.TrimSpace(channel))
+	switch key {
+	case "claude":
+		return GetClaudeModels()
+	case "gemini":
+		return GetGeminiModels()
+	case "vertex":
+		return GetGeminiVertexModels()
+	case "gemini-cli":
+		return GetGeminiCLIModels()
+	case "aistudio":
+		return GetAIStudioModels()
+	case "codex":
+		return GetOpenAIModels()
+	case "qwen":
+		return GetQwenModels()
+	case "iflow":
+		return GetIFlowModels()
+	case "antigravity":
+		cfg := GetAntigravityModelConfig()
+		if len(cfg) == 0 {
+			return nil
+		}
+		models := make([]*ModelInfo, 0, len(cfg))
+		for modelID, entry := range cfg {
+			if modelID == "" || entry == nil {
+				continue
+			}
+			models = append(models, &ModelInfo{
+				ID:                  modelID,
+				Object:              "model",
+				OwnedBy:             "antigravity",
+				Type:                "antigravity",
+				Thinking:            entry.Thinking,
+				MaxCompletionTokens: entry.MaxCompletionTokens,
+			})
+		}
+		sort.Slice(models, func(i, j int) bool {
+			return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
 		})
-	}
-	return models
-}
-
-// AntigravityModelConfig captures static antigravity model overrides, including
-// Thinking budget limits and provider max completion tokens.
-type AntigravityModelConfig struct {
-	Thinking            *ThinkingSupport
-	MaxCompletionTokens int
-	Name                string
-}
-
-// GetAntigravityModelConfig returns static configuration for antigravity models.
-// Keys use upstream model names returned by the Antigravity models endpoint.
-func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
-	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":           {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
-		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
-		"rev19-uic3-1p":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/rev19-uic3-1p"},
-		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"},
-		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"},
-		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"},
-		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		return models
+	default:
+		return nil
 	}
 }

@@ -809,10 +93,9 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 	}

 	// Check Antigravity static config
-	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil && cfg.Thinking != nil {
+	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
 		return &ModelInfo{
 			ID:                  modelID,
-			Name:                cfg.Name,
 			Thinking:            cfg.Thinking,
 			MaxCompletionTokens: cfg.MaxCompletionTokens,
 		}
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -0,0 +1,846 @@
+// Package registry provides model definitions for various AI service providers.
+// This file stores the static model metadata catalog.
+package registry
+
+// GetClaudeModels returns the static Claude model catalog; Created is the Unix time (UTC midnight) of the date in each model ID.
+func GetClaudeModels() []*ModelInfo {
+	return []*ModelInfo{
+
+		{
+			ID:                  "claude-haiku-4-5-20251001",
+			Object:              "model",
+			Created:             1759276800, // 2025-10-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Haiku",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			// Thinking: not supported for Haiku models
+		},
+		{
+			ID:                  "claude-sonnet-4-5-20250929",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-5-20251101",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-1-20250805",
+			Object:              "model",
+			Created:             1754352000, // 2025-08-05
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.1 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-opus-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-sonnet-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-7-sonnet-20250219",
+			Object:              "model",
+			Created:             1739923200, // 2025-02-19
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.7 Sonnet",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
+		},
+		{
+			ID:                  "claude-3-5-haiku-20241022",
+			Object:              "model",
+			Created:             1729555200, // 2024-10-22
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.5 Haiku",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+			// Thinking: not supported for Haiku models
+		},
+	}
+}
+
+// GetGeminiModels returns the static catalog of Gemini models (2.5 and 3 preview families) with their token limits and thinking budgets.
+func GetGeminiModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                         "gemini-2.5-pro",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-pro",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Pro",
+			Description:                "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash",
+			Object:                     "model",
+			Created:                    1750118400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash",
+			Version:                    "001",
+			DisplayName:                "Gemini 2.5 Flash",
+			Description:                "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-2.5-flash-lite",
+			Object:                     "model",
+			Created:                    1753142400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): decodes to 2025-01-18 UTC, earlier than the 2.5 entries above - confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Gemini 3 Flash Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400, // NOTE(review): decodes to 2025-01-18 UTC, earlier than the 2.5 entries above - confirm
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+	}
+}
+
+// GetGeminiVertexModels returns the Gemini Vertex model definitions: the Gemini
+// models first, followed by the Imagen image generation models.
+func GetGeminiVertexModels() []*ModelInfo {
+	// Only the fields that differ between models are listed in the tables; the
+	// fields every entry has in common are filled in by the loops below.
+	gemini := []*ModelInfo{
+		{ID: "gemini-2.5-pro", Created: 1750118400, Version: "2.5", DisplayName: "Gemini 2.5 Pro", Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash", Created: 1750118400, Version: "001", DisplayName: "Gemini 2.5 Flash", Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash-lite", Created: 1753142400, Version: "2.5", DisplayName: "Gemini 2.5 Flash Lite", Description: "Our smallest and most cost effective model, built for at scale usage.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-3-pro-preview", Created: 1737158400, Version: "3.0", DisplayName: "Gemini 3 Pro Preview", Description: "Gemini 3 Pro Preview", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		{ID: "gemini-3-flash-preview", Created: 1765929600, Version: "3.0", DisplayName: "Gemini 3 Flash Preview", Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+		{ID: "gemini-3-pro-image-preview", Created: 1737158400, Version: "3.0", DisplayName: "Gemini 3 Pro Image Preview", Description: "Gemini 3 Pro Image Preview", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+	}
+	for _, m := range gemini {
+		m.Object = "model"
+		m.OwnedBy = "google"
+		m.Type = "gemini"
+		m.Name = "models/" + m.ID
+		m.InputTokenLimit = 1048576
+		m.OutputTokenLimit = 65536
+		// A new slice per model, so entries never share a backing array.
+		m.SupportedGenerationMethods = []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}
+	}
+
+	// Imagen image generation models - use :predict action.
+	// They carry no token limits and no Thinking configuration.
+	imagen := []*ModelInfo{
+		{ID: "imagen-4.0-generate-001", Created: 1750000000, Version: "4.0", DisplayName: "Imagen 4.0 Generate", Description: "Imagen 4.0 image generation model"},
+		{ID: "imagen-4.0-ultra-generate-001", Created: 1750000000, Version: "4.0", DisplayName: "Imagen 4.0 Ultra Generate", Description: "Imagen 4.0 Ultra high-quality image generation model"},
+		{ID: "imagen-3.0-generate-002", Created: 1740000000, Version: "3.0", DisplayName: "Imagen 3.0 Generate", Description: "Imagen 3.0 image generation model"},
+		{ID: "imagen-3.0-fast-generate-001", Created: 1740000000, Version: "3.0", DisplayName: "Imagen 3.0 Fast Generate", Description: "Imagen 3.0 fast image generation model"},
+		{ID: "imagen-4.0-fast-generate-001", Created: 1750000000, Version: "4.0", DisplayName: "Imagen 4.0 Fast Generate", Description: "Imagen 4.0 fast image generation model"},
+	}
+	for _, m := range imagen {
+		m.Object = "model"
+		m.OwnedBy = "google"
+		m.Type = "gemini"
+		m.Name = "models/" + m.ID
+		m.SupportedGenerationMethods = []string{"predict"}
+	}
+
+	models := make([]*ModelInfo, 0, len(gemini)+len(imagen))
+	models = append(models, gemini...)
+	models = append(models, imagen...)
+	return models
+}
+
+// GetGeminiCLIModels returns the standard set of Gemini model definitions.
+func GetGeminiCLIModels() []*ModelInfo {
+	// The table lists only what differs per model; shared fields are filled in below.
+	models := []*ModelInfo{
+		{ID: "gemini-2.5-pro", Created: 1750118400, Version: "2.5", DisplayName: "Gemini 2.5 Pro", Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash", Created: 1750118400, Version: "001", DisplayName: "Gemini 2.5 Flash", Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash-lite", Created: 1753142400, Version: "2.5", DisplayName: "Gemini 2.5 Flash Lite", Description: "Our smallest and most cost effective model, built for at scale usage.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-3-pro-preview", Created: 1737158400, Version: "3.0", DisplayName: "Gemini 3 Pro Preview", Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
+		{ID: "gemini-3-flash-preview", Created: 1765929600, Version: "3.0", DisplayName: "Gemini 3 Flash Preview", Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+	}
+	for _, m := range models {
+		m.Object = "model"
+		m.OwnedBy = "google"
+		m.Type = "gemini"
+		m.Name = "models/" + m.ID
+		m.InputTokenLimit = 1048576
+		m.OutputTokenLimit = 65536
+		// A new slice per model, so entries never share a backing array.
+		m.SupportedGenerationMethods = []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}
+	}
+	return models
+}
+
+// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations.
+func GetAIStudioModels() []*ModelInfo {
+	// The table lists only what differs per model; shared fields are filled in below.
+	// OutputTokenLimit is given explicitly only where it deviates from 65536.
+	models := []*ModelInfo{
+		{ID: "gemini-2.5-pro", Created: 1750118400, Version: "2.5", DisplayName: "Gemini 2.5 Pro", Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash", Created: 1750118400, Version: "001", DisplayName: "Gemini 2.5 Flash", Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-2.5-flash-lite", Created: 1753142400, Version: "2.5", DisplayName: "Gemini 2.5 Flash Lite", Description: "Our smallest and most cost effective model, built for at scale usage.", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-3-pro-preview", Created: 1737158400, Version: "3.0", DisplayName: "Gemini 3 Pro Preview", Description: "Gemini 3 Pro Preview", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-3-flash-preview", Created: 1765929600, Version: "3.0", DisplayName: "Gemini 3 Flash Preview", Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-pro-latest", Created: 1750118400, Version: "2.5", DisplayName: "Gemini Pro Latest", Description: "Latest release of Gemini Pro", Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		{ID: "gemini-flash-latest", Created: 1750118400, Version: "2.5", DisplayName: "Gemini Flash Latest", Description: "Latest release of Gemini Flash", Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		{ID: "gemini-flash-lite-latest", Created: 1753142400, Version: "2.5", DisplayName: "Gemini Flash-Lite Latest", Description: "Latest release of Gemini Flash-Lite", Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		// Disabled entry, kept for reference:
+		// {ID: "gemini-2.5-flash-image-preview", Created: 1756166400, Version: "2.5", DisplayName: "Gemini 2.5 Flash Image Preview", Description: "State-of-the-art image generation and editing model.", OutputTokenLimit: 8192},
+		// image models don't support thinkingConfig; leave Thinking nil
+		{ID: "gemini-2.5-flash-image", Created: 1759363200, Version: "2.5", DisplayName: "Gemini 2.5 Flash Image", Description: "State-of-the-art image generation and editing model.", OutputTokenLimit: 8192},
+	}
+	for _, m := range models {
+		m.Object = "model"
+		m.OwnedBy = "google"
+		m.Type = "gemini"
+		m.Name = "models/" + m.ID
+		m.InputTokenLimit = 1048576
+		if m.OutputTokenLimit == 0 {
+			m.OutputTokenLimit = 65536
+		}
+		// A new slice per model, so entries never share a backing array.
+		m.SupportedGenerationMethods = []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}
+	}
+	return models
+}
+
+// GetOpenAIModels returns the standard OpenAI model definitions.
+//
+// Every entry shares the same object kind, owner, type, context length,
+// completion-token limit and supported parameters; only the identity fields
+// and the supported thinking levels vary per model.
+func GetOpenAIModels() []*ModelInfo {
+	// The table lists only what differs per model; shared fields are filled in below.
+	models := []*ModelInfo{
+		{ID: "gpt-5", Created: 1754524800, Version: "gpt-5-2025-08-07", DisplayName: "GPT 5", Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}}},
+		{ID: "gpt-5-codex", Created: 1757894400, Version: "gpt-5-2025-09-15", DisplayName: "GPT 5 Codex", Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
+		{ID: "gpt-5-codex-mini", Created: 1762473600, Version: "gpt-5-2025-11-07", DisplayName: "GPT 5 Codex Mini", Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
+		// The display name and description must say 5.1 so this entry is
+		// distinguishable from "gpt-5" in model listings.
+		{ID: "gpt-5.1", Created: 1762905600, Version: "gpt-5.1-2025-11-12", DisplayName: "GPT 5.1", Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.", Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}},
+		{ID: "gpt-5.1-codex", Created: 1762905600, Version: "gpt-5.1-2025-11-12", DisplayName: "GPT 5.1 Codex", Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
+		{ID: "gpt-5.1-codex-mini", Created: 1762905600, Version: "gpt-5.1-2025-11-12", DisplayName: "GPT 5.1 Codex Mini", Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
+		{ID: "gpt-5.1-codex-max", Created: 1763424000, Version: "gpt-5.1-max", DisplayName: "GPT 5.1 Codex Max", Description: "Stable version of GPT 5.1 Codex Max", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}},
+		{ID: "gpt-5.2", Created: 1765440000, Version: "gpt-5.2", DisplayName: "GPT 5.2", Description: "Stable version of GPT 5.2", Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}}},
+		{ID: "gpt-5.2-codex", Created: 1765440000, Version: "gpt-5.2", DisplayName: "GPT 5.2 Codex", Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}},
+	}
+	for _, m := range models {
+		m.Object = "model"
+		m.OwnedBy = "openai"
+		m.Type = "openai"
+		m.ContextLength = 400000
+		m.MaxCompletionTokens = 128000
+		// A new slice per model, so entries never share a backing array.
+		m.SupportedParameters = []string{"tools"}
+	}
+	return models
+}
+
+// GetQwenModels returns the standard Qwen model definitions.
+//
+// Every entry shares the same object kind, owner, type, version and supported
+// parameters; the identity fields and the context/completion limits vary per model.
+func GetQwenModels() []*ModelInfo {
+	// The table lists only what differs per model; shared fields are filled in below.
+	models := []*ModelInfo{
+		{ID: "qwen3-coder-plus", Created: 1753228800, DisplayName: "Qwen3 Coder Plus", Description: "Advanced code generation and understanding model", ContextLength: 32768, MaxCompletionTokens: 8192},
+		{ID: "qwen3-coder-flash", Created: 1753228800, DisplayName: "Qwen3 Coder Flash", Description: "Fast code generation model", ContextLength: 8192, MaxCompletionTokens: 2048},
+		{ID: "vision-model", Created: 1758672000, DisplayName: "Qwen3 Vision Model", Description: "Vision model", ContextLength: 32768, MaxCompletionTokens: 2048},
+	}
+	for _, m := range models {
+		m.Object = "model"
+		m.OwnedBy = "qwen"
+		m.Type = "qwen"
+		m.Version = "3.0"
+		// A new slice per model, so entries never share a backing array.
+		m.SupportedParameters = []string{"temperature", "top_p", "max_tokens", "stream", "stop"}
+	}
+	return models
+}
+
+// iFlowThinkingSupport is the ThinkingSupport value shared, by pointer, by the iFlow
+// models that toggle thinking through chat_template_kwargs.enable_thinking (a boolean).
+// It is expressed as levels so the standard normalization flows apply before conversion.
+var iFlowThinkingSupport = &ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}}
+
+// GetIFlowModels returns supported models for iFlow OAuth accounts.
+// Models that support thinking all point at the shared iFlowThinkingSupport value.
+func GetIFlowModels() []*ModelInfo {
+	// The table lists only what differs per model; the constant fields are filled in below.
+	models := []*ModelInfo{
+		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
+		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
+		{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
+		{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
+		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
+		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
+		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
+		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
+		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
+		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
+		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
+		{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
+		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
+		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
+		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
+		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
+		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
+		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
+		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
+		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
+		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
+		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
+	}
+	for _, m := range models {
+		m.Object = "model"
+		m.OwnedBy = "iflow"
+		m.Type = "iflow"
+	}
+	return models
+}
+
+// AntigravityModelConfig captures static antigravity model overrides, including
+// Thinking budget limits and provider max completion tokens.
+//
+// Both fields are optional: in the table returned by GetAntigravityModelConfig,
+// Thinking is nil for entries that set no thinking override, and
+// MaxCompletionTokens is zero for entries that set no completion-token limit.
+type AntigravityModelConfig struct {
+	Thinking            *ThinkingSupport
+	MaxCompletionTokens int
+}
+
+// GetAntigravityModelConfig builds the static per-model configuration for antigravity models.
+// The map is keyed by the upstream model names returned by the Antigravity models endpoint,
+// and a fresh map with fresh entries is built on every call.
+func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
+	configs := make(map[string]*AntigravityModelConfig, 10)
+
+	// Disabled entry, kept for reference:
+	// configs["rev19-uic3-1p"] = &AntigravityModelConfig{Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}}
+
+	// Gemini models: thinking overrides only.
+	configs["gemini-2.5-flash"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+	}
+	configs["gemini-2.5-flash-lite"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+	}
+	configs["gemini-3-pro-high"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+	}
+	configs["gemini-3-pro-image"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+	}
+	configs["gemini-3-flash"] = &AntigravityModelConfig{
+		Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
+	}
+
+	// Claude models: completion-token limit, plus thinking on the "-thinking" variants.
+	configs["claude-sonnet-4-5-thinking"] = &AntigravityModelConfig{
+		Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true},
+		MaxCompletionTokens: 64000,
+	}
+	configs["claude-opus-4-5-thinking"] = &AntigravityModelConfig{
+		Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true},
+		MaxCompletionTokens: 64000,
+	}
+	configs["claude-sonnet-4-5"] = &AntigravityModelConfig{MaxCompletionTokens: 64000}
+
+	// Models listed with no overrides at all.
+	configs["gpt-oss-120b-medium"] = &AntigravityModelConfig{}
+	configs["tab_flash_lite_preview"] = &AntigravityModelConfig{}
+
+	return configs
+}
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -80,6 +80,8 @@ type ThinkingSupport struct {
 type ModelRegistration struct {
 	// Info contains the model metadata
 	Info *ModelInfo
+	// InfoByProvider maps provider identifiers to specific ModelInfo to support differing capabilities.
+	InfoByProvider map[string]*ModelInfo
 	// Count is the number of active clients that can provide this model
 	Count int
 	// LastUpdated tracks when this registration was last modified
@@ -134,16 +136,19 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }

-// LookupModelInfo searches the dynamic registry first, then falls back to static model definitions.
-//
-// This helper exists because some code paths only have a model ID and still need Thinking and
-// max completion token metadata even when the dynamic registry hasn't been populated.
-func LookupModelInfo(modelID string) *ModelInfo {
+// LookupModelInfo searches the dynamic registry (provider-specific, then global), then static definitions; only provider[0] is used.
+func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
 	modelID = strings.TrimSpace(modelID)
 	if modelID == "" {
 		return nil
 	}
-	if info := GetGlobalRegistry().GetModelInfo(modelID); info != nil {
+
+	p := ""
+	if len(provider) > 0 {
+		p = strings.ToLower(strings.TrimSpace(provider[0]))
+	}
+
+	if info := GetGlobalRegistry().GetModelInfo(modelID, p); info != nil {
 		return info
 	}
 	return LookupStaticModelInfo(modelID)
@@ -299,6 +304,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 				if count, okProv := reg.Providers[oldProvider]; okProv {
 					if count <= toRemove {
 						delete(reg.Providers, oldProvider)
+						if reg.InfoByProvider != nil {
+							delete(reg.InfoByProvider, oldProvider)
+						}
 					} else {
 						reg.Providers[oldProvider] = count - toRemove
 					}
@@ -348,6 +356,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		model := newModels[id]
 		if reg, ok := r.models[id]; ok {
 			reg.Info = cloneModelInfo(model)
+			if provider != "" {
+				if reg.InfoByProvider == nil {
+					reg.InfoByProvider = make(map[string]*ModelInfo)
+				}
+				reg.InfoByProvider[provider] = cloneModelInfo(model)
+			}
 			reg.LastUpdated = now
 			if reg.QuotaExceededClients != nil {
 				delete(reg.QuotaExceededClients, clientID)
@@ -411,11 +425,15 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 		if existing.SuspendedClients == nil {
 			existing.SuspendedClients = make(map[string]string)
 		}
+		if existing.InfoByProvider == nil {
+			existing.InfoByProvider = make(map[string]*ModelInfo)
+		}
 		if provider != "" {
 			if existing.Providers == nil {
 				existing.Providers = make(map[string]int)
 			}
 			existing.Providers[provider]++
+			existing.InfoByProvider[provider] = cloneModelInfo(model)
 		}
 		log.Debugf("Incremented count for model %s, now %d clients", modelID, existing.Count)
 		return
@@ -423,6 +441,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo

 	registration := &ModelRegistration{
 		Info:                 cloneModelInfo(model),
+		InfoByProvider:       make(map[string]*ModelInfo),
 		Count:                1,
 		LastUpdated:          now,
 		QuotaExceededClients: make(map[string]*time.Time),
@@ -430,6 +449,7 @@ func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *Mo
 	}
 	if provider != "" {
 		registration.Providers = map[string]int{provider: 1}
+		registration.InfoByProvider[provider] = cloneModelInfo(model)
 	}
 	r.models[modelID] = registration
 	log.Debugf("Registered new model %s from provider %s", modelID, provider)
@@ -455,6 +475,9 @@ func (r *ModelRegistry) removeModelRegistration(clientID, modelID, provider stri
 		if count, ok := registration.Providers[provider]; ok {
 			if count <= 1 {
 				delete(registration.Providers, provider)
+				if registration.InfoByProvider != nil {
+					delete(registration.InfoByProvider, provider)
+				}
 			} else {
 				registration.Providers[provider] = count - 1
 			}
@@ -539,6 +562,9 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 				if count, ok := registration.Providers[provider]; ok {
 					if count <= 1 {
 						delete(registration.Providers, provider)
+						if registration.InfoByProvider != nil {
+							delete(registration.InfoByProvider, provider)
+						}
 					} else {
 						registration.Providers[provider] = count - 1
 					}
@@ -945,12 +971,22 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }

-// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
-// Returns nil if the model is unknown to the registry.
-func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+// GetModelInfo returns the ModelInfo for modelID, preferring the provider-specific definition while that provider is still registered, or nil if the model is unknown.
+func (r *ModelRegistry) GetModelInfo(modelID, provider string) *ModelInfo {
 	r.mutex.RLock()
 	defer r.mutex.RUnlock()
 	if reg, ok := r.models[modelID]; ok && reg != nil {
+		// Try the provider-specific definition first
+		if provider != "" && reg.InfoByProvider != nil {
+			if reg.Providers != nil {
+				if count, ok := reg.Providers[provider]; ok && count > 0 {
+					if info, ok := reg.InfoByProvider[provider]; ok && info != nil {
+						return info
+					}
+				}
+			}
+		}
+		// Fall back to the global info (last registered)
 		return reg.Info
 	}
 	return nil
@@ -1006,10 +1042,10 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 			"owned_by": model.OwnedBy,
 		}
 		if model.Created > 0 {
-			result["created"] = model.Created
+			result["created_at"] = model.Created
 		}
 		if model.Type != "" {
-			result["type"] = model.Type
+			result["type"] = "model"
 		}
 		if model.DisplayName != "" {
 			result["display_name"] = model.DisplayName
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -393,12 +393,13 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
 	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
-	payload, err := thinking.ApplyThinking(payload, req.Model, "gemini")
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, translatedPayload{}, err
 	}
 	payload = fixGeminiImageAspectRatio(baseModel, payload)
-	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -137,97 +137,119 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return resp, err
-		}
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error

-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return resp, errDo
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return resp, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return resp, errDo
+				}
+				lastStatus = 0
+				lastBody = nil
+				lastErr = errDo
+				if idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				err = errDo
+				return resp, err
 			}
-			err = errDo
-			return resp, err
+
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			bodyBytes, errRead := io.ReadAll(httpResp.Body)
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("antigravity executor: close response body error: %v", errClose)
+			}
+			if errRead != nil {
+				recordAPIResponseError(ctx, e.cfg, errRead)
+				err = errRead
+				return resp, err
+			}
+			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return resp, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return resp, err
+			}
+
+			reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
+			var param any
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
+			reporter.ensurePublished(ctx)
+			return resp, nil
 		}

-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		bodyBytes, errRead := io.ReadAll(httpResp.Body)
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("antigravity executor: close response body error: %v", errClose)
-		}
-		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
-			err = errRead
-			return resp, err
-		}
-		appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return resp, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
-		var param any
-		converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(converted)}
-		reporter.ensurePublished(ctx)
-		return resp, nil
+		return resp, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return resp, err
 }

@@ -256,160 +278,182 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return resp, err
-		}
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error

-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return resp, errDo
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return resp, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			err = errDo
-			return resp, err
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity executor: close response body error: %v", errClose)
-			}
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
-					err = errRead
-					return resp, err
-				}
-				if errCtx := ctx.Err(); errCtx != nil {
-					err = errCtx
-					return resp, err
+
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return resp, errDo
 				}
 				lastStatus = 0
 				lastBody = nil
-				lastErr = errRead
+				lastErr = errDo
 				if idx+1 < len(baseURLs) {
-					log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
-				err = errRead
+				err = errDo
 				return resp, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				bodyBytes, errRead := io.ReadAll(httpResp.Body)
+				if errClose := httpResp.Body.Close(); errClose != nil {
+					log.Errorf("antigravity executor: close response body error: %v", errClose)
+				}
+				if errRead != nil {
+					recordAPIResponseError(ctx, e.cfg, errRead)
+					if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
+						err = errRead
+						return resp, err
+					}
+					if errCtx := ctx.Err(); errCtx != nil {
+						err = errCtx
+						return resp, err
+					}
+					lastStatus = 0
+					lastBody = nil
+					lastErr = errRead
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					err = errRead
+					return resp, err
+				}
+				appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return resp, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return resp, err
 			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+
+			out := make(chan cliproxyexecutor.StreamChunk)
+			go func(resp *http.Response) {
+				defer close(out)
+				defer func() {
+					if errClose := resp.Body.Close(); errClose != nil {
+						log.Errorf("antigravity executor: close response body error: %v", errClose)
+					}
+				}()
+				scanner := bufio.NewScanner(resp.Body)
+				scanner.Buffer(nil, streamScannerBuffer)
+				for scanner.Scan() {
+					line := scanner.Bytes()
+					appendAPIResponseChunk(ctx, e.cfg, line)
+
+					// Filter usage metadata for all models
+					// Only retain usage statistics in the terminal chunk
+					line = FilterSSEUsageMetadata(line)
+
+					payload := jsonPayload(line)
+					if payload == nil {
+						continue
+					}
+
+					if detail, ok := parseAntigravityStreamUsage(payload); ok {
+						reporter.publish(ctx, detail)
+					}
+
+					out <- cliproxyexecutor.StreamChunk{Payload: payload}
+				}
+				if errScan := scanner.Err(); errScan != nil {
+					recordAPIResponseError(ctx, e.cfg, errScan)
+					reporter.publishFailure(ctx)
+					out <- cliproxyexecutor.StreamChunk{Err: errScan}
+				} else {
+					reporter.ensurePublished(ctx)
+				}
+			}(httpResp)
+
+			var buffer bytes.Buffer
+			for chunk := range out {
+				if chunk.Err != nil {
+					return resp, chunk.Err
+				}
+				if len(chunk.Payload) > 0 {
+					_, _ = buffer.Write(chunk.Payload)
+					_, _ = buffer.Write([]byte("\n"))
+				}
+			}
+			resp = cliproxyexecutor.Response{Payload: e.convertStreamToNonStream(buffer.Bytes())}
+
+			reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
+			var param any
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
+			reporter.ensurePublished(ctx)
+
+			return resp, nil
+		}
+
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return resp, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		out := make(chan cliproxyexecutor.StreamChunk)
-		go func(resp *http.Response) {
-			defer close(out)
-			defer func() {
-				if errClose := resp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity executor: close response body error: %v", errClose)
-				}
-			}()
-			scanner := bufio.NewScanner(resp.Body)
-			scanner.Buffer(nil, streamScannerBuffer)
-			for scanner.Scan() {
-				line := scanner.Bytes()
-				appendAPIResponseChunk(ctx, e.cfg, line)
-
-				// Filter usage metadata for all models
-				// Only retain usage statistics in the terminal chunk
-				line = FilterSSEUsageMetadata(line)
-
-				payload := jsonPayload(line)
-				if payload == nil {
-					continue
-				}
-
-				if detail, ok := parseAntigravityStreamUsage(payload); ok {
-					reporter.publish(ctx, detail)
-				}
-
-				out <- cliproxyexecutor.StreamChunk{Payload: payload}
-			}
-			if errScan := scanner.Err(); errScan != nil {
-				recordAPIResponseError(ctx, e.cfg, errScan)
-				reporter.publishFailure(ctx)
-				out <- cliproxyexecutor.StreamChunk{Err: errScan}
-			} else {
-				reporter.ensurePublished(ctx)
-			}
-		}(httpResp)
-
-		var buffer bytes.Buffer
-		for chunk := range out {
-			if chunk.Err != nil {
-				return resp, chunk.Err
-			}
-			if len(chunk.Payload) > 0 {
-				_, _ = buffer.Write(chunk.Payload)
-				_, _ = buffer.Write([]byte("\n"))
-			}
-		}
-		resp = cliproxyexecutor.Response{Payload: e.convertStreamToNonStream(buffer.Bytes())}
-
-		reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
-		var param any
-		converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(converted)}
-		reporter.ensurePublished(ctx)
-
-		return resp, nil
+		return resp, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return resp, err
 }

@@ -517,8 +561,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 		}
 		if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() {
 			usageRaw = usageResult.Raw
-		} else if usageResult := root.Get("usageMetadata"); usageResult.Exists() {
-			usageRaw = usageResult.Raw
+		} else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() {
+			usageRaw = usageMetadataResult.Raw
 		}

 		if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() {
@@ -622,150 +666,171 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity")
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)

-	var lastStatus int
-	var lastBody []byte
-	var lastErr error
+	attempts := antigravityRetryAttempts(auth, e.cfg)

-	for idx, baseURL := range baseURLs {
-		httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
-		if errReq != nil {
-			err = errReq
-			return nil, err
-		}
+attemptLoop:
+	for attempt := 0; attempt < attempts; attempt++ {
+		var lastStatus int
+		var lastBody []byte
+		var lastErr error

-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				return nil, errDo
+		for idx, baseURL := range baseURLs {
+			httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL)
+			if errReq != nil {
+				err = errReq
+				return nil, err
 			}
-			lastStatus = 0
-			lastBody = nil
-			lastErr = errDo
-			if idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
-			}
-			err = errDo
-			return nil, err
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
-			bodyBytes, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("antigravity executor: close response body error: %v", errClose)
-			}
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
-					err = errRead
-					return nil, err
-				}
-				if errCtx := ctx.Err(); errCtx != nil {
-					err = errCtx
-					return nil, err
+			httpResp, errDo := httpClient.Do(httpReq)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
+					return nil, errDo
 				}
 				lastStatus = 0
 				lastBody = nil
-				lastErr = errRead
+				lastErr = errDo
 				if idx+1 < len(baseURLs) {
-					log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
-				err = errRead
+				err = errDo
 				return nil, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), bodyBytes...)
-			lastErr = nil
-			if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
-				log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
-				continue
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+				bodyBytes, errRead := io.ReadAll(httpResp.Body)
+				if errClose := httpResp.Body.Close(); errClose != nil {
+					log.Errorf("antigravity executor: close response body error: %v", errClose)
+				}
+				if errRead != nil {
+					recordAPIResponseError(ctx, e.cfg, errRead)
+					if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) {
+						err = errRead
+						return nil, err
+					}
+					if errCtx := ctx.Err(); errCtx != nil {
+						err = errCtx
+						return nil, err
+					}
+					lastStatus = 0
+					lastBody = nil
+					lastErr = errRead
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					err = errRead
+					return nil, err
+				}
+				appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), bodyBytes...)
+				lastErr = nil
+				if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+					continue
+				}
+				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
+					if idx+1 < len(baseURLs) {
+						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+						continue
+					}
+					if attempt+1 < attempts {
+						delay := antigravityNoCapacityRetryDelay(attempt)
+						log.Debugf("antigravity executor: no capacity for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+						if errWait := antigravityWait(ctx, delay); errWait != nil {
+							return nil, errWait
+						}
+						continue attemptLoop
+					}
+				}
+				sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+				if httpResp.StatusCode == http.StatusTooManyRequests {
+					if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+						sErr.retryAfter = retryAfter
+					}
+				}
+				err = sErr
+				return nil, err
 			}
-			sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
-			if httpResp.StatusCode == http.StatusTooManyRequests {
-				if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil {
+
+			out := make(chan cliproxyexecutor.StreamChunk)
+			stream = out
+			go func(resp *http.Response) {
+				defer close(out)
+				defer func() {
+					if errClose := resp.Body.Close(); errClose != nil {
+						log.Errorf("antigravity executor: close response body error: %v", errClose)
+					}
+				}()
+				scanner := bufio.NewScanner(resp.Body)
+				scanner.Buffer(nil, streamScannerBuffer)
+				var param any
+				for scanner.Scan() {
+					line := scanner.Bytes()
+					appendAPIResponseChunk(ctx, e.cfg, line)
+
+					// Filter usage metadata for all models
+					// Only retain usage statistics in the terminal chunk
+					line = FilterSSEUsageMetadata(line)
+
+					payload := jsonPayload(line)
+					if payload == nil {
+						continue
+					}
+
+					if detail, ok := parseAntigravityStreamUsage(payload); ok {
+						reporter.publish(ctx, detail)
+					}
+
+					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
+					for i := range chunks {
+						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+					}
+				}
+				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
+				for i := range tail {
+					out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
+				}
+				if errScan := scanner.Err(); errScan != nil {
+					recordAPIResponseError(ctx, e.cfg, errScan)
+					reporter.publishFailure(ctx)
+					out <- cliproxyexecutor.StreamChunk{Err: errScan}
+				} else {
+					reporter.ensurePublished(ctx)
+				}
+			}(httpResp)
+			return stream, nil
+		}
+
+		switch {
+		case lastStatus != 0:
+			sErr := statusErr{code: lastStatus, msg: string(lastBody)}
+			if lastStatus == http.StatusTooManyRequests {
+				if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
 					sErr.retryAfter = retryAfter
 				}
 			}
 			err = sErr
-			return nil, err
+		case lastErr != nil:
+			err = lastErr
+		default:
+			err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
 		}
-
-		out := make(chan cliproxyexecutor.StreamChunk)
-		stream = out
-		go func(resp *http.Response) {
-			defer close(out)
-			defer func() {
-				if errClose := resp.Body.Close(); errClose != nil {
-					log.Errorf("antigravity executor: close response body error: %v", errClose)
-				}
-			}()
-			scanner := bufio.NewScanner(resp.Body)
-			scanner.Buffer(nil, streamScannerBuffer)
-			var param any
-			for scanner.Scan() {
-				line := scanner.Bytes()
-				appendAPIResponseChunk(ctx, e.cfg, line)
-
-				// Filter usage metadata for all models
-				// Only retain usage statistics in the terminal chunk
-				line = FilterSSEUsageMetadata(line)
-
-				payload := jsonPayload(line)
-				if payload == nil {
-					continue
-				}
-
-				if detail, ok := parseAntigravityStreamUsage(payload); ok {
-					reporter.publish(ctx, detail)
-				}
-
-				chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
-				for i := range chunks {
-					out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
-				}
-			}
-			tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
-			for i := range tail {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
-			}
-			if errScan := scanner.Err(); errScan != nil {
-				recordAPIResponseError(ctx, e.cfg, errScan)
-				reporter.publishFailure(ctx)
-				out <- cliproxyexecutor.StreamChunk{Err: errScan}
-			} else {
-				reporter.ensurePublished(ctx)
-			}
-		}(httpResp)
-		return stream, nil
+		return nil, err
 	}

-	switch {
-	case lastStatus != 0:
-		sErr := statusErr{code: lastStatus, msg: string(lastBody)}
-		if lastStatus == http.StatusTooManyRequests {
-			if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil {
-				sErr.retryAfter = retryAfter
-			}
-		}
-		err = sErr
-	case lastErr != nil:
-		err = lastErr
-	default:
-		err = statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
-	}
 	return nil, err
 }

@@ -803,7 +868,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	// Prepare payload once (doesn't depend on baseURL)
 	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	payload, err := thinking.ApplyThinking(payload, req.Model, "antigravity")
+	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -995,7 +1060,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		now := time.Now().Unix()
 		modelConfig := registry.GetAntigravityModelConfig()
 		models := make([]*registry.ModelInfo, 0, len(result.Map()))
-		for originalName := range result.Map() {
+		for originalName, modelData := range result.Map() {
 			modelID := strings.TrimSpace(originalName)
 			if modelID == "" {
 				continue
@@ -1004,16 +1069,19 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 			case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
 				continue
 			}
-			cfg := modelConfig[modelID]
-			modelName := modelID
-			if cfg != nil && cfg.Name != "" {
-				modelName = cfg.Name
+			modelCfg := modelConfig[modelID]
+
+			// Extract displayName from upstream response, fallback to modelID
+			displayName := modelData.Get("displayName").String()
+			if displayName == "" {
+				displayName = modelID
 			}
+
 			modelInfo := &registry.ModelInfo{
 				ID:          modelID,
-				Name:        modelName,
-				Description: modelID,
-				DisplayName: modelID,
+				Name:        modelID,
+				Description: displayName,
+				DisplayName: displayName,
 				Version:     modelID,
 				Object:      "model",
 				Created:     now,
@@ -1021,12 +1089,12 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				Type:        antigravityAuthType,
 			}
 			// Look up Thinking support from static config using upstream model name.
-			if cfg != nil {
-				if cfg.Thinking != nil {
-					modelInfo.Thinking = cfg.Thinking
+			if modelCfg != nil {
+				if modelCfg.Thinking != nil {
+					modelInfo.Thinking = modelCfg.Thinking
 				}
-				if cfg.MaxCompletionTokens > 0 {
-					modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
+				if modelCfg.MaxCompletionTokens > 0 {
+					modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens
 				}
 			}
 			models = append(models, modelInfo)
@@ -1206,7 +1274,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	payload = geminiToAntigravity(modelName, payload, projectID)
 	payload, _ = sjson.SetBytes(payload, "model", modelName)

-	if strings.Contains(modelName, "claude") {
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
 		strJSON := string(payload)
 		paths := make([]string, 0)
 		util.Walk(gjson.ParseBytes(payload), "", "parametersJsonSchema", &paths)
@@ -1217,7 +1285,17 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		// Use the centralized schema cleaner to handle unsupported keywords,
 		// const->enum conversion, and flattening of types/anyOf.
 		strJSON = util.CleanJSONSchemaForAntigravity(strJSON)
-
+		payload = []byte(strJSON)
+	} else {
+		strJSON := string(payload)
+		paths := make([]string, 0)
+		util.Walk(gjson.Parse(strJSON), "", "parametersJsonSchema", &paths)
+		for _, p := range paths {
+			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+		}
+		// Clean tool schemas for Gemini to remove unsupported JSON Schema keywords
+		// without adding empty-schema placeholders.
+		strJSON = util.CleanJSONSchemaForGemini(strJSON)
 		payload = []byte(strJSON)
 	}

@@ -1234,6 +1312,12 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		}
 	}

+	if strings.Contains(modelName, "claude") {
+		payload, _ = sjson.SetBytes(payload, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+	} else {
+		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.maxOutputTokens")
+	}
+
 	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
 	if errReq != nil {
 		return nil, errReq
@@ -1363,14 +1447,70 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
 	return defaultAntigravityAgent
 }

+// antigravityRetryAttempts returns how many times a request may be tried:
+// the configured retry count (an auth override wins over cfg) plus one, and
+// never less than 1.
+func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
+	var retries int
+	if cfg != nil {
+		retries = cfg.RequestRetry
+	}
+	if auth != nil {
+		if v, ok := auth.RequestRetryOverride(); ok {
+			retries = v
+		}
+	}
+	// Negative counts mean "no retries"; the total check also covers overflow.
+	if total := retries + 1; retries >= 0 && total >= 1 {
+		return total
+	}
+	return 1
+}
+
+// antigravityShouldRetryNoCapacity reports whether an upstream reply is a
+// 503 whose body mentions "no capacity available" (case-insensitive).
+func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
+	if statusCode != http.StatusServiceUnavailable || len(body) == 0 {
+		return false
+	}
+	// Lower-case once so the phrase matches regardless of upstream casing.
+	lowered := strings.ToLower(string(body))
+	return strings.Contains(lowered, "no capacity available")
+}
+
+// antigravityNoCapacityRetryDelay returns a 250ms-per-attempt backoff capped at 2s.
+func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
+	if attempt < 0 {
+		attempt = 0
+	}
+	if d := time.Duration(attempt+1) * 250 * time.Millisecond; d <= 2*time.Second {
+		return d
+	}
+	return 2 * time.Second
+}
+
+// antigravityWait sleeps for wait, or returns ctx.Err() if ctx ends first.
+func antigravityWait(ctx context.Context, wait time.Duration) error {
+	if wait > 0 {
+		t := time.NewTimer(wait)
+		defer t.Stop()
+		select {
+		case <-t.C:
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+	return nil
+}
+
 func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
 	if base := resolveCustomAntigravityBaseURL(auth); base != "" {
 		return []string{base}
 	}
 	return []string{
-		antigravitySandboxBaseURLDaily,
 		antigravityBaseURLDaily,
-		antigravityBaseURLProd,
+		antigravitySandboxBaseURLDaily,
+		// antigravityBaseURLProd,
 	}
 }

@@ -1409,31 +1549,10 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
 	template, _ = sjson.Set(template, "request.sessionId", generateStableSessionID(payload))

 	template, _ = sjson.Delete(template, "request.safetySettings")
-	template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
-
-	if !strings.HasPrefix(modelName, "gemini-3-") {
-		if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
-			template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
-			template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		}
+	if toolConfig := gjson.Get(template, "toolConfig"); toolConfig.Exists() && !gjson.Get(template, "request.toolConfig").Exists() {
+		template, _ = sjson.SetRaw(template, "request.toolConfig", toolConfig.Raw)
+		template, _ = sjson.Delete(template, "toolConfig")
 	}
-
-	if strings.Contains(modelName, "claude") {
-		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
-			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
-				if funcDecl.Get("parametersJsonSchema").Exists() {
-					template, _ = sjson.SetRaw(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters", key.Int(), funKey.Int()), funcDecl.Get("parametersJsonSchema").Raw)
-					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parameters.$schema", key.Int(), funKey.Int()))
-					template, _ = sjson.Delete(template, fmt.Sprintf("request.tools.%d.functionDeclarations.%d.parametersJsonSchema", key.Int(), funKey.Int()))
-				}
-				return true
-			})
-			return true
-		})
-	} else {
-		template, _ = sjson.Delete(template, "request.generationConfig.maxOutputTokens")
-	}
-
 	return []byte(template)
 }

--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,7 +17,6 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -106,22 +105,21 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "claude")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

-	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
-		body = checkSystemInstructions(body)
-	}
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration.
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

-	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(baseModel, body)
-
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -165,7 +163,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
@@ -239,20 +237,21 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "claude")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

-	body = checkSystemInstructions(body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
+	// based on client type and configuration.
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

-	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
-	body = ensureMaxTokensForThinking(baseModel, body)
-
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -296,7 +295,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("response body close error: %v", errClose)
 		}
@@ -541,81 +540,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
 	return body
 }

-// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
-// Anthropic API requires this constraint; violating it returns a 400 error.
-// This function should be called after all thinking configuration is finalized.
-// It looks up the model's MaxCompletionTokens from the registry to use as the cap.
-func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
-	thinkingType := gjson.GetBytes(body, "thinking.type").String()
-	if thinkingType != "enabled" {
-		return body
-	}
-
-	budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
-	if budgetTokens <= 0 {
-		return body
-	}
-
-	maxTokens := gjson.GetBytes(body, "max_tokens").Int()
-
-	// Look up the model's max completion tokens from the registry
-	maxCompletionTokens := 0
-	if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil {
-		maxCompletionTokens = modelInfo.MaxCompletionTokens
-	}
-
-	// Fall back to budget + buffer if registry lookup fails or returns 0
-	const fallbackBuffer = 4000
-	requiredMaxTokens := budgetTokens + fallbackBuffer
-	if maxCompletionTokens > 0 {
-		requiredMaxTokens = int64(maxCompletionTokens)
-	}
-
-	if maxTokens < requiredMaxTokens {
-		body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
-	}
-	return body
-}
-
-func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
-	if auth == nil || e.cfg == nil {
-		return nil
-	}
-	var attrKey, attrBase string
-	if auth.Attributes != nil {
-		attrKey = strings.TrimSpace(auth.Attributes["api_key"])
-		attrBase = strings.TrimSpace(auth.Attributes["base_url"])
-	}
-	for i := range e.cfg.ClaudeKey {
-		entry := &e.cfg.ClaudeKey[i]
-		cfgKey := strings.TrimSpace(entry.APIKey)
-		cfgBase := strings.TrimSpace(entry.BaseURL)
-		if attrKey != "" && attrBase != "" {
-			if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
-				return entry
-			}
-			continue
-		}
-		if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
-			if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
-				return entry
-			}
-		}
-		if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
-			return entry
-		}
-	}
-	if attrKey != "" {
-		for i := range e.cfg.ClaudeKey {
-			entry := &e.cfg.ClaudeKey[i]
-			if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
-				return entry
-			}
-		}
-	}
-	return nil
-}
-
 type compositeReadCloser struct {
 	io.Reader
 	closers []func() error
@@ -809,6 +733,11 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {

 	if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
 		tools.ForEach(func(index, tool gjson.Result) bool {
+			// Skip built-in tools (web_search, code_execution, etc.) which have
+			// a "type" field and require their name to remain unchanged.
+			if tool.Get("type").Exists() && tool.Get("type").String() != "" {
+				return true
+			}
 			name := tool.Get("name").String()
 			if name == "" || strings.HasPrefix(name, prefix) {
 				return true
@@ -901,3 +830,163 @@ func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte {
 	}
 	return updated
 }
+
+// getClientUserAgent returns the User-Agent of the gin request stored under "gin" in ctx, or "".
+func getClientUserAgent(ctx context.Context) string {
+	if gc, _ := ctx.Value("gin").(*gin.Context); gc != nil && gc.Request != nil {
+		return gc.GetHeader("User-Agent")
+	}
+	return ""
+}
+
+// getCloakConfigFromAuth reads cloak settings from auth.Attributes and returns
+// (cloakMode, strictMode, sensitiveWords). Values are whitespace-trimmed; the
+// mode defaults to "auto" and empty entries in the word list are dropped.
+func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string) {
+	if auth == nil || auth.Attributes == nil {
+		return "auto", false, nil
+	}
+
+	cloakMode := strings.TrimSpace(auth.Attributes["cloak_mode"])
+	if cloakMode == "" {
+		cloakMode = "auto"
+	}
+	strictMode := strings.EqualFold(strings.TrimSpace(auth.Attributes["cloak_strict_mode"]), "true")
+
+	// "cloak_sensitive_words" is a comma-separated list; skip blank entries.
+	var sensitiveWords []string
+	for _, word := range strings.Split(auth.Attributes["cloak_sensitive_words"], ",") {
+		if word = strings.TrimSpace(word); word != "" {
+			sensitiveWords = append(sensitiveWords, word)
+		}
+	}
+
+	return cloakMode, strictMode, sensitiveWords
+}
+
+// resolveClaudeKeyCloakConfig returns the Cloak settings of the first
+// cfg.ClaudeKey entry matching the credentials that claudeCreds yields for
+// auth, or nil when nothing matches.
+func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig {
+	if cfg == nil || auth == nil {
+		return nil
+	}
+
+	apiKey, baseURL := claudeCreds(auth)
+	if apiKey == "" {
+		return nil
+	}
+
+	for i := range cfg.ClaudeKey {
+		candidate := &cfg.ClaudeKey[i]
+		if !strings.EqualFold(strings.TrimSpace(candidate.APIKey), apiKey) {
+			continue
+		}
+		// When both sides specify a base URL they must agree as well;
+		// otherwise the API key match alone is sufficient.
+		candidateBase := strings.TrimSpace(candidate.BaseURL)
+		if baseURL == "" || candidateBase == "" || strings.EqualFold(candidateBase, baseURL) {
+			return candidate.Cloak
+		}
+	}
+
+	return nil
+}
+
+// injectFakeUserID ensures metadata.user_id holds a valid user ID: a missing,
+// empty, or invalid value is replaced with a freshly generated fake one,
+// while an already valid ID is left untouched.
+func injectFakeUserID(payload []byte) []byte {
+	current := gjson.GetBytes(payload, "metadata.user_id").String()
+	if current != "" && isValidUserID(current) {
+		return payload
+	}
+
+	// Covers both an absent metadata object and an unusable user_id.
+	// The sjson error is deliberately ignored (best-effort injection).
+	payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
+	return payload
+}
+
+// checkSystemInstructionsWithMode injects the Claude Code system prompt.
+// Strict mode replaces the whole "system" field with that prompt. Otherwise
+// the prompt is prepended: text blocks of an array are kept, and a non-empty
+// string prompt (the API's shorthand form) is kept as a trailing text block.
+func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
+	const claudeCodePrompt = "You are Claude Code, Anthropic's official CLI for Claude."
+	system := gjson.GetBytes(payload, "system")
+	blocks := `[{"type":"text","text":"You are Claude Code, Anthropic's official CLI for Claude."}]`
+	switch {
+	case strictMode:
+		// Nothing to carry over: the prompt alone is written below.
+	case system.IsArray():
+		if gjson.GetBytes(payload, "system.0.text").String() == claudeCodePrompt {
+			return payload // already injected
+		}
+		system.ForEach(func(_, part gjson.Result) bool {
+			if part.Get("type").String() == "text" {
+				blocks, _ = sjson.SetRaw(blocks, "-1", part.Raw)
+			}
+			return true
+		})
+	case system.Type == gjson.String && system.String() != "" && system.String() != claudeCodePrompt:
+		// A plain-string system prompt is valid input; wrap it so it is not lost.
+		userBlock, _ := sjson.Set(`{"type":"text","text":""}`, "text", system.String())
+		blocks, _ = sjson.SetRaw(blocks, "-1", userBlock)
+	}
+	payload, _ = sjson.SetRawBytes(payload, "system", []byte(blocks))
+	return payload
+}
+
+// applyCloaking disguises an outgoing Claude request when shouldCloak approves
+// the resolved cloak mode for the caller's User-Agent; otherwise payload is
+// returned untouched. Cloaking injects the Claude Code system prompt (skipped
+// for claude-3-5-haiku models), a fake metadata.user_id, and obfuscates any
+// configured sensitive words.
+func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string) []byte {
+	var (
+		mode   string
+		strict bool
+		words  []string
+	)
+
+	// Settings on the matching ClaudeKey entry take precedence.
+	if keyCfg := resolveClaudeKeyCloakConfig(cfg, auth); keyCfg != nil {
+		mode = keyCfg.Mode
+		strict = keyCfg.StrictMode
+		words = keyCfg.SensitiveWords
+	}
+
+	// With no configured mode, fall back to the auth attributes. They only
+	// fill in the strict flag and word list where the config left them unset.
+	if mode == "" {
+		attrMode, attrStrict, attrWords := getCloakConfigFromAuth(auth)
+		mode = attrMode
+		strict = strict || attrStrict
+		if len(words) == 0 {
+			words = attrWords
+		}
+	}
+
+	// Let the mode and the client's User-Agent decide whether to cloak at all.
+	clientUA := getClientUserAgent(ctx)
+	if !shouldCloak(mode, clientUA) {
+		return payload
+	}
+
+	// System prompt injection is skipped for claude-3-5-haiku models.
+	if !strings.HasPrefix(model, "claude-3-5-haiku") {
+		payload = checkSystemInstructionsWithMode(payload, strict)
+	}
+
+	// A fake user ID is injected regardless of the model.
+	payload = injectFakeUserID(payload)
+
+	// Obfuscation only runs when a word list is available.
+	if len(words) == 0 {
+		return payload
+	}
+
+	matcher := buildSensitiveWordMatcher(words)
+	return obfuscateSensitiveWords(payload, matcher)
+}
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -25,6 +25,18 @@ func TestApplyClaudeToolPrefix(t *testing.T) {
 	}
 }

+// TestApplyClaudeToolPrefix_SkipsBuiltinTools checks that a typed built-in tool keeps its name while a custom tool is prefixed.
+func TestApplyClaudeToolPrefix_SkipsBuiltinTools(t *testing.T) {
+	prefixed := applyClaudeToolPrefix([]byte(`{"tools":[{"type":"web_search_20250305","name":"web_search"},{"name":"my_custom_tool","input_schema":{"type":"object"}}]}`), "proxy_")
+
+	if name := gjson.GetBytes(prefixed, "tools.0.name").String(); name != "web_search" {
+		t.Fatalf("built-in tool name should not be prefixed: tools.0.name = %q, want %q", name, "web_search")
+	}
+	if name := gjson.GetBytes(prefixed, "tools.1.name").String(); name != "proxy_my_custom_tool" {
+		t.Fatalf("custom tool should be prefixed: tools.1.name = %q, want %q", name, "proxy_my_custom_tool")
+	}
+}
+
 func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
 	input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
 	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
--- a/internal/runtime/executor/cloak_obfuscate.go
+++ b/internal/runtime/executor/cloak_obfuscate.go
@@ -0,0 +1,176 @@
+package executor
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// zeroWidthSpace is U+200B (ZERO WIDTH SPACE), the character obfuscateWord inserts into matched words.
+const zeroWidthSpace = "\u200B"
+
+// SensitiveWordMatcher wraps the compiled regex that buildSensitiveWordMatcher produces from a word list.
+type SensitiveWordMatcher struct {
+	regex *regexp.Regexp
+}
+
+// buildSensitiveWordMatcher compiles words into a single case-insensitive alternation.
+//
+// Each entry is trimmed; entries shorter than two runes or already containing a
+// zero-width space are discarded. The remaining words are ordered by descending byte
+// length so longer alternatives are listed before shorter ones, and each is escaped
+// with regexp.QuoteMeta so it matches literally. The result is nil when no word
+// survives filtering or the pattern does not compile.
+func buildSensitiveWordMatcher(words []string) *SensitiveWordMatcher {
+	var kept []string
+	for _, raw := range words {
+		word := strings.TrimSpace(raw)
+		if utf8.RuneCountInString(word) < 2 || strings.Contains(word, zeroWidthSpace) {
+			continue
+		}
+		kept = append(kept, word)
+	}
+	if len(kept) == 0 {
+		// Covers an empty input list as well as a list with no usable entries.
+		return nil
+	}
+
+	sort.Slice(kept, func(a, b int) bool { return len(kept[a]) > len(kept[b]) })
+
+	// Assemble "(?i)w1|w2|..." from the escaped words.
+	var pattern strings.Builder
+	pattern.WriteString("(?i)")
+	for idx, word := range kept {
+		if idx > 0 {
+			pattern.WriteByte('|')
+		}
+		pattern.WriteString(regexp.QuoteMeta(word))
+	}
+
+	compiled, err := regexp.Compile(pattern.String())
+	if err != nil {
+		return nil
+	}
+	return &SensitiveWordMatcher{regex: compiled}
+}
+
+// obfuscateWord returns word with a zero-width space inserted after its first rune.
+// The word comes back unchanged when it already contains a zero-width space, when
+// its first rune decodes to utf8.RuneError (which includes the empty string), or
+// when it consists of a single rune.
+func obfuscateWord(word string) string {
+	if strings.Contains(word, zeroWidthSpace) {
+		return word
+	}
+	first, width := utf8.DecodeRuneInString(word)
+	if first != utf8.RuneError && width < len(word) {
+		return string(first) + zeroWidthSpace + word[width:]
+	}
+	return word
+}
+
+// obfuscateText applies obfuscateWord to every regex match in text; a nil or regex-less matcher returns text unchanged.
+func (m *SensitiveWordMatcher) obfuscateText(text string) string {
+	if m != nil && m.regex != nil {
+		return m.regex.ReplaceAllStringFunc(text, obfuscateWord)
+	}
+	return text
+}
+
+// obfuscateSensitiveWords obfuscates sensitive words in the payload's "system"
+// field and in the content of its "messages", returning the rewritten payload.
+// A nil matcher, or one without a compiled regex, returns payload unchanged.
+func obfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	if matcher == nil {
+		return payload
+	}
+	if matcher.regex == nil {
+		return payload
+	}
+
+	// The system pass runs first; the message pass then works on its output.
+	withSystem := obfuscateSystemBlocks(payload, matcher)
+	return obfuscateMessages(withSystem, matcher)
+}
+
+// obfuscateSystemBlocks obfuscates sensitive words in the payload's "system" field.
+// For an array, the "text" of every block whose "type" is "text" is rewritten; a
+// plain string is rewritten as a whole; a missing field or any other shape leaves
+// payload untouched. A write is applied only when sjson reports success, so a
+// failed update cannot replace the payload with an unusable result.
+func obfuscateSystemBlocks(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	system := gjson.GetBytes(payload, "system")
+	switch {
+	case system.IsArray():
+		system.ForEach(func(idx, block gjson.Result) bool {
+			if block.Get("type").String() != "text" {
+				return true
+			}
+			original := block.Get("text").String()
+			obfuscated := matcher.obfuscateText(original)
+			if obfuscated == original {
+				return true
+			}
+			path := "system." + idx.String() + ".text"
+			if updated, err := sjson.SetBytes(payload, path, obfuscated); err == nil {
+				payload = updated
+			}
+			return true
+		})
+	case system.Type == gjson.String:
+		original := system.String()
+		if obfuscated := matcher.obfuscateText(original); obfuscated != original {
+			if updated, err := sjson.SetBytes(payload, "system", obfuscated); err == nil {
+				payload = updated
+			}
+		}
+	}
+	return payload
+}
+
+// obfuscateMessages obfuscates sensitive words in the content of every entry in "messages".
+//
+// String content is rewritten directly; for array content, the "text" of each block
+// whose "type" is "text" is rewritten. Messages without content, and a missing or
+// non-array "messages" field, are left alone. A write is applied only when sjson
+// reports success, so a failed update cannot replace the payload with an unusable result.
+func obfuscateMessages(payload []byte, matcher *SensitiveWordMatcher) []byte {
+	messages := gjson.GetBytes(payload, "messages")
+	if !messages.IsArray() {
+		return payload
+	}
+
+	// rewrite stores the obfuscated form of text at path when it differs from text.
+	rewrite := func(path, text string) {
+		obfuscated := matcher.obfuscateText(text)
+		if obfuscated == text {
+			return
+		}
+		if updated, err := sjson.SetBytes(payload, path, obfuscated); err == nil {
+			payload = updated
+		}
+	}
+
+	messages.ForEach(func(msgIdx, msg gjson.Result) bool {
+		contentPath := "messages." + msgIdx.String() + ".content"
+		content := msg.Get("content")
+		switch {
+		case content.Type == gjson.String:
+			rewrite(contentPath, content.String())
+		case content.IsArray():
+			content.ForEach(func(blockIdx, block gjson.Result) bool {
+				if block.Get("type").String() == "text" {
+					rewrite(contentPath+"."+blockIdx.String()+".text", block.Get("text").String())
+				}
+				return true
+			})
+		}
+		return true
+	})
+
+	return payload
+}
--- a/internal/runtime/executor/cloak_utils.go
+++ b/internal/runtime/executor/cloak_utils.go
@@ -0,0 +1,47 @@
+package executor
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"regexp"
+	"strings"
+
+	"github.com/google/uuid"
+)
+
+// userIDPattern matches the Claude Code user ID layout in full: user_[64 hex chars]_account__session_[lowercase 8-4-4-4-12 hex UUID].
+var userIDPattern = regexp.MustCompile(`^user_[a-fA-F0-9]{64}_account__session_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`)
+
+// generateFakeUserID returns a freshly generated user ID in Claude Code format:
+// user_[64-hex-chars]_account__session_[UUID from uuid.New].
+func generateFakeUserID() string {
+	var entropy [32]byte
+	// The read error is deliberately discarded; 32 bytes encode to the 64 hex characters.
+	_, _ = rand.Read(entropy[:])
+
+	return "user_" + hex.EncodeToString(entropy[:]) + "_account__session_" + uuid.New().String()
+}
+
+// isValidUserID reports whether userID matches userIDPattern, which is anchored at both ends and so must match the whole string.
+func isValidUserID(userID string) bool {
+	return userIDPattern.MatchString(userID)
+}
+
+// shouldCloak reports whether a request should be cloaked. cloakMode is compared
+// case-insensitively: "always" cloaks, "never" does not, and any other value ("auto",
+// empty, ...) cloaks unless userAgent starts with "claude-cli" (a Claude Code client).
+func shouldCloak(cloakMode string, userAgent string) bool {
+	mode := strings.ToLower(cloakMode)
+	if mode == "always" {
+		return true
+	}
+	if mode == "never" {
+		return false
+	}
+	return !strings.HasPrefix(userAgent, "claude-cli")
+}
+
+// isClaudeCodeClient reports whether userAgent starts with "claude-cli", the prefix treated here as identifying a Claude Code client.
+func isClaudeCodeClient(userAgent string) bool {
+	return strings.HasPrefix(userAgent, "claude-cli")
+}
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -96,16 +96,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)

-	body, err = thinking.ApplyThinking(body, req.Model, "codex")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
 	}
@@ -148,7 +150,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -207,14 +209,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, true)
 	body = misc.StripCodexUserAgent(body)

-	body, err = thinking.ApplyThinking(body, req.Model, "codex")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
@@ -261,7 +265,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			return nil, readErr
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
@@ -314,7 +318,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body = sdktranslator.TranslateRequest(from, to, baseModel, body, false)
 	body = misc.StripCodexUserAgent(body)

-	body, err := thinking.ApplyThinking(body, req.Model, "codex")
+	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -322,6 +326,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "stream", false)
 	if !gjson.GetBytes(body, "instructions").Exists() {
 		body, _ = sjson.SetBytes(body, "instructions", "")
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -123,13 +123,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -226,7 +227,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 		lastStatus = httpResp.StatusCode
 		lastBody = append([]byte(nil), data...)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		if httpResp.StatusCode == 429 {
 			if idx+1 < len(models) {
 				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -272,13 +273,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli")
+	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	projectID := resolveGeminiProjectID(auth)

@@ -358,7 +360,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
-			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+			logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 			if httpResp.StatusCode == 429 {
 				if idx+1 < len(models) {
 					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -479,7 +481,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	for range models {
 		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-		payload, err = thinking.ApplyThinking(payload, req.Model, "gemini-cli")
+		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
 			return cliproxyexecutor.Response{}, err
 		}
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -120,13 +120,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := "generateContent"
@@ -187,7 +188,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -222,13 +223,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	baseURL := resolveGeminiBaseURL(auth)
@@ -280,7 +282,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("gemini executor: close response body error: %v", errClose)
 		}
@@ -338,7 +340,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -400,7 +402,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		log.Debugf("request error, error status: %d, error body: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)}
 	}

--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -12,6 +12,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"

 	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -31,6 +32,143 @@ const (
 	vertexAPIVersion = "v1"
 )

+// isImagenModel reports whether model names an Imagen image generation model, that
+// is, whether the name contains "imagen" in any letter case.
+// Imagen models use the :predict action instead of :generateContent.
+func isImagenModel(model string) bool {
+	return strings.Contains(strings.ToLower(model), "imagen")
+}
+
+// getVertexAction picks the Vertex endpoint action for model: "predict" for Imagen models
+// (regardless of isStream), otherwise "streamGenerateContent" or "generateContent".
+func getVertexAction(model string, isStream bool) string {
+	switch {
+	case isImagenModel(model):
+		return "predict"
+	case isStream:
+		return "streamGenerateContent"
+	}
+	return "generateContent"
+}
+
+// convertImagenToGeminiResponse rewrites an Imagen predict response as a Gemini-style
+// response so it can go through the standard translation pipeline.
+//
+// Every entry of "predictions" with a non-empty bytesBase64Encoded becomes one
+// inlineData part (mimeType falls back to "image/png") of a single candidate with
+// role "model" and finishReason "STOP"; model is reported as modelVersion. data is
+// returned unchanged when it has no "predictions" array or when the converted
+// response cannot be marshalled.
+func convertImagenToGeminiResponse(data []byte, model string) []byte {
+	predictions := gjson.GetBytes(data, "predictions")
+	if !predictions.IsArray() {
+		return data
+	}
+
+	// Collect one inlineData part per prediction that actually carries image bytes.
+	parts := make([]map[string]any, 0)
+	for _, prediction := range predictions.Array() {
+		encoded := prediction.Get("bytesBase64Encoded").String()
+		if encoded == "" {
+			continue
+		}
+		mime := prediction.Get("mimeType").String()
+		if mime == "" {
+			mime = "image/png"
+		}
+		inline := map[string]any{"mimeType": mime, "data": encoded}
+		parts = append(parts, map[string]any{"inlineData": inline})
+	}
+
+	candidate := map[string]any{
+		"content":      map[string]any{"parts": parts, "role": "model"},
+		"finishReason": "STOP",
+	}
+
+	// Imagen API doesn't return token counts, set to 0 for tracking purposes.
+	usage := map[string]any{
+		"promptTokenCount":     0,
+		"candidatesTokenCount": 0,
+		"totalTokenCount":      0,
+	}
+
+	// The response ID is derived from the current time in nanoseconds.
+	responseID := fmt.Sprintf("imagen-%d", time.Now().UnixNano())
+	converted, err := json.Marshal(map[string]any{
+		"candidates":    []map[string]any{candidate},
+		"responseId":    responseID,
+		"modelVersion":  model,
+		"usageMetadata": usage,
+	})
+	if err != nil {
+		return data
+	}
+	return converted
+}
+
+// convertToImagenRequest builds an Imagen predict request from payload.
+//
+// The prompt is looked up in order: contents[0].parts[0].text (Gemini style), the
+// first non-empty messages[].content (OpenAI-compatible), then a top-level "prompt".
+// An error is returned when none of them yields a non-empty prompt. The request
+// carries the prompt in instances[0] and defaults parameters.sampleCount to 1;
+// a top-level "aspectRatio" or "sampleCount" in payload is written into parameters
+// (the latter replacing the default), and a top-level "negativePrompt" is copied
+// onto the instance.
+func convertToImagenRequest(payload []byte) ([]byte, error) {
+	var prompt string
+
+	// 1) Gemini-style contents.
+	if text := gjson.GetBytes(payload, "contents.0.parts.0.text"); text.Exists() {
+		prompt = text.String()
+	}
+
+	// 2) OpenAI-compatible messages: take the first non-empty content.
+	if prompt == "" {
+		if contents := gjson.GetBytes(payload, "messages.#.content"); contents.Exists() && contents.IsArray() {
+			for _, content := range contents.Array() {
+				if s := content.String(); s != "" {
+					prompt = s
+					break
+				}
+			}
+		}
+	}
+
+	// 3) Bare top-level prompt field.
+	if prompt == "" {
+		if direct := gjson.GetBytes(payload, "prompt"); direct.Exists() {
+			prompt = direct.String()
+		}
+	}
+
+	// None of the three sources produced a usable prompt.
+	if prompt == "" {
+		return nil, fmt.Errorf("imagen: no prompt found in request")
+	}
+
+	// The two halves of the Imagen request, filled in before being assembled.
+	instance := map[string]any{"prompt": prompt}
+	parameters := map[string]any{"sampleCount": 1}
+
+	// Optional settings are read from the top level of the incoming payload.
+	if ratio := gjson.GetBytes(payload, "aspectRatio"); ratio.Exists() {
+		parameters["aspectRatio"] = ratio.String()
+	}
+	if count := gjson.GetBytes(payload, "sampleCount"); count.Exists() {
+		parameters["sampleCount"] = int(count.Int())
+	}
+	if negative := gjson.GetBytes(payload, "negativePrompt"); negative.Exists() {
+		instance["negativePrompt"] = negative.String()
+	}
+
+	// Imagen expects instances[] plus a parameters object.
+	return json.Marshal(map[string]any{
+		"instances":  []map[string]any{instance},
+		"parameters": parameters,
+	})
+}
+
 // GeminiVertexExecutor sends requests to Vertex AI Gemini endpoints using service account credentials.
 type GeminiVertexExecutor struct {
 	cfg *config.Config
@@ -160,26 +298,39 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
 	defer reporter.trackFailure(ctx, &err)

-	from := opts.SourceFormat
-	to := sdktranslator.FromString("gemini")
+	var body []byte

-	originalPayload := bytes.Clone(req.Payload)
-	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
-	}
-	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	// Handle Imagen models with special request format
+	if isImagenModel(baseModel) {
+		imagenBody, errImagen := convertToImagenRequest(req.Payload)
+		if errImagen != nil {
+			return resp, errImagen
+		}
+		body = imagenBody
+	} else {
+		// Standard Gemini translation flow
+		from := opts.SourceFormat
+		to := sdktranslator.FromString("gemini")

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
-	if err != nil {
-		return resp, err
+		originalPayload := bytes.Clone(req.Payload)
+		if len(opts.OriginalRequest) > 0 {
+			originalPayload = bytes.Clone(opts.OriginalRequest)
+		}
+		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
+		if err != nil {
+			return resp, err
+		}
+
+		body = fixGeminiImageAspectRatio(baseModel, body)
+		requestedModel := payloadRequestedModel(opts, req.Model)
+		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}

-	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
-	body, _ = sjson.SetBytes(body, "model", baseModel)
-
-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -238,7 +389,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -249,6 +400,16 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	}
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
+
+	// For Imagen models, convert response to Gemini format before translation
+	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
+	if isImagenModel(baseModel) {
+		data = convertImagenToGeminiResponse(data, baseModel)
+	}
+
+	// Standard Gemini translation (works for both Gemini and converted Imagen responses)
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
@@ -272,16 +433,17 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	action := "generateContent"
+	action := getVertexAction(baseModel, false)
 	if req.Metadata != nil {
 		if a, _ := req.Metadata["action"].(string); a == "countTokens" {
 			action = "countTokens"
@@ -341,7 +503,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -375,21 +537,26 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

+	action := getVertexAction(baseModel, true)
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")

@@ -434,7 +601,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -494,24 +661,29 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)

-	body, err = thinking.ApplyThinking(body, req.Model, "gemini")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

+	action := getVertexAction(baseModel, true)
 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent")
-	if opts.Alt == "" {
-		url = url + "?alt=sse"
-	} else {
-		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
+	// Imagen models don't support streaming, skip SSE params
+	if !isImagenModel(baseModel) {
+		if opts.Alt == "" {
+			url = url + "?alt=sse"
+		} else {
+			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+		}
 	}
 	body, _ = sjson.DeleteBytes(body, "session_id")

@@ -553,7 +725,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -605,7 +777,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -666,7 +838,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
@@ -689,7 +861,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *

 	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)

-	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini")
+	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
@@ -750,7 +922,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -119,7 +119,8 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "stream", false)

 	path := githubCopilotChatPath
@@ -218,7 +219,8 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
 	if !useResponses {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -92,13 +92,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "iflow")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
 	if err != nil {
 		return resp, err
 	}

 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -141,7 +142,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("iflow request error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -190,7 +191,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "iflow")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -201,7 +202,8 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint

@@ -242,7 +244,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			log.Errorf("iflow executor: close response body error: %v", errClose)
 		}
 		appendAPIResponseChunk(ctx, e.cfg, data)
-		log.Debugf("iflow streaming error: status %d body %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
 		return nil, err
 	}
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -12,7 +12,10 @@ import (

 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
 )

 const (
@@ -332,6 +335,12 @@ func summarizeErrorBody(contentType string, body []byte) string {
 		}
 		return "[html body omitted]"
 	}
+
+	// Try to extract error message from JSON response
+	if message := extractJSONErrorMessage(body); message != "" {
+		return message
+	}
+
 	return string(body)
 }

@@ -358,3 +367,25 @@ func extractHTMLTitle(body []byte) string {
 	}
 	return strings.Join(strings.Fields(title), " ")
 }
+
+// extractJSONErrorMessage returns the string found at "error.message" in body, or "" when that path is absent or empty.
+func extractJSONErrorMessage(body []byte) string {
+	message := gjson.GetBytes(body, "error.message")
+	if !message.Exists() {
+		return ""
+	}
+	return message.String()
+}
+
+// logWithRequestID builds a logrus entry for ctx, tagging it with "request_id" when the context carries one.
+// A nil context or an empty request ID yields a plain entry on the standard logger.
+func logWithRequestID(ctx context.Context) *log.Entry {
+	requestID := ""
+	if ctx != nil {
+		requestID = logging.GetRequestID(ctx)
+	}
+	if requestID != "" {
+		return log.WithField("request_id", requestID)
+	}
+	return log.NewEntry(log.StandardLogger())
+}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -90,9 +90,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

-	translated, err = thinking.ApplyThinking(translated, req.Model, "openai")
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}
@@ -145,7 +146,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -185,9 +186,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
-	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

-	translated, err = thinking.ApplyThinking(translated, req.Model, "openai")
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -237,7 +239,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("openai compat executor: close response body error: %v", errClose)
 		}
@@ -297,7 +299,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 	modelForCounting := baseModel

-	translated, err := thinking.ApplyThinking(translated, req.Model, "openai")
+	translated, err := thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -5,6 +5,8 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -12,8 +14,9 @@ import (
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
 // and restricts matches to the given protocol when supplied. Defaults are checked
-// against the original payload when provided.
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
+// against the original payload when provided. requestedModel carries the client-visible
+// model name before alias resolution so payload rules can target aliases precisely.
+func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte {
 	if cfg == nil || len(payload) == 0 {
 		return payload
 	}
@@ -22,9 +25,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		return payload
 	}
 	model = strings.TrimSpace(model)
-	if model == "" {
+	requestedModel = strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
 		return payload
 	}
+	candidates := payloadModelCandidates(model, requestedModel)
 	out := payload
 	source := original
 	if len(source) == 0 {
@@ -34,7 +39,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -59,7 +64,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default raw rules: first write wins per field across all matching rules.
 	for i := range rules.DefaultRaw {
 		rule := &rules.DefaultRaw[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -88,7 +93,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply override rules: last write wins per field across all matching rules.
 	for i := range rules.Override {
 		rule := &rules.Override[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -106,7 +111,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply override raw rules: last write wins per field across all matching rules.
 	for i := range rules.OverrideRaw {
 		rule := &rules.OverrideRaw[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -128,6 +133,18 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	return out
 }

+// payloadRuleMatchesModels reports whether rule matches at least one of the given models for protocol.
+func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool {
+	if rule != nil {
+		for i := range models {
+			if payloadRuleMatchesModel(rule, models[i], protocol) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool {
 	if rule == nil {
 		return false
@@ -150,6 +167,42 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
 	return false
 }

+// payloadModelCandidates lists the names payload rules are matched against, in order:
+// model, the base name thinking.ParseSuffix extracts from requestedModel, and requestedModel
+// itself when it carries a suffix. Names are trimmed, blanks are skipped, and duplicates are
+// dropped case-insensitively (first occurrence wins). It returns nil when both inputs are blank.
+func payloadModelCandidates(model, requestedModel string) []string {
+	model, requestedModel = strings.TrimSpace(model), strings.TrimSpace(requestedModel)
+	if model == "" && requestedModel == "" {
+		return nil
+	}
+	names := make([]string, 0, 3)
+	if requestedModel != "" {
+		parsed := thinking.ParseSuffix(requestedModel)
+		names = append(names, model, parsed.ModelName)
+		if parsed.HasSuffix {
+			names = append(names, requestedModel)
+		}
+	} else {
+		names = append(names, model)
+	}
+	candidates := make([]string, 0, 3)
+	seen := make(map[string]struct{}, 3)
+	for _, name := range names {
+		name = strings.TrimSpace(name)
+		if name == "" {
+			continue
+		}
+		key := strings.ToLower(name)
+		if _, dup := seen[key]; dup {
+			continue
+		}
+		seen[key] = struct{}{}
+		candidates = append(candidates, name)
+	}
+	return candidates
+}
+
 // buildPayloadPath combines an optional root path with a relative parameter path.
 // When root is empty, the parameter path is used as-is. When root is non-empty,
 // the parameter path is treated as relative to root.
@@ -186,6 +239,35 @@ func payloadRawValue(value any) ([]byte, bool) {
 	}
 }

+// payloadRequestedModel resolves the client-requested model name that is handed to
+// applyPayloadConfigWithRoot for payload rule matching.
+//
+// The name is read from opts.Metadata under cliproxyexecutor.RequestedModelMetadataKey and
+// may be stored as a string or a []byte; surrounding whitespace is removed. The trimmed
+// fallback is returned when the metadata is empty, the key is missing or nil, the value
+// has any other type, or the value is blank after trimming.
+func payloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string {
+	fallback = strings.TrimSpace(fallback)
+	if len(opts.Metadata) == 0 {
+		return fallback
+	}
+
+	// A missing key yields a nil interface, which matches neither case below
+	// and leaves requested empty.
+	requested := ""
+	switch value := opts.Metadata[cliproxyexecutor.RequestedModelMetadataKey].(type) {
+	case string:
+		requested = strings.TrimSpace(value)
+	case []byte:
+		requested = strings.TrimSpace(string(value))
+	}
+
+	if requested == "" {
+		return fallback
+	}
+	return requested
+}
+
 // matchModelPattern performs simple wildcard matching where '*' matches zero or more characters.
 // Examples:
 //
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -86,12 +86,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "openai")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return resp, err
 	}

-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -132,7 +133,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
@@ -172,7 +173,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

-	body, err = thinking.ApplyThinking(body, req.Model, "openai")
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, err
 	}
@@ -184,7 +185,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -220,7 +222,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
 		appendAPIResponseChunk(ctx, e.cfg, b)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("qwen executor: close response body error: %v", errClose)
 		}
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -2,6 +2,8 @@
 package thinking

 import (
+	"strings"
+
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -59,7 +61,9 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 // Parameters:
 //   - body: Original request body JSON
 //   - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)")
-//   - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
+//   - fromFormat: Source request format (e.g., openai, codex, gemini)
+//   - toFormat: Target provider format for the request body (gemini, gemini-cli, antigravity, claude, openai, codex, iflow)
+//   - providerKey: Provider identifier used for registry model lookups (may differ from toFormat, e.g., openrouter -> openai)
 //
 // Returns:
 //   - Modified request body JSON with thinking configuration applied
@@ -76,16 +80,25 @@ func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool {
 // Example:
 //
 //	// With suffix - suffix config takes priority
-//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini")
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini", "gemini", "gemini")
 //
 //	// Without suffix - uses body config
-//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini")
-func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
+//	result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini", "gemini", "gemini")
+func ApplyThinking(body []byte, model string, fromFormat string, toFormat string, providerKey string) ([]byte, error) {
+	providerFormat := strings.ToLower(strings.TrimSpace(toFormat))
+	providerKey = strings.ToLower(strings.TrimSpace(providerKey))
+	if providerKey == "" {
+		providerKey = providerFormat
+	}
+	fromFormat = strings.ToLower(strings.TrimSpace(fromFormat))
+	if fromFormat == "" {
+		fromFormat = providerFormat
+	}
 	// 1. Route check: Get provider applier
-	applier := GetProviderApplier(provider)
+	applier := GetProviderApplier(providerFormat)
 	if applier == nil {
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    model,
 		}).Debug("thinking: unknown provider, passthrough |")
 		return body, nil
@@ -94,25 +107,26 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
 	// 2. Parse suffix and get modelInfo
 	suffixResult := ParseSuffix(model)
 	baseModel := suffixResult.ModelName
-	modelInfo := registry.LookupModelInfo(baseModel)
+	// Use provider-specific lookup to handle capability differences across providers.
+	modelInfo := registry.LookupModelInfo(baseModel, providerKey)

 	// 3. Model capability check
 	// Unknown models are treated as user-defined so thinking config can still be applied.
 	// The upstream service is responsible for validating the configuration.
 	if IsUserDefinedModel(modelInfo) {
-		return applyUserDefinedModel(body, modelInfo, provider, suffixResult)
+		return applyUserDefinedModel(body, modelInfo, fromFormat, providerFormat, suffixResult)
 	}
 	if modelInfo.Thinking == nil {
-		config := extractThinkingConfig(body, provider)
+		config := extractThinkingConfig(body, providerFormat)
 		if hasThinkingConfig(config) {
 			log.WithFields(log.Fields{
 				"model":    baseModel,
-				"provider": provider,
+				"provider": providerFormat,
 			}).Debug("thinking: model does not support thinking, stripping config |")
-			return StripThinkingConfig(body, provider), nil
+			return StripThinkingConfig(body, providerFormat), nil
 		}
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    baseModel,
 		}).Debug("thinking: model does not support thinking, passthrough |")
 		return body, nil
@@ -121,19 +135,19 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
 	// 4. Get config: suffix priority over body
 	var config ThinkingConfig
 	if suffixResult.HasSuffix {
-		config = parseSuffixToConfig(suffixResult.RawSuffix, provider, model)
+		config = parseSuffixToConfig(suffixResult.RawSuffix, providerFormat, model)
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    model,
 			"mode":     config.Mode,
 			"budget":   config.Budget,
 			"level":    config.Level,
 		}).Debug("thinking: config from model suffix |")
 	} else {
-		config = extractThinkingConfig(body, provider)
+		config = extractThinkingConfig(body, providerFormat)
 		if hasThinkingConfig(config) {
 			log.WithFields(log.Fields{
-				"provider": provider,
+				"provider": providerFormat,
 				"model":    modelInfo.ID,
 				"mode":     config.Mode,
 				"budget":   config.Budget,
@@ -144,17 +158,17 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {

 	if !hasThinkingConfig(config) {
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    modelInfo.ID,
 		}).Debug("thinking: no config found, passthrough |")
 		return body, nil
 	}

 	// 5. Validate and normalize configuration
-	validated, err := ValidateConfig(config, modelInfo, provider)
+	validated, err := ValidateConfig(config, modelInfo, fromFormat, providerFormat, suffixResult.HasSuffix)
 	if err != nil {
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    modelInfo.ID,
 			"error":    err.Error(),
 		}).Warn("thinking: validation failed |")
@@ -167,14 +181,14 @@ func ApplyThinking(body []byte, model string, provider string) ([]byte, error) {
 	// Defensive check: ValidateConfig should never return (nil, nil)
 	if validated == nil {
 		log.WithFields(log.Fields{
-			"provider": provider,
+			"provider": providerFormat,
 			"model":    modelInfo.ID,
 		}).Warn("thinking: ValidateConfig returned nil config without error, passthrough |")
 		return body, nil
 	}

 	log.WithFields(log.Fields{
-		"provider": provider,
+		"provider": providerFormat,
 		"model":    modelInfo.ID,
 		"mode":     validated.Mode,
 		"budget":   validated.Budget,
@@ -228,7 +242,7 @@ func parseSuffixToConfig(rawSuffix, provider, model string) ThinkingConfig {

 // applyUserDefinedModel applies thinking configuration for user-defined models
 // without ThinkingSupport validation.
-func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) {
+func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, fromFormat, toFormat string, suffixResult SuffixResult) ([]byte, error) {
 	// Get model ID for logging
 	modelID := ""
 	if modelInfo != nil {
@@ -240,39 +254,57 @@ func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider
 	// Get config: suffix priority over body
 	var config ThinkingConfig
 	if suffixResult.HasSuffix {
-		config = parseSuffixToConfig(suffixResult.RawSuffix, provider, modelID)
+		config = parseSuffixToConfig(suffixResult.RawSuffix, toFormat, modelID)
 	} else {
-		config = extractThinkingConfig(body, provider)
+		config = extractThinkingConfig(body, toFormat)
 	}

 	if !hasThinkingConfig(config) {
 		log.WithFields(log.Fields{
 			"model":    modelID,
-			"provider": provider,
+			"provider": toFormat,
 		}).Debug("thinking: user-defined model, passthrough (no config) |")
 		return body, nil
 	}

-	applier := GetProviderApplier(provider)
+	applier := GetProviderApplier(toFormat)
 	if applier == nil {
 		log.WithFields(log.Fields{
 			"model":    modelID,
-			"provider": provider,
+			"provider": toFormat,
 		}).Debug("thinking: user-defined model, passthrough (unknown provider) |")
 		return body, nil
 	}

 	log.WithFields(log.Fields{
-		"provider": provider,
+		"provider": toFormat,
 		"model":    modelID,
 		"mode":     config.Mode,
 		"budget":   config.Budget,
 		"level":    config.Level,
 	}).Debug("thinking: applying config for user-defined model (skip validation)")

+	config = normalizeUserDefinedConfig(config, fromFormat, toFormat)
 	return applier.Apply(body, config, modelInfo)
 }

+func normalizeUserDefinedConfig(config ThinkingConfig, fromFormat, toFormat string) ThinkingConfig { // turns a level config into a budget config for a level-based source and budget-based target
+	if config.Mode != ModeLevel {
+		return config // only level-mode configs are candidates for conversion
+	}
+	if !isBudgetBasedProvider(toFormat) || !isLevelBasedProvider(fromFormat) {
+		return config // conversion applies only when a level-based source targets a budget-based provider
+	}
+	budget, ok := ConvertLevelToBudget(string(config.Level))
+	if !ok {
+		return config // the level has no budget mapping; leave the config untouched
+	}
+	config.Mode = ModeBudget
+	config.Budget = budget
+	config.Level = "" // the level is cleared once it has been replaced by a budget
+	return config
+}
+
 // extractThinkingConfig extracts provider-specific thinking config from request body.
 func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
 	if len(body) == 0 || !gjson.ValidBytes(body) {
@@ -289,7 +321,11 @@ func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
 	case "codex":
 		return extractCodexConfig(body)
 	case "iflow":
-		return extractIFlowConfig(body)
+		config := extractIFlowConfig(body)
+		if hasThinkingConfig(config) {
+			return config
+		}
+		return extractOpenAIConfig(body)
 	default:
 		return ThinkingConfig{}
 	}
--- a/internal/thinking/errors.go
+++ b/internal/thinking/errors.go
@@ -24,6 +24,10 @@ const (
 	// Example: using level with a budget-only model
 	ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED"

+	// ErrBudgetOutOfRange indicates the budget value is outside model range.
+	// Example: budget 64000 exceeds max 20000
+	ErrBudgetOutOfRange ErrorCode = "BUDGET_OUT_OF_RANGE"
+
 	// ErrProviderMismatch indicates the provider does not match the model.
 	// Example: applying Claude format to a Gemini model
 	ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH"
--- a/internal/thinking/provider/claude/apply.go
+++ b/internal/thinking/provider/claude/apply.go
@@ -80,9 +80,66 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *

 	result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
 	result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
+
+	// Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint)
+	result = a.normalizeClaudeBudget(result, config.Budget, modelInfo)
 	return result, nil
 }

+// normalizeClaudeBudget caps thinking.budget_tokens so that it stays below the request's effective max_tokens.
+// Anthropic API requires max_tokens > budget_tokens; violating it returns a 400 error.
+func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
+	if budgetTokens <= 0 {
+		return body // no positive budget to constrain
+	}
+
+	// Ensure the request satisfies Claude constraints:
+	//  1) Determine effective max_tokens (request overrides model default)
+	//  2) If max_tokens came from model default, write it back into the request
+	//  3) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
+	//  4) If the adjusted budget falls below the model minimum, keep the budget_tokens already present in body
+
+	effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
+	if setDefaultMax && effectiveMax > 0 {
+		body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax)
+	}
+
+	// Compute the budget we would apply after enforcing budget_tokens < max_tokens.
+	adjustedBudget := budgetTokens
+	if effectiveMax > 0 && adjustedBudget >= effectiveMax {
+		adjustedBudget = effectiveMax - 1
+	}
+
+	minBudget := 0
+	if modelInfo != nil && modelInfo.Thinking != nil {
+		minBudget = modelInfo.Thinking.Min
+	}
+	if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
+		// If enforcing the max_tokens constraint would push the budget below the model minimum,
+		// keep budget_tokens as already set in body (a defaulted max_tokens may still have been written above).
+		return body
+	}
+
+	if adjustedBudget != budgetTokens {
+		body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget)
+	}
+
+	return body
+}
+
+// effectiveMaxTokens returns the max tokens to cap thinking:
+// prefer a positive request-provided max_tokens; otherwise fall back to a positive modelInfo.MaxCompletionTokens; otherwise return (0, false).
+// The boolean indicates whether the value came from the model default (and thus should be written back).
+func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
+	if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 {
+		return int(maxTok.Int()), false
+	}
+	if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
+		return modelInfo.MaxCompletionTokens, true
+	}
+	return 0, false
+}
+
 func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
 	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
 		return body, nil
--- a/internal/thinking/provider/iflow/apply.go
+++ b/internal/thinking/provider/iflow/apply.go
@@ -1,7 +1,7 @@
-// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
+// Package iflow implements thinking configuration for iFlow models.
 //
 // iFlow models use boolean toggle semantics:
-//   - GLM models: chat_template_kwargs.enable_thinking (boolean)
+//   - Models using chat_template_kwargs.enable_thinking (boolean toggle)
 //   - MiniMax models: reasoning_split (boolean)
 //
 // Level values are converted to boolean: none=false, all others=true
@@ -20,6 +20,7 @@ import (
 // Applier implements thinking.ProviderApplier for iFlow models.
 //
 // iFlow-specific behavior:
+//   - enable_thinking toggle models: enable_thinking boolean
 //   - GLM models: enable_thinking boolean + clear_thinking=false
 //   - MiniMax models: reasoning_split boolean
 //   - Level to boolean: none=false, others=true
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
 		return body, nil
 	}

-	if isGLMModel(modelInfo.ID) {
-		return applyGLM(body, config), nil
+	if isEnableThinkingModel(modelInfo.ID) {
+		return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
 	}

 	if isMiniMaxModel(modelInfo.ID) {
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
 	}
 }

-// applyGLM applies thinking configuration for GLM models.
+// applyEnableThinking applies thinking configuration for models that use
+// chat_template_kwargs.enable_thinking format.
 //
 // Output format when enabled:
 //
@@ -107,9 +109,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
 //
 //	{"chat_template_kwargs": {"enable_thinking": false}}
 //
-// Note: clear_thinking is only set when thinking is enabled, to preserve
-// thinking output in the response.
-func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
+// Note: clear_thinking is only set for GLM models when thinking is enabled.
+func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
 	enableThinking := configToBoolean(config)

 	if len(body) == 0 || !gjson.ValidBytes(body) {
@@ -118,8 +119,11 @@ func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {

 	result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)

+	// clear_thinking is a GLM-only knob: always strip any incoming value; it is re-added below only for GLM models with thinking enabled.
+	result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
+
 	// clear_thinking only needed when thinking is enabled
-	if enableThinking {
+	if enableThinking && setClearThinking {
 		result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
 	}

@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
 	return result
 }

+// isEnableThinkingModel reports whether the model uses chat_template_kwargs.enable_thinking format: any GLM model (per isGLMModel) or one of the IDs listed below.
+func isEnableThinkingModel(modelID string) bool {
+	if isGLMModel(modelID) {
+		return true
+	}
+	id := strings.ToLower(modelID) // the exact-ID match below is case-insensitive
+	switch id {
+	case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
+		return true
+	default:
+		return false
+	}
+}
+
 // isGLMModel determines if the model is a GLM series model.
-// GLM models use chat_template_kwargs.enable_thinking format.
 func isGLMModel(modelID string) bool {
 	return strings.HasPrefix(strings.ToLower(modelID), "glm")
 }
--- a/internal/thinking/strip.go
+++ b/internal/thinking/strip.go
@@ -27,28 +27,32 @@ func StripThinkingConfig(body []byte, provider string) []byte {
 		return body
 	}

+	var paths []string
 	switch provider {
 	case "claude":
-		result, _ := sjson.DeleteBytes(body, "thinking")
-		return result
+		paths = []string{"thinking"}
 	case "gemini":
-		result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig")
-		return result
+		paths = []string{"generationConfig.thinkingConfig"}
 	case "gemini-cli", "antigravity":
-		result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
-		return result
+		paths = []string{"request.generationConfig.thinkingConfig"}
 	case "openai":
-		result, _ := sjson.DeleteBytes(body, "reasoning_effort")
-		return result
+		paths = []string{"reasoning_effort"}
 	case "codex":
-		result, _ := sjson.DeleteBytes(body, "reasoning.effort")
-		return result
+		paths = []string{"reasoning.effort"}
 	case "iflow":
-		result, _ := sjson.DeleteBytes(body, "chat_template_kwargs.enable_thinking")
-		result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
-		result, _ = sjson.DeleteBytes(result, "reasoning_split")
-		return result
+		paths = []string{
+			"chat_template_kwargs.enable_thinking",
+			"chat_template_kwargs.clear_thinking",
+			"reasoning_split",
+			"reasoning_effort",
+		}
 	default:
 		return body
 	}
+
+	result := body
+	for _, path := range paths {
+		result, _ = sjson.DeleteBytes(result, path)
+	}
+	return result
 }
--- a/internal/thinking/validate.go
+++ b/internal/thinking/validate.go
@@ -9,64 +9,6 @@ import (
 	log "github.com/sirupsen/logrus"
 )

-// ClampBudget clamps a budget value to the model's supported range.
-//
-// Logging:
-//   - Warn when value=0 but ZeroAllowed=false
-//   - Debug when value is clamped to min/max
-//
-// Fields: provider, model, original_value, clamped_to, min, max
-func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
-	model := "unknown"
-	support := (*registry.ThinkingSupport)(nil)
-	if modelInfo != nil {
-		if modelInfo.ID != "" {
-			model = modelInfo.ID
-		}
-		support = modelInfo.Thinking
-	}
-	if support == nil {
-		return value
-	}
-
-	// Auto value (-1) passes through without clamping.
-	if value == -1 {
-		return value
-	}
-
-	min := support.Min
-	max := support.Max
-	if value == 0 && !support.ZeroAllowed {
-		log.WithFields(log.Fields{
-			"provider":       provider,
-			"model":          model,
-			"original_value": value,
-			"clamped_to":     min,
-			"min":            min,
-			"max":            max,
-		}).Warn("thinking: budget zero not allowed |")
-		return min
-	}
-
-	// Some models are level-only and do not define numeric budget ranges.
-	if min == 0 && max == 0 {
-		return value
-	}
-
-	if value < min {
-		if value == 0 && support.ZeroAllowed {
-			return 0
-		}
-		logClamp(provider, model, value, min, min, max)
-		return min
-	}
-	if value > max {
-		logClamp(provider, model, value, max, min, max)
-		return max
-	}
-	return value
-}
-
 // ValidateConfig validates a thinking configuration against model capabilities.
 //
 // This function performs comprehensive validation:
@@ -74,10 +16,16 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int
 //   - Auto-converts between Budget and Level formats based on model capability
 //   - Validates that requested level is in the model's supported levels list
 //   - Clamps budget values to model's allowed range
+//   - When converting Budget -> Level for level-only models, clamps the derived standard level to the nearest supported level
+//     (special values none/auto are preserved)
+//   - When config comes from a model suffix, strict budget validation is disabled (we clamp instead of error)
 //
 // Parameters:
 //   - config: The thinking configuration to validate
 //   - support: Model's ThinkingSupport properties (nil means no thinking support)
+//   - fromFormat: Source provider format (used to determine strict validation rules)
+//   - toFormat: Target provider format
+//   - fromSuffix: Whether config was sourced from model suffix
 //
 // Returns:
 //   - Normalized ThinkingConfig with clamped values
@@ -87,9 +35,8 @@ func ClampBudget(value int, modelInfo *registry.ModelInfo, provider string) int
 //   - Budget-only model + Level config → Level converted to Budget
 //   - Level-only model + Budget config → Budget converted to Level
 //   - Hybrid model → preserve original format
-func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provider string) (*ThinkingConfig, error) {
-	normalized := config
-
+func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, fromFormat, toFormat string, fromSuffix bool) (*ThinkingConfig, error) {
+	fromFormat, toFormat = strings.ToLower(strings.TrimSpace(fromFormat)), strings.ToLower(strings.TrimSpace(toFormat))
 	model := "unknown"
 	support := (*registry.ThinkingSupport)(nil)
 	if modelInfo != nil {
@@ -103,101 +50,108 @@ func ValidateConfig(config ThinkingConfig, modelInfo *registry.ModelInfo, provid
 		if config.Mode != ModeNone {
 			return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", model)
 		}
-		return &normalized, nil
+		return &config, nil
 	}

+	allowClampUnsupported := isBudgetBasedProvider(fromFormat) && isLevelBasedProvider(toFormat)
+	strictBudget := !fromSuffix && fromFormat != "" && isSameProviderFamily(fromFormat, toFormat)
+	budgetDerivedFromLevel := false
+
 	capability := detectModelCapability(modelInfo)
 	switch capability {
 	case CapabilityBudgetOnly:
-		if normalized.Mode == ModeLevel {
-			if normalized.Level == LevelAuto {
+		if config.Mode == ModeLevel {
+			if config.Level == LevelAuto {
 				break
 			}
-			budget, ok := ConvertLevelToBudget(string(normalized.Level))
+			budget, ok := ConvertLevelToBudget(string(config.Level))
 			if !ok {
-				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", normalized.Level))
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", config.Level))
 			}
-			normalized.Mode = ModeBudget
-			normalized.Budget = budget
-			normalized.Level = ""
+			config.Mode = ModeBudget
+			config.Budget = budget
+			config.Level = ""
+			budgetDerivedFromLevel = true
 		}
 	case CapabilityLevelOnly:
-		if normalized.Mode == ModeBudget {
-			level, ok := ConvertBudgetToLevel(normalized.Budget)
+		if config.Mode == ModeBudget {
+			level, ok := ConvertBudgetToLevel(config.Budget)
 			if !ok {
-				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget))
+				return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", config.Budget))
 			}
-			normalized.Mode = ModeLevel
-			normalized.Level = ThinkingLevel(level)
-			normalized.Budget = 0
+			// When converting Budget -> Level for level-only models, clamp the derived standard level
+			// to the nearest supported level. Special values (none/auto) are preserved.
+			config.Mode = ModeLevel
+			config.Level = clampLevel(ThinkingLevel(level), modelInfo, toFormat)
+			config.Budget = 0
 		}
 	case CapabilityHybrid:
 	}

-	if normalized.Mode == ModeLevel && normalized.Level == LevelNone {
-		normalized.Mode = ModeNone
-		normalized.Budget = 0
-		normalized.Level = ""
+	if config.Mode == ModeLevel && config.Level == LevelNone {
+		config.Mode = ModeNone
+		config.Budget = 0
+		config.Level = ""
 	}
-	if normalized.Mode == ModeLevel && normalized.Level == LevelAuto {
-		normalized.Mode = ModeAuto
-		normalized.Budget = -1
-		normalized.Level = ""
+	if config.Mode == ModeLevel && config.Level == LevelAuto {
+		config.Mode = ModeAuto
+		config.Budget = -1
+		config.Level = ""
 	}
-	if normalized.Mode == ModeBudget && normalized.Budget == 0 {
-		normalized.Mode = ModeNone
-		normalized.Level = ""
+	if config.Mode == ModeBudget && config.Budget == 0 {
+		config.Mode = ModeNone
+		config.Level = ""
 	}

-	if len(support.Levels) > 0 && normalized.Mode == ModeLevel {
-		if !isLevelSupported(string(normalized.Level), support.Levels) {
-			validLevels := normalizeLevels(support.Levels)
-			message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", "))
-			return nil, NewThinkingError(ErrLevelNotSupported, message)
+	if len(support.Levels) > 0 && config.Mode == ModeLevel {
+		if !isLevelSupported(string(config.Level), support.Levels) {
+			if allowClampUnsupported {
+				config.Level = clampLevel(config.Level, modelInfo, toFormat)
+			}
+			if !isLevelSupported(string(config.Level), support.Levels) {
+				// User explicitly specified an unsupported level - return error
+				// (budget-derived levels may be clamped based on source format)
+				validLevels := normalizeLevels(support.Levels)
+				message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(config.Level)), strings.Join(validLevels, ", "))
+				return nil, NewThinkingError(ErrLevelNotSupported, message)
+			}
+		}
+	}
+
+	if strictBudget && config.Mode == ModeBudget && !budgetDerivedFromLevel {
+		min, max := support.Min, support.Max
+		if min != 0 || max != 0 {
+			if config.Budget < min || config.Budget > max || (config.Budget == 0 && !support.ZeroAllowed) {
+				message := fmt.Sprintf("budget %d out of range [%d,%d]", config.Budget, min, max)
+				return nil, NewThinkingError(ErrBudgetOutOfRange, message)
+			}
 		}
 	}

 	// Convert ModeAuto to mid-range if dynamic not allowed
-	if normalized.Mode == ModeAuto && !support.DynamicAllowed {
-		normalized = convertAutoToMidRange(normalized, support, provider, model)
+	if config.Mode == ModeAuto && !support.DynamicAllowed {
+		config = convertAutoToMidRange(config, support, toFormat, model)
 	}

-	if normalized.Mode == ModeNone && provider == "claude" {
+	if config.Mode == ModeNone && toFormat == "claude" {
 		// Claude supports explicit disable via thinking.type="disabled".
 		// Keep Budget=0 so applier can omit budget_tokens.
-		normalized.Budget = 0
-		normalized.Level = ""
+		config.Budget = 0
+		config.Level = ""
 	} else {
-		switch normalized.Mode {
+		switch config.Mode {
 		case ModeBudget, ModeAuto, ModeNone:
-			normalized.Budget = ClampBudget(normalized.Budget, modelInfo, provider)
+			config.Budget = clampBudget(config.Budget, modelInfo, toFormat)
 		}

 		// ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models
 		// This ensures Apply layer doesn't need to access support.Levels
-		if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 {
-			normalized.Level = ThinkingLevel(support.Levels[0])
+		if config.Mode == ModeNone && config.Budget > 0 && len(support.Levels) > 0 {
+			config.Level = ThinkingLevel(support.Levels[0])
 		}
 	}

-	return &normalized, nil
-}
-
-func isLevelSupported(level string, supported []string) bool {
-	for _, candidate := range supported {
-		if strings.EqualFold(level, strings.TrimSpace(candidate)) {
-			return true
-		}
-	}
-	return false
-}
-
-func normalizeLevels(levels []string) []string {
-	normalized := make([]string, 0, len(levels))
-	for _, level := range levels {
-		normalized = append(normalized, strings.ToLower(strings.TrimSpace(level)))
-	}
-	return normalized
+	return &config, nil
 }

 // convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed.
@@ -246,7 +200,172 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp
 	return config
 }

-// logClamp logs a debug message when budget clamping occurs.
+// standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest.
+var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh}
+
+// clampLevel clamps the given level to the nearest supported level, measured by position in standardLevelOrder.
+// On tie, prefers the lower level. The level is returned unchanged when clamping is not needed or not possible.
+func clampLevel(level ThinkingLevel, modelInfo *registry.ModelInfo, provider string) ThinkingLevel {
+	model := "unknown"
+	var supported []string
+	if modelInfo != nil {
+		if modelInfo.ID != "" {
+			model = modelInfo.ID
+		}
+		if modelInfo.Thinking != nil {
+			supported = modelInfo.Thinking.Levels
+		}
+	}
+
+	if len(supported) == 0 || isLevelSupported(string(level), supported) {
+		return level // no level list, or the level is already supported: nothing to clamp
+	}
+
+	pos := levelIndex(string(level))
+	if pos == -1 {
+		return level // level is not in standardLevelOrder, so no distance can be computed
+	}
+	bestIdx, bestDist := -1, len(standardLevelOrder)+1 // bestDist starts above any possible distance
+
+	for _, s := range supported {
+		if idx := levelIndex(strings.TrimSpace(s)); idx != -1 { // supported entries outside standardLevelOrder are ignored
+			if dist := abs(pos - idx); dist < bestDist || (dist == bestDist && idx < bestIdx) {
+				bestIdx, bestDist = idx, dist
+			}
+		}
+	}
+
+	if bestIdx >= 0 {
+		clamped := standardLevelOrder[bestIdx]
+		log.WithFields(log.Fields{
+			"provider":       provider,
+			"model":          model,
+			"original_value": string(level),
+			"clamped_to":     string(clamped),
+		}).Debug("thinking: level clamped |")
+		return clamped
+	}
+	return level // none of the supported levels appears in standardLevelOrder
+}
+
+// clampBudget clamps a budget value to the model's supported [Min, Max] range; provider is used only for log fields.
+func clampBudget(value int, modelInfo *registry.ModelInfo, provider string) int {
+	model := "unknown"
+	support := (*registry.ThinkingSupport)(nil)
+	if modelInfo != nil {
+		if modelInfo.ID != "" {
+			model = modelInfo.ID
+		}
+		support = modelInfo.Thinking
+	}
+	if support == nil {
+		return value // no thinking metadata: nothing to clamp against
+	}
+
+	// Auto value (-1) passes through without clamping.
+	if value == -1 {
+		return value
+	}
+
+	min, max := support.Min, support.Max
+	if value == 0 && !support.ZeroAllowed {
+		log.WithFields(log.Fields{
+			"provider":       provider,
+			"model":          model,
+			"original_value": value,
+			"clamped_to":     min,
+			"min":            min,
+			"max":            max,
+		}).Warn("thinking: budget zero not allowed |")
+		return min // a disallowed zero is replaced by the minimum
+	}
+
+	// Some models are level-only and do not define numeric budget ranges.
+	if min == 0 && max == 0 {
+		return value
+	}
+
+	if value < min {
+		if value == 0 && support.ZeroAllowed {
+			return 0 // an allowed zero is kept rather than raised to min
+		}
+		logClamp(provider, model, value, min, min, max)
+		return min
+	}
+	if value > max {
+		logClamp(provider, model, value, max, min, max)
+		return max
+	}
+	return value
+}
+
+func isLevelSupported(level string, supported []string) bool { // case-insensitive match against whitespace-trimmed supported entries
+	for _, s := range supported {
+		if strings.EqualFold(level, strings.TrimSpace(s)) {
+			return true
+		}
+	}
+	return false
+}
+
+func levelIndex(level string) int { // position of level in standardLevelOrder (case-insensitive), or -1 when absent
+	for i, l := range standardLevelOrder {
+		if strings.EqualFold(level, string(l)) {
+			return i
+		}
+	}
+	return -1
+}
+
+func normalizeLevels(levels []string) []string { // returns a lowercased, whitespace-trimmed copy of levels
+	out := make([]string, len(levels))
+	for i, l := range levels {
+		out[i] = strings.ToLower(strings.TrimSpace(l))
+	}
+	return out
+}
+
+func isBudgetBasedProvider(provider string) bool { // exact, case-sensitive match on the provider name
+	switch provider {
+	case "gemini", "gemini-cli", "antigravity", "claude":
+		return true
+	default:
+		return false
+	}
+}
+
+func isLevelBasedProvider(provider string) bool { // exact, case-sensitive match on the provider name
+	switch provider {
+	case "openai", "openai-response", "codex":
+		return true
+	default:
+		return false
+	}
+}
+
+func isGeminiFamily(provider string) bool { // exact, case-sensitive match against gemini, gemini-cli and antigravity
+	switch provider {
+	case "gemini", "gemini-cli", "antigravity":
+		return true
+	default:
+		return false
+	}
+}
+
+func isSameProviderFamily(from, to string) bool { // true for identical names, or when both are in the Gemini family
+	if from == to {
+		return true
+	}
+	return isGeminiFamily(from) && isGeminiFamily(to)
+}
+
+func abs(x int) int { // absolute value of x
+	if x < 0 {
+		return -x
+	}
+	return x
+}
+
 func logClamp(provider, model string, original, clampedTo, min, max int) {
 	log.WithFields(log.Fields{
 		"provider":       provider,
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -7,12 +7,9 @@ package claude

 import (
 	"bytes"
-	"crypto/sha256"
-	"encoding/hex"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -20,29 +17,6 @@ import (
 	"github.com/tidwall/sjson"
 )

-// deriveSessionID generates a stable session ID from the request.
-// Uses the hash of the first user message to identify the conversation.
-func deriveSessionID(rawJSON []byte) string {
-	messages := gjson.GetBytes(rawJSON, "messages")
-	if !messages.IsArray() {
-		return ""
-	}
-	for _, msg := range messages.Array() {
-		if msg.Get("role").String() == "user" {
-			content := msg.Get("content").String()
-			if content == "" {
-				// Try to get text from content array
-				content = msg.Get("content.0.text").String()
-			}
-			if content != "" {
-				h := sha256.Sum256([]byte(content))
-				return hex.EncodeToString(h[:16])
-			}
-		}
-	}
-	return ""
-}
-
 // ConvertClaudeRequestToAntigravity parses and transforms a Claude Code API request into Gemini CLI API format.
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the Gemini CLI API.
@@ -62,11 +36,9 @@ func deriveSessionID(rawJSON []byte) string {
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
+	enableThoughtTranslate := true
 	rawJSON := bytes.Clone(inputRawJSON)

-	// Derive session ID for signature caching
-	sessionID := deriveSessionID(rawJSON)
-
 	// system instruction
 	systemInstructionJSON := ""
 	hasSystemInstruction := false
@@ -125,41 +97,49 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
 						// Use GetThinkingText to handle wrapped thinking objects
 						thinkingText := thinking.GetThinkingText(contentResult)
-						signatureResult := contentResult.Get("signature")
-						clientSignature := ""
-						if signatureResult.Exists() && signatureResult.String() != "" {
-							clientSignature = signatureResult.String()
-						}

 						// Always try cached signature first (more reliable than client-provided)
 						// Client may send stale or invalid signatures from different sessions
 						signature := ""
-						if sessionID != "" && thinkingText != "" {
-							if cachedSig := cache.GetCachedSignature(sessionID, thinkingText); cachedSig != "" {
+						if thinkingText != "" {
+							if cachedSig := cache.GetCachedSignature(modelName, thinkingText); cachedSig != "" {
 								signature = cachedSig
 								// log.Debugf("Using cached signature for thinking block")
 							}
 						}

 						// Fallback to client signature only if cache miss and client signature is valid
-						if signature == "" && cache.HasValidSignature(clientSignature) {
-							signature = clientSignature
+						if signature == "" {
+							signatureResult := contentResult.Get("signature")
+							clientSignature := ""
+							if signatureResult.Exists() && signatureResult.String() != "" {
+								arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
+								if len(arrayClientSignatures) == 2 {
+									if modelName == arrayClientSignatures[0] {
+										clientSignature = arrayClientSignatures[1]
+									}
+								}
+							}
+							if cache.HasValidSignature(modelName, clientSignature) {
+								signature = clientSignature
+							}
 							// log.Debugf("Using client-provided signature for thinking block")
 						}

 						// Store for subsequent tool_use in the same message
-						if cache.HasValidSignature(signature) {
+						if cache.HasValidSignature(modelName, signature) {
 							currentMessageThinkingSignature = signature
 						}

 						// Skip trailing unsigned thinking blocks on last assistant message
-						isUnsigned := !cache.HasValidSignature(signature)
+						isUnsigned := !cache.HasValidSignature(modelName, signature)

 						// If unsigned, skip entirely (don't convert to text)
 						// Claude requires assistant messages to start with thinking blocks when thinking is enabled
 						// Converting to text would break this requirement
 						if isUnsigned {
 							// log.Debugf("Dropping unsigned thinking block (no valid signature)")
+							enableThoughtTranslate = false
 							continue
 						}

@@ -207,7 +187,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							// This is the approach used in opencode-google-antigravity-auth for Gemini
 							// and also works for Claude through Antigravity API
 							const skipSentinel = "skip_thought_signature_validator"
-							if cache.HasValidSignature(currentMessageThinkingSignature) {
+							if cache.HasValidSignature(modelName, currentMessageThinkingSignature) {
 								partJSON, _ = sjson.Set(partJSON, "thoughtSignature", currentMessageThinkingSignature)
 							} else {
 								// No valid signature - use skip sentinel to bypass validation
@@ -387,15 +367,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	}

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
-		modelInfo := registry.LookupModelInfo(modelName)
-		if modelInfo != nil && modelInfo.Thinking != nil {
-			if t.Get("type").String() == "enabled" {
-				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-					budget := int(b.Int())
-					out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-					out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-				}
+	if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -4,6 +4,7 @@ import (
 	"strings"
 	"testing"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
 	"github.com/tidwall/gjson"
 )

@@ -73,30 +74,41 @@ func TestConvertClaudeRequestToAntigravity_RoleMapping(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Valid signature must be at least 50 characters
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
+	// The signature is pre-cached after the request is built, just before conversion (simulating a previous response for the same thinking text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{"type": "text", "text": "Answer"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Check thinking block conversion
-	firstPart := gjson.Get(outputStr, "request.contents.0.parts.0")
+	// Check thinking block conversion (now in contents.1 due to user message)
+	firstPart := gjson.Get(outputStr, "request.contents.1.parts.0")
 	if !firstPart.Get("thought").Bool() {
 		t.Error("thinking block should have thought: true")
 	}
-	if firstPart.Get("text").String() != "Let me think..." {
+	if firstPart.Get("text").String() != thinkingText {
 		t.Error("thinking text mismatch")
 	}
 	if firstPart.Get("thoughtSignature").String() != validSignature {
@@ -105,6 +117,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingBlocks(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ThinkingBlockWithoutSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Unsigned thinking blocks should be removed entirely (not converted to text)
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
@@ -226,14 +240,22 @@ func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Let me think..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "Let me think...", "signature": "` + validSignature + `"},
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"},
 					{
 						"type": "tool_use",
 						"id": "call_123",
@@ -245,11 +267,13 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Check function call has the signature from the preceding thinking block
-	part := gjson.Get(outputStr, "request.contents.0.parts.1")
+	// Check function call has the signature from the preceding thinking block (now in contents.1)
+	part := gjson.Get(outputStr, "request.contents.1.parts.1")
 	if part.Get("functionCall.name").String() != "get_weather" {
 		t.Errorf("Expected functionCall, got %s", part.Raw)
 	}
@@ -259,26 +283,36 @@ func TestConvertClaudeRequestToAntigravity_ToolUse_WithSignature(t *testing.T) {
 }

 func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Case: text block followed by thinking block -> should be reordered to thinking first
 	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Planning..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
+			{
+				"role": "user",
+				"content": [{"type": "text", "text": "Test user message"}]
+			},
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is the plan."},
-					{"type": "thinking", "thinking": "Planning...", "signature": "` + validSignature + `"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

-	// Verify order: Thinking block MUST be first
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
+	// Verify order: Thinking block MUST be first (now in contents.1 due to user message)
+	parts := gjson.Get(outputStr, "request.contents.1.parts").Array()
 	if len(parts) != 2 {
 		t.Fatalf("Expected 2 parts, got %d", len(parts))
 	}
@@ -343,8 +377,8 @@ func TestConvertClaudeRequestToAntigravity_ThinkingConfig(t *testing.T) {
 		if thinkingConfig.Get("thinkingBudget").Int() != 8000 {
 			t.Errorf("Expected thinkingBudget 8000, got %d", thinkingConfig.Get("thinkingBudget").Int())
 		}
-		if !thinkingConfig.Get("include_thoughts").Bool() {
-			t.Error("include_thoughts should be true")
+		if !thinkingConfig.Get("includeThoughts").Bool() {
+			t.Error("includeThoughts should be true")
 		}
 	} else {
 		t.Log("thinkingConfig not present - model may not be registered in test registry")
@@ -459,7 +493,12 @@ func TestConvertClaudeRequestToAntigravity_TrailingUnsignedThinking_Removed(t *t
 }

 func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testing.T) {
+	cache.ClearSignatureCache("")
+
 	// Last assistant message ends with signed thinking block - should be kept
+	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
+	thinkingText := "Valid thinking..."
+
 	inputJSON := []byte(`{
 		"model": "claude-sonnet-4-5-thinking",
 		"messages": [
@@ -471,12 +510,14 @@ func TestConvertClaudeRequestToAntigravity_TrailingSignedThinking_Kept(t *testin
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "Here is my answer"},
-					{"type": "thinking", "thinking": "Valid thinking...", "signature": "abc123validSignature1234567890123456789012345678901234567890"}
+					{"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}
 				]
 			}
 		]
 	}`)

+	cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature)
+
 	output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false)
 	outputStr := string(output)

--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -41,7 +41,6 @@ type Params struct {
 	HasContent           bool   // Tracks whether any content (text, thinking, or tool use) has been output

 	// Signature caching support
-	SessionID           string          // Session ID derived from request for signature caching
 	CurrentThinkingText strings.Builder // Accumulates thinking text for signature caching
 }

@@ -70,9 +69,9 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 			HasFirstResponse: false,
 			ResponseType:     0,
 			ResponseIndex:    0,
-			SessionID:        deriveSessionID(originalRequestRawJSON),
 		}
 	}
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()

 	params := (*param).(*Params)

@@ -138,14 +137,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 					if thoughtSignature := partResult.Get("thoughtSignature"); thoughtSignature.Exists() && thoughtSignature.String() != "" {
 						// log.Debug("Branch: signature_delta")

-						if params.SessionID != "" && params.CurrentThinkingText.Len() > 0 {
-							cache.CacheSignature(params.SessionID, params.CurrentThinkingText.String(), thoughtSignature.String())
-							// log.Debugf("Cached signature for thinking block (sessionID=%s, textLen=%d)", params.SessionID, params.CurrentThinkingText.Len())
+						if params.CurrentThinkingText.Len() > 0 {
+							cache.CacheSignature(modelName, params.CurrentThinkingText.String(), thoughtSignature.String())
+							// log.Debugf("Cached signature for thinking block (textLen=%d)", params.CurrentThinkingText.Len())
 							params.CurrentThinkingText.Reset()
 						}

 						output = output + "event: content_block_delta\n"
-						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
+						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thoughtSignature.String()))
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						params.HasContent = true
 					} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
@@ -372,7 +371,7 @@ func resolveStopReason(params *Params) string {
 //   - string: A Claude-compatible JSON response.
 func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	_ = originalRequestRawJSON
-	_ = requestRawJSON
+	modelName := gjson.GetBytes(requestRawJSON, "model").String()

 	root := gjson.ParseBytes(rawJSON)
 	promptTokens := root.Get("response.usageMetadata.promptTokenCount").Int()
@@ -437,7 +436,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 		block := `{"type":"thinking","thinking":""}`
 		block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
 		if thinkingSignature != "" {
-			block, _ = sjson.Set(block, "signature", thinkingSignature)
+			block, _ = sjson.Set(block, "signature", fmt.Sprintf("%s#%s", cache.GetModelGroup(modelName), thinkingSignature))
 		}
 		responseJSON, _ = sjson.SetRaw(responseJSON, "content.-1", block)
 		thinkingBuilder.Reset()
--- a/internal/translator/antigravity/claude/antigravity_claude_response_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response_test.go
@@ -12,10 +12,10 @@ import (
 // Signature Caching Tests
 // ============================================================================

-func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
+func TestConvertAntigravityResponseToClaude_ParamsInitialized(t *testing.T) {
 	cache.ClearSignatureCache("")

-	// Request with user message - should derive session ID
+	// Request with user message - should initialize params
 	requestJSON := []byte(`{
 		"messages": [
 			{"role": "user", "content": [{"type": "text", "text": "Hello world"}]}
@@ -37,10 +37,12 @@ func TestConvertAntigravityResponseToClaude_SessionIDDerived(t *testing.T) {
 	ctx := context.Background()
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, responseJSON, &param)

-	// Verify session ID was set
 	params := param.(*Params)
-	if params.SessionID == "" {
-		t.Error("SessionID should be derived from request")
+	if !params.HasFirstResponse {
+		t.Error("HasFirstResponse should be set after first chunk")
+	}
+	if params.CurrentThinkingText.Len() == 0 {
+		t.Error("Thinking text should be accumulated")
 	}
 }

@@ -97,6 +99,7 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	cache.ClearSignatureCache("")

 	requestJSON := []byte(`{
+		"model": "claude-sonnet-4-5-thinking",
 		"messages": [{"role": "user", "content": [{"type": "text", "text": "Cache test"}]}]
 	}`)

@@ -129,12 +132,8 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	// Process thinking chunk
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, thinkingChunk, &param)
 	params := param.(*Params)
-	sessionID := params.SessionID
 	thinkingText := params.CurrentThinkingText.String()

-	if sessionID == "" {
-		t.Fatal("SessionID should be set")
-	}
 	if thinkingText == "" {
 		t.Fatal("Thinking text should be accumulated")
 	}
@@ -143,7 +142,7 @@ func TestConvertAntigravityResponseToClaude_SignatureCached(t *testing.T) {
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, signatureChunk, &param)

 	// Verify signature was cached
-	cachedSig := cache.GetCachedSignature(sessionID, thinkingText)
+	cachedSig := cache.GetCachedSignature("claude-sonnet-4-5-thinking", thinkingText)
 	if cachedSig != validSignature {
 		t.Errorf("Expected cached signature '%s', got '%s'", validSignature, cachedSig)
 	}
@@ -158,6 +157,7 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	cache.ClearSignatureCache("")

 	requestJSON := []byte(`{
+		"model": "claude-sonnet-4-5-thinking",
 		"messages": [{"role": "user", "content": [{"type": "text", "text": "Multi block test"}]}]
 	}`)

@@ -221,13 +221,12 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	// Process first thinking block
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block1Thinking, &param)
 	params := param.(*Params)
-	sessionID := params.SessionID
 	firstThinkingText := params.CurrentThinkingText.String()

 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block1Sig, &param)

 	// Verify first signature cached
-	if cache.GetCachedSignature(sessionID, firstThinkingText) != validSig1 {
+	if cache.GetCachedSignature("claude-sonnet-4-5-thinking", firstThinkingText) != validSig1 {
 		t.Error("First thinking block signature should be cached")
 	}

@@ -241,76 +240,7 @@ func TestConvertAntigravityResponseToClaude_MultipleThinkingBlocks(t *testing.T)
 	ConvertAntigravityResponseToClaude(ctx, "claude-sonnet-4-5-thinking", requestJSON, requestJSON, block2Sig, &param)

 	// Verify second signature cached
-	if cache.GetCachedSignature(sessionID, secondThinkingText) != validSig2 {
+	if cache.GetCachedSignature("claude-sonnet-4-5-thinking", secondThinkingText) != validSig2 {
 		t.Error("Second thinking block signature should be cached")
 	}
 }
-
-func TestDeriveSessionIDFromRequest(t *testing.T) {
-	tests := []struct {
-		name      string
-		input     []byte
-		wantEmpty bool
-	}{
-		{
-			name:      "valid user message",
-			input:     []byte(`{"messages": [{"role": "user", "content": "Hello"}]}`),
-			wantEmpty: false,
-		},
-		{
-			name:      "user message with content array",
-			input:     []byte(`{"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]}`),
-			wantEmpty: false,
-		},
-		{
-			name:      "no user message",
-			input:     []byte(`{"messages": [{"role": "assistant", "content": "Hi"}]}`),
-			wantEmpty: true,
-		},
-		{
-			name:      "empty messages",
-			input:     []byte(`{"messages": []}`),
-			wantEmpty: true,
-		},
-		{
-			name:      "no messages field",
-			input:     []byte(`{}`),
-			wantEmpty: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := deriveSessionID(tt.input)
-			if tt.wantEmpty && result != "" {
-				t.Errorf("Expected empty session ID, got '%s'", result)
-			}
-			if !tt.wantEmpty && result == "" {
-				t.Error("Expected non-empty session ID")
-			}
-		})
-	}
-}
-
-func TestDeriveSessionIDFromRequest_Deterministic(t *testing.T) {
-	input := []byte(`{"messages": [{"role": "user", "content": "Same message"}]}`)
-
-	id1 := deriveSessionID(input)
-	id2 := deriveSessionID(input)
-
-	if id1 != id2 {
-		t.Errorf("Session ID should be deterministic: '%s' != '%s'", id1, id2)
-	}
-}
-
-func TestDeriveSessionIDFromRequest_DifferentMessages(t *testing.T) {
-	input1 := []byte(`{"messages": [{"role": "user", "content": "Message A"}]}`)
-	input2 := []byte(`{"messages": [{"role": "user", "content": "Message B"}]}`)
-
-	id1 := deriveSessionID(input1)
-	id2 := deriveSessionID(input2)
-
-	if id1 == id2 {
-		t.Error("Different messages should produce different session IDs")
-	}
-}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -8,6 +8,7 @@ package gemini
 import (
 	"bytes"
 	"fmt"
+	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -32,12 +33,12 @@ import (
 //
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
-func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []byte {
+func ConvertGeminiRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
-	template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
+	template, _ = sjson.Set(template, "model", modelName)
 	template, _ = sjson.Delete(template, "request.model")

 	template, errFixCLIToolResponse := fixCLIToolResponse(template)
@@ -97,37 +98,40 @@ func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []
 		}
 	}

-	// Gemini-specific handling: add skip_thought_signature_validator to functionCall parts
-	// and remove thinking blocks entirely (Gemini doesn't need to preserve them)
-	const skipSentinel = "skip_thought_signature_validator"
+	// Gemini-specific handling for non-Claude models:
+	// - Add skip_thought_signature_validator to functionCall parts so upstream can bypass signature validation.
+	// - Also mark thinking parts with the same sentinel when present (we keep the parts; we only annotate them).
+	if !strings.Contains(modelName, "claude") {
+		const skipSentinel = "skip_thought_signature_validator"

-	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(contentIdx, content gjson.Result) bool {
-		if content.Get("role").String() == "model" {
-			// First pass: collect indices of thinking parts to remove
-			var thinkingIndicesToRemove []int64
-			content.Get("parts").ForEach(func(partIdx, part gjson.Result) bool {
-				// Mark thinking blocks for removal
-				if part.Get("thought").Bool() {
-					thinkingIndicesToRemove = append(thinkingIndicesToRemove, partIdx.Int())
-				}
-				// Add skip sentinel to functionCall parts
-				if part.Get("functionCall").Exists() {
-					existingSig := part.Get("thoughtSignature").String()
-					if existingSig == "" || len(existingSig) < 50 {
-						rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), partIdx.Int()), skipSentinel)
+		gjson.GetBytes(rawJSON, "request.contents").ForEach(func(contentIdx, content gjson.Result) bool {
+			if content.Get("role").String() == "model" {
+				// First pass: collect indices of thinking parts to mark with skip sentinel
+				var thinkingIndicesToSkipSignature []int64
+				content.Get("parts").ForEach(func(partIdx, part gjson.Result) bool {
+					// Collect indices of thinking blocks to mark with skip sentinel
+					if part.Get("thought").Bool() {
+						thinkingIndicesToSkipSignature = append(thinkingIndicesToSkipSignature, partIdx.Int())
 					}
-				}
-				return true
-			})
+					// Add skip sentinel to functionCall parts
+					if part.Get("functionCall").Exists() {
+						existingSig := part.Get("thoughtSignature").String()
+						if existingSig == "" || len(existingSig) < 50 {
+							rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), partIdx.Int()), skipSentinel)
+						}
+					}
+					return true
+				})

-			// Remove thinking blocks in reverse order to preserve indices
-			for i := len(thinkingIndicesToRemove) - 1; i >= 0; i-- {
-				idx := thinkingIndicesToRemove[i]
-				rawJSON, _ = sjson.DeleteBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d", contentIdx.Int(), idx))
+				// Add skip_thought_signature_validator sentinel to the collected thinking blocks (setting a field does not shift part indices, so the iteration order is not significant)
+				for i := len(thinkingIndicesToSkipSignature) - 1; i >= 0; i-- {
+					idx := thinkingIndicesToSkipSignature[i]
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", contentIdx.Int(), idx), skipSentinel)
+				}
 			}
-		}
-		return true
-	})
+			return true
+		})
+	}

 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request_test.go
@@ -62,40 +62,6 @@ func TestConvertGeminiRequestToAntigravity_AddSkipSentinelToFunctionCall(t *test
 	}
 }

-func TestConvertGeminiRequestToAntigravity_RemoveThinkingBlocks(t *testing.T) {
-	// Thinking blocks should be removed entirely for Gemini
-	validSignature := "abc123validSignature1234567890123456789012345678901234567890"
-	inputJSON := []byte(fmt.Sprintf(`{
-		"model": "gemini-3-pro-preview",
-		"contents": [
-			{
-				"role": "model",
-				"parts": [
-					{"thought": true, "text": "Thinking...", "thoughtSignature": "%s"},
-					{"text": "Here is my response"}
-				]
-			}
-		]
-	}`, validSignature))
-
-	output := ConvertGeminiRequestToAntigravity("gemini-3-pro-preview", inputJSON, false)
-	outputStr := string(output)
-
-	// Check that thinking block is removed
-	parts := gjson.Get(outputStr, "request.contents.0.parts").Array()
-	if len(parts) != 1 {
-		t.Fatalf("Expected 1 part (thinking removed), got %d", len(parts))
-	}
-
-	// Only text part should remain
-	if parts[0].Get("thought").Bool() {
-		t.Error("Thinking block should be removed for Gemini")
-	}
-	if parts[0].Get("text").String() != "Here is my response" {
-		t.Errorf("Expected text 'Here is my response', got '%s'", parts[0].Get("text").String())
-	}
-}
-
 func TestConvertGeminiRequestToAntigravity_ParallelFunctionCalls(t *testing.T) {
 	// Multiple functionCalls should all get skip_thought_signature_validator
 	inputJSON := []byte(`{
--- a/internal/translator/antigravity/gemini/antigravity_gemini_response.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_response.go
@@ -41,6 +41,7 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
 			responseResult := gjson.GetBytes(rawJSON, "response")
 			if responseResult.Exists() {
 				chunk = []byte(responseResult.Raw)
+				chunk = restoreUsageMetadata(chunk)
 			}
 		} else {
 			chunkTemplate := "[]"
@@ -76,7 +77,8 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
 func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
 	responseResult := gjson.GetBytes(rawJSON, "response")
 	if responseResult.Exists() {
-		return responseResult.Raw
+		chunk := restoreUsageMetadata([]byte(responseResult.Raw))
+		return string(chunk)
 	}
 	return string(rawJSON)
 }
@@ -84,3 +86,15 @@ func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, or
 func GeminiTokenCount(ctx context.Context, count int64) string {
 	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
 }
+
+// restoreUsageMetadata renames cpaUsageMetadata back to usageMetadata.
+// The executor renames usageMetadata to cpaUsageMetadata in non-terminal chunks
+// to preserve usage data while hiding it from clients that don't expect it.
+// When returning standard Gemini API format, we must restore the original name.
+func restoreUsageMetadata(chunk []byte) []byte {
+	if cpaUsage := gjson.GetBytes(chunk, "cpaUsageMetadata"); cpaUsage.Exists() {
+		chunk, _ = sjson.SetRawBytes(chunk, "usageMetadata", []byte(cpaUsage.Raw))
+		chunk, _ = sjson.DeleteBytes(chunk, "cpaUsageMetadata")
+	}
+	return chunk
+}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_response_test.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_response_test.go
@@ -0,0 +1,95 @@
+package gemini
+
+import (
+	"context"
+	"testing"
+)
+
+func TestRestoreUsageMetadata(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata renamed to usageMetadata",
+			input:    []byte(`{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`,
+		},
+		{
+			name:     "no cpaUsageMetadata unchanged",
+			input:    []byte(`{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+		{
+			name:     "empty input",
+			input:    []byte(`{}`),
+			expected: `{}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := restoreUsageMetadata(tt.input)
+			if string(result) != tt.expected {
+				t.Errorf("restoreUsageMetadata() = %s, want %s", string(result), tt.expected)
+			}
+		})
+	}
+}
+
+func TestConvertAntigravityResponseToGeminiNonStream(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata restored in response",
+			input:    []byte(`{"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+		{
+			name:     "usageMetadata preserved",
+			input:    []byte(`{"response":{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ConvertAntigravityResponseToGeminiNonStream(context.Background(), "", nil, nil, tt.input, nil)
+			if result != tt.expected {
+				t.Errorf("ConvertAntigravityResponseToGeminiNonStream() = %s, want %s", result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestConvertAntigravityResponseToGeminiStream(t *testing.T) {
+	ctx := context.WithValue(context.Background(), "alt", "")
+
+	tests := []struct {
+		name     string
+		input    []byte
+		expected string
+	}{
+		{
+			name:     "cpaUsageMetadata restored in streaming response",
+			input:    []byte(`data: {"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
+			expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			results := ConvertAntigravityResponseToGemini(ctx, "", nil, nil, tt.input, nil)
+			if len(results) != 1 {
+				t.Fatalf("expected 1 result, got %d", len(results))
+			}
+			if results[0] != tt.expected {
+				t.Errorf("ConvertAntigravityResponseToGemini() = %s, want %s", results[0], tt.expected)
+			}
+		})
+	}
+}
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -66,6 +66,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
 	}

+	// Candidate count (OpenAI 'n' parameter); only forwarded when n is a number greater than 1
+	if n := gjson.GetBytes(rawJSON, "n"); n.Exists() && n.Type == gjson.Number {
+		if val := n.Int(); val > 1 {
+			out, _ = sjson.SetBytes(out, "request.generationConfig.candidateCount", val)
+		}
+	}
+
 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
 	if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() {
@@ -132,6 +139,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			}
 		}

+		systemPartIndex := 0
 		for i := 0; i < len(arr); i++ {
 			m := arr[i]
 			role := m.Get("role").String()
@@ -141,16 +149,19 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.String())
+					systemPartIndex++
 				} else if content.IsObject() && content.Get("type").String() == "text" {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
-					out, _ = sjson.SetBytes(out, "request.systemInstruction.parts.0.text", content.Get("text").String())
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), content.Get("text").String())
+					systemPartIndex++
 				} else if content.IsArray() {
 					contents := content.Array()
 					if len(contents) > 0 {
 						out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
 						for j := 0; j < len(contents); j++ {
-							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", j), contents[j].Get("text").String())
+							out, _ = sjson.SetBytes(out, fmt.Sprintf("request.systemInstruction.parts.%d.text", systemPartIndex), contents[j].Get("text").String())
+							systemPartIndex++
 						}
 					}
 				}
@@ -165,7 +176,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range items {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							imageURL := item.Get("image_url.url").String()
@@ -209,7 +223,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					for _, item := range content.Array() {
 						switch item.Get("type").String() {
 						case "text":
-							node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
+							text := item.Get("text").String()
+							if text != "" {
+								node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", text)
+							}
 							p++
 						case "image_url":
 							// If the assistant returned an inline data URL, preserve it for history fidelity.
@@ -288,12 +305,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// tools -> request.tools[0].functionDeclarations + request.tools[0].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
-		toolNode := []byte(`{}`)
-		hasTool := false
+		functionToolNode := []byte(`{}`)
 		hasFunction := false
+		googleSearchNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -332,31 +349,37 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					}
 					fnRaw, _ = sjson.Delete(fnRaw, "strict")
 					if !hasFunction {
-						toolNode, _ = sjson.SetRawBytes(toolNode, "functionDeclarations", []byte("[]"))
+						functionToolNode, _ = sjson.SetRawBytes(functionToolNode, "functionDeclarations", []byte("[]"))
 					}
-					tmp, errSet := sjson.SetRawBytes(toolNode, "functionDeclarations.-1", []byte(fnRaw))
+					tmp, errSet := sjson.SetRawBytes(functionToolNode, "functionDeclarations.-1", []byte(fnRaw))
 					if errSet != nil {
 						log.Warnf("Failed to append tool declaration for '%s': %v", fn.Get("name").String(), errSet)
 						continue
 					}
-					toolNode = tmp
+					functionToolNode = tmp
 					hasFunction = true
-					hasTool = true
 				}
 			}
 			if gs := t.Get("google_search"); gs.Exists() {
+				googleToolNode := []byte(`{}`)
 				var errSet error
-				toolNode, errSet = sjson.SetRawBytes(toolNode, "googleSearch", []byte(gs.Raw))
+				googleToolNode, errSet = sjson.SetRawBytes(googleToolNode, "googleSearch", []byte(gs.Raw))
 				if errSet != nil {
 					log.Warnf("Failed to set googleSearch tool: %v", errSet)
 					continue
 				}
-				hasTool = true
+				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
 		}
-		if hasTool {
-			out, _ = sjson.SetRawBytes(out, "request.tools", []byte("[]"))
-			out, _ = sjson.SetRawBytes(out, "request.tools.0", toolNode)
+		if hasFunction || len(googleSearchNodes) > 0 {
+			toolsNode := []byte("[]")
+			if hasFunction {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
+			}
+			for _, googleNode := range googleSearchNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
+			}
+			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}

--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -15,7 +15,7 @@ import (
 	"strings"

 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -98,9 +98,8 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 		// Temperature setting for controlling response randomness
 		if temp := genConfig.Get("temperature"); temp.Exists() {
 			out, _ = sjson.Set(out, "temperature", temp.Float())
-		}
-		// Top P setting for nucleus sampling
-		if topP := genConfig.Get("topP"); topP.Exists() {
+		} else if topP := genConfig.Get("topP"); topP.Exists() {
+			// Top P setting for nucleus sampling (filtered out if temperature is set)
 			out, _ = sjson.Set(out, "top_p", topP.Float())
 		}
 		// Stop sequences configuration for custom termination conditions
@@ -115,18 +114,41 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 			}
 		}
 		// Include thoughts configuration for reasoning process visibility
-		// Only apply for models that support thinking and use numeric budgets, not discrete levels.
+		// Translator only does format conversion, ApplyThinking handles model capability validation.
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			modelInfo := registry.LookupModelInfo(modelName)
-			if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
-				// Check for thinkingBudget first - if present, enable thinking with budget
-				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
-					out, _ = sjson.Set(out, "thinking.type", "enabled")
-					out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int())
-				} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
-					// Fallback to include_thoughts if no budget specified
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+				level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+				switch level {
+				case "":
+				case "none":
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				case "auto":
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				default:
+					if budget, ok := thinking.ConvertLevelToBudget(level); ok {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
 				}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				budget := int(thinkingBudget.Int())
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				default:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+				}
+			} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
+			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
 	}
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -15,7 +15,6 @@ import (
 	"strings"

 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -66,23 +65,21 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 	root := gjson.ParseBytes(rawJSON)

+	// Convert OpenAI reasoning_effort to Claude thinking config.
 	if v := root.Get("reasoning_effort"); v.Exists() {
-		modelInfo := registry.LookupModelInfo(modelName)
-		if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
-			effort := strings.ToLower(strings.TrimSpace(v.String()))
-			if effort != "" {
-				budget, ok := thinking.ConvertLevelToBudget(effort)
-				if ok {
-					switch budget {
-					case 0:
-						out, _ = sjson.Set(out, "thinking.type", "disabled")
-					case -1:
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := thinking.ConvertLevelToBudget(effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
 						out, _ = sjson.Set(out, "thinking.type", "enabled")
-					default:
-						if budget > 0 {
-							out, _ = sjson.Set(out, "thinking.type", "enabled")
-							out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
-						}
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 					}
 				}
 			}
@@ -113,10 +110,8 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 	// Temperature setting for controlling response randomness
 	if temp := root.Get("temperature"); temp.Exists() {
 		out, _ = sjson.Set(out, "temperature", temp.Float())
-	}
-
-	// Top P setting for nucleus sampling
-	if topP := root.Get("top_p"); topP.Exists() {
+	} else if topP := root.Get("top_p"); topP.Exists() {
+		// Top P setting for nucleus sampling (filtered out if temperature is set)
 		out, _ = sjson.Set(out, "top_p", topP.Float())
 	}

@@ -141,17 +136,35 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream

 	// Process messages and transform them to Claude Code format
 	if messages := root.Get("messages"); messages.Exists() && messages.IsArray() {
+		messageIndex := 0
+		systemMessageIndex := -1
 		messages.ForEach(func(_, message gjson.Result) bool {
 			role := message.Get("role").String()
 			contentResult := message.Get("content")

 			switch role {
-			case "system", "user", "assistant":
-				// Create Claude Code message with appropriate role mapping
-				if role == "system" {
-					role = "user"
+			case "system":
+				if systemMessageIndex == -1 {
+					systemMsg := `{"role":"user","content":[]}`
+					out, _ = sjson.SetRaw(out, "messages.-1", systemMsg)
+					systemMessageIndex = messageIndex
+					messageIndex++
 				}
-
+				if contentResult.Exists() && contentResult.Type == gjson.String && contentResult.String() != "" {
+					textPart := `{"type":"text","text":""}`
+					textPart, _ = sjson.Set(textPart, "text", contentResult.String())
+					out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
+				} else if contentResult.Exists() && contentResult.IsArray() {
+					contentResult.ForEach(func(_, part gjson.Result) bool {
+						if part.Get("type").String() == "text" {
+							textPart := `{"type":"text","text":""}`
+							textPart, _ = sjson.Set(textPart, "text", part.Get("text").String())
+							out, _ = sjson.SetRaw(out, fmt.Sprintf("messages.%d.content.-1", systemMessageIndex), textPart)
+						}
+						return true
+					})
+				}
+			case "user", "assistant":
 				msg := `{"role":"","content":[]}`
 				msg, _ = sjson.Set(msg, "role", role)

@@ -230,6 +243,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				}

 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
+				messageIndex++

 			case "tool":
 				// Handle tool result messages conversion
@@ -240,6 +254,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 				msg, _ = sjson.Set(msg, "content.0.tool_use_id", toolCallID)
 				msg, _ = sjson.Set(msg, "content.0.content", content)
 				out, _ = sjson.SetRaw(out, "messages.-1", msg)
+				messageIndex++
 			}
 			return true
 		})
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,7 +10,6 @@ import (
 	"strings"

 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -54,23 +53,21 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte

 	root := gjson.ParseBytes(rawJSON)

+	// Convert OpenAI Responses reasoning.effort to Claude thinking config.
 	if v := root.Get("reasoning.effort"); v.Exists() {
-		modelInfo := registry.LookupModelInfo(modelName)
-		if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 {
-			effort := strings.ToLower(strings.TrimSpace(v.String()))
-			if effort != "" {
-				budget, ok := thinking.ConvertLevelToBudget(effort)
-				if ok {
-					switch budget {
-					case 0:
-						out, _ = sjson.Set(out, "thinking.type", "disabled")
-					case -1:
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := thinking.ConvertLevelToBudget(effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
 						out, _ = sjson.Set(out, "thinking.type", "enabled")
-					default:
-						if budget > 0 {
-							out, _ = sjson.Set(out, "thinking.type", "enabled")
-							out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
-						}
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 					}
 				}
 			}
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,7 +12,6 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -52,7 +51,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	systemsResult := rootResult.Get("system")
 	if systemsResult.IsArray() {
 		systemResults := systemsResult.Array()
-		message := `{"type":"message","role":"user","content":[]}`
+		message := `{"type":"message","role":"developer","content":[]}`
 		for i := 0; i < len(systemResults); i++ {
 			systemResult := systemResults[i]
 			systemTypeResult := systemResult.Get("type")
@@ -218,18 +217,15 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Add additional configuration parameters for the Codex API.
 	template, _ = sjson.Set(template, "parallel_tool_calls", true)

-	// Convert thinking.budget_tokens to reasoning.effort for level-based models
-	reasoningEffort := "medium" // default
+	// Convert thinking.budget_tokens to reasoning.effort.
+	reasoningEffort := "medium"
 	if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-		modelInfo := registry.LookupModelInfo(modelName)
 		switch thinkingConfig.Get("type").String() {
 		case "enabled":
-			if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
-				if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
-					budget := int(budgetTokens.Int())
-					if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
-						reasoningEffort = effort
-					}
+			if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
+				budget := int(budgetTokens.Int())
+				if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
+					reasoningEffort = effort
 				}
 			}
 		case "disabled":
@@ -245,21 +241,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"})

 	// Add a first message to ignore system instructions and ensure proper execution.
-	inputResult := gjson.Get(template, "input")
-	if inputResult.Exists() && inputResult.IsArray() {
-		inputResults := inputResult.Array()
-		newInput := "[]"
-		for i := 0; i < len(inputResults); i++ {
-			if i == 0 {
-				firstText := inputResults[i].Get("content.0.text")
-				firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
-				if firstText.Exists() && firstText.String() != firstInstructions {
-					newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+	if misc.GetCodexInstructionsEnabled() {
+		inputResult := gjson.Get(template, "input")
+		if inputResult.Exists() && inputResult.IsArray() {
+			inputResults := inputResult.Array()
+			newInput := "[]"
+			for i := 0; i < len(inputResults); i++ {
+				if i == 0 {
+					firstText := inputResults[i].Get("content.0.text")
+					firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
+					if firstText.Exists() && firstText.String() != firstInstructions {
+						newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+					}
 				}
+				newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
 			}
-			newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
+			template, _ = sjson.SetRaw(template, "input", newInput)
 		}
-		template, _ = sjson.SetRaw(template, "input", newInput)
 	}

 	return []byte(template)
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		} else {
 			template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
 		}
-		template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int())
-		template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int())
+		inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage"))
+		template, _ = sjson.Set(template, "usage.input_tokens", inputTokens)
+		template, _ = sjson.Set(template, "usage.output_tokens", outputTokens)
+		if cachedTokens > 0 {
+			template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens)
+		}

 		output = "event: message_delta\n"
 		output += fmt.Sprintf("data: %s\n\n", template)
@@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 	out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
 	out, _ = sjson.Set(out, "id", responseData.Get("id").String())
 	out, _ = sjson.Set(out, "model", responseData.Get("model").String())
-	out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
-	out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+	inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage"))
+	out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
+	out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
+	if cachedTokens > 0 {
+		out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
+	}

 	hasToolCall := false

@@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
 		out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
 	}

-	if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() {
-		out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
-		out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
+	return out
+}
+
+func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) {
+	if !usage.Exists() || usage.Type == gjson.Null {
+		return 0, 0, 0
 	}

-	return out
+	inputTokens := usage.Get("input_tokens").Int()
+	outputTokens := usage.Get("output_tokens").Int()
+	cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int()
+
+	if cachedTokens > 0 {
+		if inputTokens >= cachedTokens {
+			inputTokens -= cachedTokens
+		} else {
+			inputTokens = 0
+		}
+	}
+
+	return inputTokens, outputTokens, cachedTokens
 }

 // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -14,7 +14,6 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
@@ -95,7 +94,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// System instruction -> as a user message with input_text parts
 	sysParts := root.Get("system_instruction.parts")
 	if sysParts.IsArray() {
-		msg := `{"type":"message","role":"user","content":[]}`
+		msg := `{"type":"message","role":"developer","content":[]}`
 		arr := sysParts.Array()
 		for i := 0; i < len(arr); i++ {
 			p := arr[i]
@@ -249,22 +248,28 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// Fixed flags aligning with Codex expectations
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)

-	// Convert thinkingBudget to reasoning.effort for level-based models
-	reasoningEffort := "medium" // default
+	// Convert Gemini thinkingConfig to Codex reasoning.effort.
+	effortSet := false
 	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			modelInfo := registry.LookupModelInfo(modelName)
-			if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 {
-				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-					budget := int(thinkingBudget.Int())
-					if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
-						reasoningEffort = effort
-					}
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
+				if effort != "" {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
+					effortSet = true
+				}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
+					effortSet = true
 				}
 			}
 		}
 	}
-	out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
+	if !effortSet {
+		// No thinking config, set default effort
+		out, _ = sjson.Set(out, "reasoning.effort", "medium")
+	}
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
 	out, _ = sjson.Set(out, "stream", true)
 	out, _ = sjson.Set(out, "store", false)
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	rawJSON := bytes.Clone(inputRawJSON)
 	userAgent := misc.ExtractCodexUserAgent(rawJSON)
 	// Start with empty JSON object
-	out := `{}`
+	out := `{"instructions":""}`

 	// Stream must be set to true
 	out, _ = sjson.Set(out, "stream", stream)
@@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	// Extract system instructions from first system message (string or text object)
 	messages := gjson.GetBytes(rawJSON, "messages")
 	_, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent)
-	out, _ = sjson.Set(out, "instructions", instructions)
+	if misc.GetCodexInstructionsEnabled() {
+		out, _ = sjson.Set(out, "instructions", instructions)
+	}
 	// if messages.IsArray() {
 	// 	arr := messages.Array()
 	// 	for i := 0; i < len(arr); i++ {
@@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 				msg := `{}`
 				msg, _ = sjson.Set(msg, "type", "message")
 				if role == "system" {
-					msg, _ = sjson.Set(msg, "role", "user")
+					msg, _ = sjson.Set(msg, "role", "developer")
 				} else {
 					msg, _ = sjson.Set(msg, "role", role)
 				}
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	}

 	if hasOfficialInstructions {
+		newInput := "[]"
+		for _, item := range inputResults {
+			newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw)
+		}
+		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput))
 		return rawJSON
 	}
 	// log.Debugf("instructions not matched, %s\n", originalInstructions)
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -9,7 +9,6 @@ import (
 	"bytes"
 	"strings"

-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -161,14 +160,11 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []

 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
 	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
-		modelInfo := registry.LookupModelInfo(modelName)
-		if modelInfo != nil && modelInfo.Thinking != nil {
-			if t.Get("type").String() == "enabled" {
-				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-					budget := int(b.Int())
-					out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-					out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-				}
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
 		}
 	}
--- a/Show More
+++ b/Show More