Merge branch 'router-for-me:main' into main

Merge PR #2474
# Conflicts: # internal/api/modules/amp/response_rewriter.go # internal/api/modules/amp/response_rewriter_test.go
2026-04-03 19:21:17 +00:00 · 2026-04-02 22:34:47 +08:00 · 2026-04-02 22:31:12 +08:00 · 2026-04-02 21:56:27 +08:00 · 2026-04-02 21:37:47 +08:00 · 2026-04-02 21:31:21 +08:00
288 changed files with 40303 additions and 8161 deletions
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -16,6 +16,10 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
+      - name: Refresh models catalog
+        run: |
+          git fetch --depth 1 https://github.com/router-for-me/models.git main
+          git show FETCH_HEAD:models.json > internal/registry/models/models.json
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to DockerHub
@@ -25,7 +29,7 @@ jobs:
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Generate Build Metadata
        run: |
-          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
      - name: Build and push (amd64)
@@ -47,6 +51,10 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
+      - name: Refresh models catalog
+        run: |
+          git fetch --depth 1 https://github.com/router-for-me/models.git main
+          git show FETCH_HEAD:models.json > internal/registry/models/models.json
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to DockerHub
@@ -56,7 +64,7 @@ jobs:
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Generate Build Metadata
        run: |
-          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
      - name: Build and push (arm64)
@@ -90,7 +98,7 @@ jobs:
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Generate Build Metadata
        run: |
-          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
+          echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
      - name: Create and push multi-arch manifests
--- a/.github/workflows/pr-test-build.yml
+++ b/.github/workflows/pr-test-build.yml
@@ -12,6 +12,10 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
+      - name: Refresh models catalog
+        run: |
+          git fetch --depth 1 https://github.com/router-for-me/models.git main
+          git show FETCH_HEAD:models.json > internal/registry/models/models.json
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -16,6 +16,10 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+      - name: Refresh models catalog
+        run: |
+          git fetch --depth 1 https://github.com/router-for-me/models.git main
+          git show FETCH_HEAD:models.json > internal/registry/models/models.json
      - run: git fetch --force --tags
      - uses: actions/setup-go@v4
        with:
@@ -23,15 +27,14 @@ jobs:
          cache: true
      - name: Generate Build Metadata
        run: |
-          VERSION=$(git describe --tags --always --dirty)
-          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+          echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV
          echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
          echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
      - uses: goreleaser/goreleaser-action@v4
        with:
          distribution: goreleaser
          version: latest
-          args: release --clean
+          args: release --clean --skip=validate
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          VERSION: ${{ env.VERSION }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # Binaries
 cli-proxy-api
 cliproxy
+/server
 *.exe


@@ -36,13 +37,13 @@ GEMINI.md

 # Tooling metadata
 .vscode/*
+.worktrees/
 .codex/*
 .claude/*
 .gemini/*
 .serena/*
 .agent/*
 .agents/*
-.agents/*
 .opencode/*
 .idea/*
 .bmad/*
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -1,3 +1,5 @@
+version: 2
+
 builds:
  - id: "cli-proxy-api-plus"
    env:
@@ -6,6 +8,7 @@ builds:
      - linux
      - windows
      - darwin
+      - freebsd
    goarch:
      - amd64
      - arm64
--- a/README.md
+++ b/README.md
@@ -8,123 +8,6 @@ All third-party provider support is maintained by community contributors; CLIPro

 The Plus release stays in lockstep with the mainline features.

-## Differences from the Mainline
-
-[![z.ai](https://assets.router-for.me/english-5.png)](https://z.ai/subscribe?ic=8JVLJQFSKB)
-
-## New Features (Plus Enhanced)
-
-GLM CODING PLAN is a subscription service designed for AI coding, starting at just $10/month. It provides access to their flagship GLM-4.7 & （GLM-5 Only Available  for Pro Users）model across 10+ popular AI coding tools (Claude Code, Cline, Roo Code, etc.), offering developers top-tier, fast, and stable coding experiences.
-
-## Kiro Authentication
-
-### CLI Login
-
-> **Note:** Google/GitHub login is not available for third-party applications due to AWS Cognito restrictions.
-
-**AWS Builder ID** (recommended):
-
-```bash
-# Device code flow
-./CLIProxyAPI --kiro-aws-login
-
-# Authorization code flow
-./CLIProxyAPI --kiro-aws-authcode
-```
-
-**Import token from Kiro IDE:**
-
-```bash
-./CLIProxyAPI --kiro-import
-```
-
-To get a token from Kiro IDE:
-
-1. Open Kiro IDE and login with Google (or GitHub)
-2. Find the token file: `~/.kiro/kiro-auth-token.json`
-3. Run: `./CLIProxyAPI --kiro-import`
-
-**AWS IAM Identity Center (IDC):**
-
-```bash
-./CLIProxyAPI --kiro-idc-login --kiro-idc-start-url https://d-xxxxxxxxxx.awsapps.com/start
-
-# Specify region
-./CLIProxyAPI --kiro-idc-login --kiro-idc-start-url https://d-xxxxxxxxxx.awsapps.com/start --kiro-idc-region us-west-2
-```
-
-**Additional flags:**
-
-| Flag | Description |
-|------|-------------|
-| `--no-browser` | Don't open browser automatically, print URL instead |
-| `--no-incognito` | Use existing browser session (Kiro defaults to incognito). Useful for corporate SSO that requires an authenticated browser session |
-| `--kiro-idc-start-url` | IDC Start URL (required with `--kiro-idc-login`) |
-| `--kiro-idc-region` | IDC region (default: `us-east-1`) |
-| `--kiro-idc-flow` | IDC flow type: `authcode` (default) or `device` |
-
-### Web-based OAuth Login
-
-Access the Kiro OAuth web interface at:
-
-```
-http://your-server:8080/v0/oauth/kiro
-```
-
-This provides a browser-based OAuth flow for Kiro (AWS CodeWhisperer) authentication with:
- AWS Builder ID login
- AWS Identity Center (IDC) login
- Token import from Kiro IDE
-
-## Quick Deployment with Docker
-
-### One-Command Deployment
-
-```bash
-# Create deployment directory
-mkdir -p ~/cli-proxy && cd ~/cli-proxy
-
-# Create docker-compose.yml
-cat > docker-compose.yml << 'EOF'
-services:
-  cli-proxy-api:
-    image: eceasy/cli-proxy-api-plus:latest
-    container_name: cli-proxy-api-plus
-    ports:
-      - "8317:8317"
-    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
-      - ./auths:/root/.cli-proxy-api
-      - ./logs:/CLIProxyAPI/logs
-    restart: unless-stopped
-EOF
-
-# Download example config
-curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml
-
-# Pull and start
-docker compose pull && docker compose up -d
-```
-
-### Configuration
-
-Edit `config.yaml` before starting:
-
-```yaml
-# Basic configuration example
-server:
-  port: 8317
-
-# Add your provider configurations here
-```
-
-### Update to Latest Version
-
-```bash
-cd ~/cli-proxy
-docker compose pull && docker compose up -d
-```
-
 ## Contributing

 This project only accepts pull requests that relate to third-party provider support. Any pull requests unrelated to third-party provider support will be rejected.
--- a/README_CN.md
+++ b/README_CN.md
@@ -6,125 +6,6 @@

 所有的第三方供应商支持都由第三方社区维护者提供，CLIProxyAPI 不提供技术支持。如需取得支持，请与对应的社区维护者联系。

-该 Plus 版本的主线功能与主线功能强制同步。
-
-## 与主线版本版本差异
-
-[![bigmodel.cn](https://assets.router-for.me/chinese-5.png)](https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII)
-
-## 新增功能 (Plus 增强版)
-
-GLM CODING PLAN 是专为AI编码打造的订阅套餐，每月最低仅需20元，即可在十余款主流AI编码工具如 Claude Code、Cline、Roo Code 中畅享智谱旗舰模型GLM-4.7（受限于算力，目前仅限Pro用户开放），为开发者提供顶尖的编码体验。
-
-智谱AI为本产品提供了特别优惠，使用以下链接购买可以享受九折优惠：https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII
-
-### 命令行登录
-
-> **注意：** 由于 AWS Cognito 限制，Google/GitHub 登录不可用于第三方应用。
-
-**AWS Builder ID**（推荐）：
-
-```bash
-# 设备码流程
-./CLIProxyAPI --kiro-aws-login
-
-# 授权码流程
-./CLIProxyAPI --kiro-aws-authcode
-```
-
-**从 Kiro IDE 导入令牌：**
-
-```bash
-./CLIProxyAPI --kiro-import
-```
-
-获取令牌步骤：
-
-1. 打开 Kiro IDE，使用 Google（或 GitHub）登录
-2. 找到令牌文件：`~/.kiro/kiro-auth-token.json`
-3. 运行：`./CLIProxyAPI --kiro-import`
-
-**AWS IAM Identity Center (IDC)：**
-
-```bash
-./CLIProxyAPI --kiro-idc-login --kiro-idc-start-url https://d-xxxxxxxxxx.awsapps.com/start
-
-# 指定区域
-./CLIProxyAPI --kiro-idc-login --kiro-idc-start-url https://d-xxxxxxxxxx.awsapps.com/start --kiro-idc-region us-west-2
-```
-
-**附加参数：**
-
-| 参数 | 说明 |
-|------|------|
-| `--no-browser` | 不自动打开浏览器，打印 URL |
-| `--no-incognito` | 使用已有浏览器会话（Kiro 默认使用无痕模式），适用于需要已登录浏览器会话的企业 SSO 场景 |
-| `--kiro-idc-start-url` | IDC Start URL（`--kiro-idc-login` 必需） |
-| `--kiro-idc-region` | IDC 区域（默认：`us-east-1`） |
-| `--kiro-idc-flow` | IDC 流程类型：`authcode`（默认）或 `device` |
-
-### 网页端 OAuth 登录
-
-访问 Kiro OAuth 网页认证界面：
-
-```
-http://your-server:8080/v0/oauth/kiro
-```
-
-提供基于浏览器的 Kiro (AWS CodeWhisperer) OAuth 认证流程，支持：
- AWS Builder ID 登录
- AWS Identity Center (IDC) 登录
- 从 Kiro IDE 导入令牌
-
-## Docker 快速部署
-
-### 一键部署
-
-```bash
-# 创建部署目录
-mkdir -p ~/cli-proxy && cd ~/cli-proxy
-
-# 创建 docker-compose.yml
-cat > docker-compose.yml << 'EOF'
-services:
-  cli-proxy-api:
-    image: eceasy/cli-proxy-api-plus:latest
-    container_name: cli-proxy-api-plus
-    ports:
-      - "8317:8317"
-    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
-      - ./auths:/root/.cli-proxy-api
-      - ./logs:/CLIProxyAPI/logs
-    restart: unless-stopped
-EOF
-
-# 下载示例配置
-curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml
-
-# 拉取并启动
-docker compose pull && docker compose up -d
-```
-
-### 配置说明
-
-启动前请编辑 `config.yaml`：
-
-```yaml
-# 基本配置示例
-server:
-  port: 8317
-
-# 在此添加你的供应商配置
-```
-
-### 更新到最新版本
-
-```bash
-cd ~/cli-proxy
-docker compose pull && docker compose up -d
-```
-
 ## 贡献

 该项目仅接受第三方供应商支持的 Pull Request。任何非第三方供应商支持的 Pull Request 都将被拒绝。
@@ -133,4 +14,4 @@ docker compose pull && docker compose up -d

 ## 许可证

-此项目根据 MIT 许可证授权 - 有关详细信息，请参阅 [LICENSE](LICENSE) 文件。
+此项目根据 MIT 许可证授权 - 有关详细信息，请参阅 [LICENSE](LICENSE) 文件。
--- a/assets/bmoplus.png
+++ b/assets/bmoplus.png
--- a/assets/lingtrue.png
+++ b/assets/lingtrue.png
--- a/cmd/fetch_antigravity_models/main.go
+++ b/cmd/fetch_antigravity_models/main.go
@@ -0,0 +1,275 @@
+// Command fetch_antigravity_models connects to the Antigravity API using the
+// stored auth credentials and saves the dynamically fetched model list to a
+// JSON file for inspection or offline use.
+//
+// Usage:
+//
+//	go run ./cmd/fetch_antigravity_models [flags]
+//
+// Flags:
+//
+//	--auths-dir <path>  Directory containing auth JSON files (default: "auths")
+//	--output    <path>  Output JSON file path             (default: "antigravity_models.json")
+//	--pretty            Pretty-print the output JSON      (default: true)
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
+	sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	antigravityBaseURLDaily        = "https://daily-cloudcode-pa.googleapis.com"
+	antigravitySandboxBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
+	antigravityBaseURLProd         = "https://cloudcode-pa.googleapis.com"
+	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
+)
+
+func init() {
+	logging.SetupBaseLogger()
+	log.SetLevel(log.InfoLevel)
+}
+
+// modelOutput wraps the fetched model list with fetch metadata.
+type modelOutput struct {
+	Models []modelEntry `json:"models"`
+}
+
+// modelEntry contains only the fields we want to keep for static model definitions.
+type modelEntry struct {
+	ID                  string `json:"id"`
+	Object              string `json:"object"`
+	OwnedBy             string `json:"owned_by"`
+	Type                string `json:"type"`
+	DisplayName         string `json:"display_name"`
+	Name                string `json:"name"`
+	Description         string `json:"description"`
+	ContextLength       int    `json:"context_length,omitempty"`
+	MaxCompletionTokens int    `json:"max_completion_tokens,omitempty"`
+}
+
+func main() {
+	var authsDir string
+	var outputPath string
+	var pretty bool
+
+	flag.StringVar(&authsDir, "auths-dir", "auths", "Directory containing auth JSON files")
+	flag.StringVar(&outputPath, "output", "antigravity_models.json", "Output JSON file path")
+	flag.BoolVar(&pretty, "pretty", true, "Pretty-print the output JSON")
+	flag.Parse()
+
+	// Resolve relative paths against the working directory.
+	wd, err := os.Getwd()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: cannot get working directory: %v\n", err)
+		os.Exit(1)
+	}
+	if !filepath.IsAbs(authsDir) {
+		authsDir = filepath.Join(wd, authsDir)
+	}
+	if !filepath.IsAbs(outputPath) {
+		outputPath = filepath.Join(wd, outputPath)
+	}
+
+	fmt.Printf("Scanning auth files in: %s\n", authsDir)
+
+	// Load all auth records from the directory.
+	fileStore := sdkauth.NewFileTokenStore()
+	fileStore.SetBaseDir(authsDir)
+
+	ctx := context.Background()
+	auths, err := fileStore.List(ctx)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: failed to list auth files: %v\n", err)
+		os.Exit(1)
+	}
+	if len(auths) == 0 {
+		fmt.Fprintf(os.Stderr, "error: no auth files found in %s\n", authsDir)
+		os.Exit(1)
+	}
+
+	// Find the first enabled antigravity auth.
+	var chosen *coreauth.Auth
+	for _, a := range auths {
+		if a == nil || a.Disabled {
+			continue
+		}
+		if strings.EqualFold(strings.TrimSpace(a.Provider), "antigravity") {
+			chosen = a
+			break
+		}
+	}
+	if chosen == nil {
+		fmt.Fprintf(os.Stderr, "error: no enabled antigravity auth found in %s\n", authsDir)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Using auth: id=%s label=%s\n", chosen.ID, chosen.Label)
+
+	// Fetch models from the upstream Antigravity API.
+	fmt.Println("Fetching Antigravity model list from upstream...")
+
+	fetchCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	models := fetchModels(fetchCtx, chosen)
+	if len(models) == 0 {
+		fmt.Fprintln(os.Stderr, "warning: no models returned (API may be unavailable or token expired)")
+	} else {
+		fmt.Printf("Fetched %d models.\n", len(models))
+	}
+
+	// Build the output payload.
+	out := modelOutput{
+		Models: models,
+	}
+
+	// Marshal to JSON.
+	var raw []byte
+	if pretty {
+		raw, err = json.MarshalIndent(out, "", "  ")
+	} else {
+		raw, err = json.Marshal(out)
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: failed to marshal JSON: %v\n", err)
+		os.Exit(1)
+	}
+
+	if err = os.WriteFile(outputPath, raw, 0o644); err != nil {
+		fmt.Fprintf(os.Stderr, "error: failed to write output file %s: %v\n", outputPath, err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Model list saved to: %s\n", outputPath)
+}
+
+func fetchModels(ctx context.Context, auth *coreauth.Auth) []modelEntry {
+	accessToken := metaStringValue(auth.Metadata, "access_token")
+	if accessToken == "" {
+		fmt.Fprintln(os.Stderr, "error: no access token found in auth")
+		return nil
+	}
+
+	baseURLs := []string{antigravityBaseURLProd, antigravityBaseURLDaily, antigravitySandboxBaseURLDaily}
+
+	for _, baseURL := range baseURLs {
+		modelsURL := baseURL + antigravityModelsPath
+
+		var payload []byte
+		if auth != nil && auth.Metadata != nil {
+			if pid, ok := auth.Metadata["project_id"].(string); ok && strings.TrimSpace(pid) != "" {
+				payload = []byte(fmt.Sprintf(`{"project": "%s"}`, strings.TrimSpace(pid)))
+			}
+		}
+		if len(payload) == 0 {
+			payload = []byte(`{}`)
+		}
+
+		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, strings.NewReader(string(payload)))
+		if errReq != nil {
+			continue
+		}
+		httpReq.Close = true
+		httpReq.Header.Set("Content-Type", "application/json")
+		httpReq.Header.Set("Authorization", "Bearer "+accessToken)
+		httpReq.Header.Set("User-Agent", "antigravity/1.19.6 darwin/arm64")
+
+		httpClient := &http.Client{Timeout: 30 * time.Second}
+		if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil {
+			httpClient.Transport = transport
+		}
+		httpResp, errDo := httpClient.Do(httpReq)
+		if errDo != nil {
+			continue
+		}
+
+		bodyBytes, errRead := io.ReadAll(httpResp.Body)
+		httpResp.Body.Close()
+		if errRead != nil {
+			continue
+		}
+
+		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
+			continue
+		}
+
+		result := gjson.GetBytes(bodyBytes, "models")
+		if !result.Exists() {
+			continue
+		}
+
+		var models []modelEntry
+
+		for originalName, modelData := range result.Map() {
+			modelID := strings.TrimSpace(originalName)
+			if modelID == "" {
+				continue
+			}
+			// Skip internal/experimental models
+			switch modelID {
+			case "chat_20706", "chat_23310", "tab_flash_lite_preview", "tab_jump_flash_lite_preview", "gemini-2.5-flash-thinking", "gemini-2.5-pro":
+				continue
+			}
+
+			displayName := modelData.Get("displayName").String()
+			if displayName == "" {
+				displayName = modelID
+			}
+
+			entry := modelEntry{
+				ID:          modelID,
+				Object:      "model",
+				OwnedBy:     "antigravity",
+				Type:        "antigravity",
+				DisplayName: displayName,
+				Name:        modelID,
+				Description: displayName,
+			}
+
+			if maxTok := modelData.Get("maxTokens").Int(); maxTok > 0 {
+				entry.ContextLength = int(maxTok)
+			}
+			if maxOut := modelData.Get("maxOutputTokens").Int(); maxOut > 0 {
+				entry.MaxCompletionTokens = int(maxOut)
+			}
+
+			models = append(models, entry)
+		}
+
+		return models
+	}
+
+	return nil
+}
+
+func metaStringValue(m map[string]interface{}, key string) string {
+	if m == nil {
+		return ""
+	}
+	v, ok := m[key]
+	if !ok {
+		return ""
+	}
+	switch val := v.(type) {
+	case string:
+		return val
+	default:
+		return ""
+	}
+}
--- a/cmd/mcpdebug/main.go
+++ b/cmd/mcpdebug/main.go
@@ -0,0 +1,20 @@
+package main
+
+import (
+	"encoding/hex"
+	"fmt"
+	"os"
+
+	cursorproto "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/cursor/proto"
+)
+
+func main() {
+	// Encode MCP result with empty execId
+	resultBytes := cursorproto.EncodeExecMcpResult(1, "", `{"test": "data"}`, false)
+	fmt.Printf("Result protobuf hex: %s\n", hex.EncodeToString(resultBytes))
+	fmt.Printf("Result length: %d bytes\n", len(resultBytes))
+
+	// Write to file for analysis
+	os.WriteFile("mcp_result.bin", resultBytes)
+	fmt.Println("Wrote mcp_result.bin")
+}
--- a/cmd/protocheck/main.go
+++ b/cmd/protocheck/main.go
@@ -0,0 +1,32 @@
+package main
+
+import (
+	"fmt"
+	cursorproto "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/cursor/proto"
+)
+
+func main() {
+	ecm := cursorproto.NewMsg("ExecClientMessage")
+	
+	// Try different field names
+	names := []string{
+		"mcp_result", "mcpResult", "McpResult", "MCP_RESULT",
+		"shell_result", "shellResult",
+	}
+	
+	for _, name := range names {
+		fd := ecm.Descriptor().Fields().ByName(name)
+		if fd != nil {
+			fmt.Printf("Found field %q: number=%d, kind=%s\n", name, fd.Number(), fd.Kind())
+		} else {
+			fmt.Printf("Field %q NOT FOUND\n", name)
+		}
+	}
+	
+	// List all fields
+	fmt.Println("\nAll fields in ExecClientMessage:")
+	for i := 0; i < ecm.Descriptor().Fields().Len(); i++ {
+		f := ecm.Descriptor().Fields().Get(i)
+		fmt.Printf("  %d: %q (number=%d)\n", i, f.Name(), f.Number())
+	}
+}
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -25,6 +25,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/store"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/tui"
@@ -78,10 +79,13 @@ func main() {
 	var kiloLogin bool
 	var iflowLogin bool
 	var iflowCookie bool
+	var gitlabLogin bool
+	var gitlabTokenLogin bool
 	var noBrowser bool
 	var oauthCallbackPort int
 	var antigravityLogin bool
 	var kimiLogin bool
+	var cursorLogin bool
 	var kiroLogin bool
 	var kiroGoogleLogin bool
 	var kiroAWSLogin bool
@@ -92,6 +96,7 @@ func main() {
 	var kiroIDCRegion string
 	var kiroIDCFlow string
 	var githubCopilotLogin bool
+	var codeBuddyLogin bool
 	var projectID string
 	var vertexImport string
 	var configPath string
@@ -100,6 +105,7 @@ func main() {
 	var standalone bool
 	var noIncognito bool
 	var useIncognito bool
+	var localModel bool

 	// Define command-line flags for different operation modes.
 	flag.BoolVar(&login, "login", false, "Login Google Account")
@@ -110,12 +116,15 @@ func main() {
 	flag.BoolVar(&kiloLogin, "kilo-login", false, "Login to Kilo AI using device flow")
 	flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth")
 	flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie")
+	flag.BoolVar(&gitlabLogin, "gitlab-login", false, "Login to GitLab Duo using OAuth")
+	flag.BoolVar(&gitlabTokenLogin, "gitlab-token-login", false, "Login to GitLab Duo using a personal access token")
 	flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
 	flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)")
 	flag.BoolVar(&useIncognito, "incognito", false, "Open browser in incognito/private mode for OAuth (useful for multiple accounts)")
 	flag.BoolVar(&noIncognito, "no-incognito", false, "Force disable incognito mode (uses existing browser session)")
 	flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
 	flag.BoolVar(&kimiLogin, "kimi-login", false, "Login to Kimi using OAuth")
+	flag.BoolVar(&cursorLogin, "cursor-login", false, "Login to Cursor using OAuth")
 	flag.BoolVar(&kiroLogin, "kiro-login", false, "Login to Kiro using Google OAuth")
 	flag.BoolVar(&kiroGoogleLogin, "kiro-google-login", false, "Login to Kiro using Google OAuth (same as --kiro-login)")
 	flag.BoolVar(&kiroAWSLogin, "kiro-aws-login", false, "Login to Kiro using AWS Builder ID (device code flow)")
@@ -126,12 +135,14 @@ func main() {
 	flag.StringVar(&kiroIDCRegion, "kiro-idc-region", "", "IDC region (default: us-east-1)")
 	flag.StringVar(&kiroIDCFlow, "kiro-idc-flow", "", "IDC flow type: authcode (default) or device")
 	flag.BoolVar(&githubCopilotLogin, "github-copilot-login", false, "Login to GitHub Copilot using device flow")
+	flag.BoolVar(&codeBuddyLogin, "codebuddy-login", false, "Login to CodeBuddy using browser OAuth flow")
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
 	flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
 	flag.StringVar(&password, "password", "", "")
 	flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
 	flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
+	flag.BoolVar(&localModel, "local-model", false, "Use embedded model catalog only, skip remote model fetching")

 	flag.CommandLine.Usage = func() {
 		out := flag.CommandLine.Output()
@@ -509,6 +520,9 @@ func main() {
 	} else if githubCopilotLogin {
 		// Handle GitHub Copilot login
 		cmd.DoGitHubCopilotLogin(cfg, options)
+	} else if codeBuddyLogin {
+		// Handle CodeBuddy login
+		cmd.DoCodeBuddyLogin(cfg, options)
 	} else if codexLogin {
 		// Handle Codex login
 		cmd.DoCodexLogin(cfg, options)
@@ -526,8 +540,14 @@ func main() {
 		cmd.DoIFlowLogin(cfg, options)
 	} else if iflowCookie {
 		cmd.DoIFlowCookieAuth(cfg, options)
+	} else if gitlabLogin {
+		cmd.DoGitLabLogin(cfg, options)
+	} else if gitlabTokenLogin {
+		cmd.DoGitLabTokenLogin(cfg, options)
 	} else if kimiLogin {
 		cmd.DoKimiLogin(cfg, options)
+	} else if cursorLogin {
+		cmd.DoCursorLogin(cfg, options)
 	} else if kiroLogin {
 		// For Kiro auth, default to incognito mode for multi-account support
 		// Users can explicitly override with --no-incognito
@@ -569,10 +589,16 @@ func main() {
 			cmd.WaitForCloudDeploy()
 			return
 		}
+		if localModel && (!tuiMode || standalone) {
+			log.Info("Local model mode: using embedded model catalog, remote model updates disabled")
+		}
 		if tuiMode {
 			if standalone {
 				// Standalone mode: start an embedded local server and connect TUI client to it.
 				managementasset.StartAutoUpdater(context.Background(), configFilePath)
+				if !localModel {
+					registry.StartModelsUpdater(context.Background())
+				}
 				hook := tui.NewLogHook(2000)
 				hook.SetFormatter(&logging.LogFormatter{})
 				log.AddHook(hook)
@@ -643,15 +669,18 @@ func main() {
 				}
 			}
 		} else {
-      // Start the main proxy service
-      managementasset.StartAutoUpdater(context.Background(), configFilePath)
+			// Start the main proxy service
+			managementasset.StartAutoUpdater(context.Background(), configFilePath)
+			if !localModel {
+				registry.StartModelsUpdater(context.Background())
+			}

-      if cfg.AuthDir != "" {
-        kiro.InitializeAndStart(cfg.AuthDir, cfg)
-        defer kiro.StopGlobalRefreshManager()
-      }
+			if cfg.AuthDir != "" {
+				kiro.InitializeAndStart(cfg.AuthDir, cfg)
+				defer kiro.StopGlobalRefreshManager()
+			}

-      cmd.StartService(cfg, configFilePath, password)
+			cmd.StartService(cfg, configFilePath, password)
 		}
 	}
 }
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -25,6 +25,10 @@ remote-management:
  # Disable the bundled management control panel asset download and HTTP route when true.
  disable-control-panel: false

+  # Disable automatic periodic background updates of the management panel from GitHub (default: false).
+  # When enabled, the panel is only downloaded on first access if missing, and never auto-updated afterward.
+  # disable-auto-update-panel: false
+
  # GitHub repository for the management control panel. Accepts a repository URL or releases API URL.
  panel-github-repository: 'https://github.com/router-for-me/Cli-Proxy-API-Management-Center'

@@ -68,7 +72,8 @@ error-logs-max-files: 10
 usage-statistics-enabled: false

 # Proxy URL. Supports socks5/http/https protocols. Example: socks5://user:pass@192.168.1.1:1080/
-proxy-url: ''
+# Per-entry proxy-url also supports "direct" or "none" to bypass both the global proxy-url and environment proxies explicitly.
+proxy-url: ""

 # When true, unprefixed model requests only use credentials without a prefix (except when prefix == model name).
 force-model-prefix: false
@@ -91,6 +96,7 @@ max-retry-interval: 30
 quota-exceeded:
  switch-project: true # Whether to automatically switch to another project when a quota is exceeded
  switch-preview-model: true # Whether to automatically switch to a preview model when a quota is exceeded
+  antigravity-credits: true # Whether to retry Antigravity quota_exhausted 429s once with enabledCreditTypes=["GOOGLE_ONE_AI"]

 # Routing strategy for selecting credentials when multiple match.
 routing:
@@ -115,6 +121,7 @@ nonstream-keepalive-interval: 0
 #     headers:
 #       X-Custom-Header: "custom-value"
 #     proxy-url: "socks5://proxy.example.com:1080"
+#     # proxy-url: "direct" # optional: explicit direct connect for this credential
 #     models:
 #       - name: "gemini-2.5-flash" # upstream model name
 #         alias: "gemini-flash"    # client alias mapped to the upstream model
@@ -133,6 +140,7 @@ nonstream-keepalive-interval: 0
 #     headers:
 #       X-Custom-Header: "custom-value"
 #     proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
+#     # proxy-url: "direct" # optional: explicit direct connect for this credential
 #     models:
 #       - name: "gpt-5-codex"   # upstream model name
 #         alias: "codex-latest" # client alias mapped to the upstream model
@@ -151,6 +159,7 @@ nonstream-keepalive-interval: 0
 #     headers:
 #       X-Custom-Header: "custom-value"
 #     proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
+#     # proxy-url: "direct" # optional: explicit direct connect for this credential
 #     models:
 #       - name: "claude-3-5-sonnet-20241022" # upstream model name
 #         alias: "claude-sonnet-latest"      # client alias mapped to the upstream model
@@ -169,14 +178,31 @@ nonstream-keepalive-interval: 0
 #         - "API"
 #         - "proxy"
 #       cache-user-id: true          # optional: default is false; set true to reuse cached user_id per API key instead of generating a random one each request
+#     experimental-cch-signing: false # optional: default is false; when true, sign the final /v1/messages body using the current Claude Code cch algorithm
+#                                     # keep this disabled unless you explicitly need the behavior, so upstream seed changes fall back to legacy proxy behavior

 # Default headers for Claude API requests. Update when Claude Code releases new versions.
-# These are used as fallbacks when the client does not send its own headers.
+# In legacy mode, user-agent/package-version/runtime-version/timeout are used as fallbacks
+# when the client omits them, while OS/arch remain runtime-derived. When
+# stabilize-device-profile is enabled, OS/arch stay pinned to the baseline values below,
+# while user-agent/package-version/runtime-version seed a software fingerprint that can
+# still upgrade to newer official Claude client versions.
 # claude-header-defaults:
 #   user-agent: "claude-cli/2.1.44 (external, sdk-cli)"
 #   package-version: "0.74.0"
 #   runtime-version: "v24.3.0"
+#   os: "MacOS"
+#   arch: "arm64"
 #   timeout: "600"
+#   stabilize-device-profile: false  # optional, default false; set true to enable per-auth/API-key fingerprint pinning
+
+# Default headers for Codex OAuth model requests.
+# These are used only for file-backed/OAuth Codex requests when the client
+# does not send the header. `user-agent` applies to HTTP and websocket requests;
+# `beta-features` only applies to websocket requests. They do not apply to codex-api-key entries.
+# codex-header-defaults:
+#   user-agent: "codex_cli_rs/0.114.0 (Mac OS 14.2.0; x86_64) vscode/1.111.0"
+#   beta-features: "multi_agent"

 # Kiro (AWS CodeWhisperer) configuration
 # Note: Kiro API currently only operates in us-east-1 region
@@ -215,17 +241,32 @@ nonstream-keepalive-interval: 0
 #     api-key-entries:
 #       - api-key: "sk-or-v1-...b780"
 #         proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
+#         # proxy-url: "direct" # optional: explicit direct connect for this credential
 #       - api-key: "sk-or-v1-...b781" # without proxy-url
 #     models: # The models supported by the provider.
 #       - name: "moonshotai/kimi-k2:free" # The actual model name.
-#         alias: "kimi-k2" # The alias used in the API.
+#         alias: "kimi-k2"               # The alias used in the API.
+#         thinking:                      # optional: omit to default to levels ["low","medium","high"]
+#           levels: ["low", "medium", "high"]
+#       # You may repeat the same alias to build an internal model pool.
+#       # The client still sees only one alias in the model list.
+#       # Requests to that alias will round-robin across the upstream names below,
+#       # and if the chosen upstream fails before producing output, the request will
+#       # continue with the next upstream model in the same alias pool.
+#       - name: "qwen3.5-plus"
+#         alias: "claude-opus-4.66"
+#       - name: "glm-5"
+#         alias: "claude-opus-4.66"
+#       - name: "kimi-k2.5"
+#         alias: "claude-opus-4.66"

-# Vertex API keys (Vertex-compatible endpoints, use API key + base URL)
+# Vertex API keys (Vertex-compatible endpoints, base-url is optional)
 # vertex-api-key:
 #   - api-key: "vk-123..."                        # x-goog-api-key header
 #     prefix: "test"                              # optional: require calls like "test/vertex-pro" to target this credential
-#     base-url: "https://example.com/api"         # e.g. https://zenmux.ai/api
+#     base-url: "https://example.com/api"         # optional, e.g. https://zenmux.ai/api; falls back to Google Vertex when omitted
 #     proxy-url: "socks5://proxy.example.com:1080" # optional per-key proxy override
+#     # proxy-url: "direct" # optional: explicit direct connect for this credential
 #     headers:
 #       X-Custom-Header: "custom-value"
 #     models:                                     # optional: map aliases to upstream model names
@@ -233,6 +274,9 @@ nonstream-keepalive-interval: 0
 #         alias: "vertex-flash"                   # client-visible alias
 #       - name: "gemini-2.5-pro"
 #         alias: "vertex-pro"
+#     excluded-models:                            # optional: models to exclude from listing
+#       - "imagen-3.0-generate-002"
+#       - "imagen-*"

 # Amp Integration
 # ampcode:
@@ -272,6 +316,10 @@ nonstream-keepalive-interval: 0
 # These aliases rename model IDs for both model listing and request routing.
 # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
 # NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
+# NOTE: Because aliases affect the merged /v1 model list and merged request routing, overlapping
+# client-visible names can become ambiguous across providers. /api/provider/{provider}/... helps
+# you select the protocol surface, but inference backend selection can still follow the resolved
+# model/alias. For strict backend pinning, use unique aliases/prefixes or avoid overlapping names.
 # You can repeat the same name with different aliases to expose multiple client model names.
 # oauth-model-alias:
 #  antigravity:
--- a/docs/gitlab-duo.md
+++ b/docs/gitlab-duo.md
@@ -0,0 +1,115 @@
+# GitLab Duo guide
+
+CLIProxyAPI can now use GitLab Duo as a first-class provider instead of treating it as a plain text wrapper.
+
+It supports:
+
+- OAuth login
+- personal access token login
+- automatic refresh of GitLab `direct_access` metadata
+- dynamic model discovery from GitLab metadata
+- native GitLab AI gateway routing for Anthropic and OpenAI/Codex managed models
+- Claude-compatible and OpenAI-compatible downstream APIs
+
+## What this means
+
+If GitLab Duo returns an Anthropic-managed model, CLIProxyAPI routes requests through the GitLab AI gateway Anthropic proxy and uses the existing Claude executor path.
+
+If GitLab Duo returns an OpenAI-managed model, CLIProxyAPI routes requests through the GitLab AI gateway OpenAI proxy and uses the existing Codex/OpenAI executor path.
+
+That gives GitLab Duo much closer runtime behavior to the built-in `codex` provider:
+
+- Claude-compatible clients can use GitLab Duo models through `/v1/messages`
+- OpenAI-compatible clients can use GitLab Duo models through `/v1/chat/completions`
+- OpenAI Responses clients can use GitLab Duo models through `/v1/responses`
+
+The model list is not hardcoded. CLIProxyAPI reads the current model metadata from GitLab `direct_access` and registers:
+
+- a stable alias: `gitlab-duo`
+- any discovered managed model names, such as `claude-sonnet-4-5` or `gpt-5-codex`
+
+## Login
+
+OAuth login:
+
+```bash
+./CLIProxyAPI -gitlab-login
+```
+
+PAT login:
+
+```bash
+./CLIProxyAPI -gitlab-token-login
+```
+
+You can also provide inputs through environment variables:
+
+```bash
+export GITLAB_BASE_URL=https://gitlab.com
+export GITLAB_OAUTH_CLIENT_ID=your-client-id
+export GITLAB_OAUTH_CLIENT_SECRET=your-client-secret
+export GITLAB_PERSONAL_ACCESS_TOKEN=glpat-...
+```
+
+Notes:
+
+- OAuth requires a GitLab OAuth application.
+- PAT login requires a personal access token that can call the GitLab APIs used by Duo. In practice, `api` scope is the safe baseline.
+- Self-managed GitLab instances are supported through `GITLAB_BASE_URL`.
+
+## Using the models
+
+After login, start CLIProxyAPI normally and point your client at the local proxy.
+
+You can select:
+
+- `gitlab-duo` to use the current Duo-managed model for that account
+- the discovered provider model name if you want to pin it explicitly
+
+Examples:
+
+```bash
+curl http://127.0.0.1:8080/v1/models
+```
+
+```bash
+curl http://127.0.0.1:8080/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "gitlab-duo",
+    "messages": [
+      {"role": "user", "content": "Write a Go HTTP middleware for request IDs."}
+    ]
+  }'
+```
+
+If the GitLab account is currently mapped to an Anthropic model, Claude-compatible clients can use the same account through the Claude handler path. If the account is currently mapped to an OpenAI/Codex model, OpenAI-compatible clients can use `/v1/chat/completions` or `/v1/responses`.
+
+## How model freshness works
+
+CLIProxyAPI does not ship a fixed GitLab Duo model catalog.
+
+Instead, it refreshes GitLab `direct_access` metadata and uses the returned `model_details` and any discovered model list entries to keep the local registry aligned with the current GitLab-managed model assignment.
+
+This matches GitLab's current public contract better than hardcoding model names.
+
+## Current scope
+
+The GitLab Duo provider now has:
+
+- OAuth and PAT auth flows
+- runtime refresh of Duo gateway credentials
+- native Anthropic gateway routing
+- native OpenAI/Codex gateway routing
+- handler-level smoke tests for Claude-compatible and OpenAI-compatible paths
+
+Still out of scope today:
+
+- websocket or session-specific parity beyond the current HTTP APIs
+- GitLab-specific IDE features that are not exposed through the public gateway contract
+
+## References
+
+- GitLab Code Suggestions API: https://docs.gitlab.com/api/code_suggestions/
+- GitLab Agent Assistant and managed credentials: https://docs.gitlab.com/user/duo_agent_platform/agent_assistant/
+- GitLab Duo model selection: https://docs.gitlab.com/user/gitlab_duo/model_selection/
--- a/docs/gitlab-duo_CN.md
+++ b/docs/gitlab-duo_CN.md
@@ -0,0 +1,115 @@
+# GitLab Duo 使用说明
+
+CLIProxyAPI 现在可以把 GitLab Duo 当作一等 Provider 来使用，而不是仅仅把它当成简单的文本补全封装。
+
+当前支持：
+
+- OAuth 登录
+- personal access token 登录
+- 自动刷新 GitLab `direct_access` 元数据
+- 根据 GitLab 返回的元数据动态发现模型
+- 针对 Anthropic 和 OpenAI/Codex 托管模型的 GitLab AI gateway 原生路由
+- Claude 兼容与 OpenAI 兼容下游 API
+
+## 这意味着什么
+
+如果 GitLab Duo 返回的是 Anthropic 托管模型，CLIProxyAPI 会通过 GitLab AI gateway 的 Anthropic 代理转发，并复用现有的 Claude executor 路径。
+
+如果 GitLab Duo 返回的是 OpenAI 托管模型，CLIProxyAPI 会通过 GitLab AI gateway 的 OpenAI 代理转发，并复用现有的 Codex/OpenAI executor 路径。
+
+这让 GitLab Duo 的运行时行为更接近内置的 `codex` Provider：
+
+- Claude 兼容客户端可以通过 `/v1/messages` 使用 GitLab Duo 模型
+- OpenAI 兼容客户端可以通过 `/v1/chat/completions` 使用 GitLab Duo 模型
+- OpenAI Responses 客户端可以通过 `/v1/responses` 使用 GitLab Duo 模型
+
+模型列表不是硬编码的。CLIProxyAPI 会从 GitLab `direct_access` 中读取当前模型元数据，并注册：
+
+- 一个稳定别名：`gitlab-duo`
+- GitLab 当前发现到的托管模型名，例如 `claude-sonnet-4-5` 或 `gpt-5-codex`
+
+## 登录
+
+OAuth 登录：
+
+```bash
+./CLIProxyAPI -gitlab-login
+```
+
+PAT 登录：
+
+```bash
+./CLIProxyAPI -gitlab-token-login
+```
+
+也可以通过环境变量提供输入：
+
+```bash
+export GITLAB_BASE_URL=https://gitlab.com
+export GITLAB_OAUTH_CLIENT_ID=your-client-id
+export GITLAB_OAUTH_CLIENT_SECRET=your-client-secret
+export GITLAB_PERSONAL_ACCESS_TOKEN=glpat-...
+```
+
+说明：
+
+- OAuth 方式需要一个 GitLab OAuth application。
+- PAT 登录需要一个能够调用 GitLab Duo 相关 API 的 personal access token。实践上，`api` scope 是最稳妥的基线。
+- 自建 GitLab 实例可以通过 `GITLAB_BASE_URL` 接入。
+
+## 如何使用模型
+
+登录完成后，正常启动 CLIProxyAPI，并让客户端连接到本地代理。
+
+你可以选择：
+
+- `gitlab-duo`，始终使用该账号当前的 Duo 托管模型
+- GitLab 当前发现到的 provider 模型名，如果你想显式固定模型
+
+示例：
+
+```bash
+curl http://127.0.0.1:8080/v1/models
+```
+
+```bash
+curl http://127.0.0.1:8080/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "gitlab-duo",
+    "messages": [
+      {"role": "user", "content": "Write a Go HTTP middleware for request IDs."}
+    ]
+  }'
+```
+
+如果该 GitLab 账号当前绑定的是 Anthropic 模型，Claude 兼容客户端可以通过 Claude handler 路径直接使用它。如果当前绑定的是 OpenAI/Codex 模型，OpenAI 兼容客户端可以通过 `/v1/chat/completions` 或 `/v1/responses` 使用它。
+
+## 模型如何保持最新
+
+CLIProxyAPI 不内置固定的 GitLab Duo 模型清单。
+
+它会刷新 GitLab `direct_access` 元数据，并使用返回的 `model_details` 以及可能存在的模型列表字段，让本地 registry 尽量与 GitLab 当前分配的托管模型保持一致。
+
+这比硬编码模型名更符合 GitLab 当前公开 API 的实际契约。
+
+## 当前覆盖范围
+
+GitLab Duo Provider 目前已经具备：
+
+- OAuth 和 PAT 登录流程
+- Duo gateway 凭据的运行时刷新
+- Anthropic gateway 原生路由
+- OpenAI/Codex gateway 原生路由
+- Claude 兼容和 OpenAI 兼容路径的 handler 级 smoke 测试
+
+当前仍未覆盖：
+
+- websocket 或 session 级别的完全对齐
+- GitLab 公开 gateway 契约之外的 IDE 专有能力
+
+## 参考资料
+
+- GitLab Code Suggestions API: https://docs.gitlab.com/api/code_suggestions/
+- GitLab Agent Assistant 与 managed credentials: https://docs.gitlab.com/user/duo_agent_platform/agent_assistant/
+- GitLab Duo 模型选择: https://docs.gitlab.com/user/gitlab_duo/model_selection/
--- a/examples/custom-provider/main.go
+++ b/examples/custom-provider/main.go
@@ -52,11 +52,11 @@ func init() {
 	sdktr.Register(fOpenAI, fMyProv,
 		func(model string, raw []byte, stream bool) []byte { return raw },
 		sdktr.ResponseTransform{
-			Stream: func(ctx context.Context, model string, originalReq, translatedReq, raw []byte, param *any) []string {
-				return []string{string(raw)}
+			Stream: func(ctx context.Context, model string, originalReq, translatedReq, raw []byte, param *any) [][]byte {
+				return [][]byte{raw}
 			},
-			NonStream: func(ctx context.Context, model string, originalReq, translatedReq, raw []byte, param *any) string {
-				return string(raw)
+			NonStream: func(ctx context.Context, model string, originalReq, translatedReq, raw []byte, param *any) []byte {
+				return raw
 			},
 		},
 	)
--- a/gitlab-duo-codex-parity-plan.md
+++ b/gitlab-duo-codex-parity-plan.md
@@ -0,0 +1,278 @@
+# Plan: GitLab Duo Codex Parity
+
+**Generated**: 2026-03-10
+**Estimated Complexity**: High
+
+## Overview
+Bring GitLab Duo support from the current "auth + basic executor" stage to the same practical level as `codex` inside `CLIProxyAPI`: a user logs in once, points external clients such as Claude Code at `CLIProxyAPI`, selects GitLab Duo-backed models, and gets stable streaming, multi-turn behavior, tool calling compatibility, and predictable model routing without manual provider-specific workarounds.
+
+The core architectural shift is to stop treating GitLab Duo as only two REST wrappers (`/api/v4/chat/completions` and `/api/v4/code_suggestions/completions`) and instead use GitLab's `direct_access` contract as the primary runtime entrypoint wherever possible. Official GitLab docs confirm that `direct_access` returns AI gateway connection details, headers, token, and expiry; that contract is the closest path to codex-like provider behavior.
+
+## Prerequisites
+- Official GitLab Duo API references confirmed during implementation:
+  - `POST /api/v4/code_suggestions/direct_access`
+  - `POST /api/v4/code_suggestions/completions`
+  - `POST /api/v4/chat/completions`
+- Access to at least one real GitLab Duo account for manual verification.
+- One downstream client target for acceptance testing:
+  - Claude Code against Claude-compatible endpoint
+  - OpenAI-compatible client against `/v1/chat/completions` and `/v1/responses`
+- Existing PR branch as starting point:
+  - `feat/gitlab-duo-auth`
+  - PR [#2028](https://github.com/router-for-me/CLIProxyAPI/pull/2028)
+
+## Definition Of Done
+- GitLab Duo models can be used via `CLIProxyAPI` from the same client surfaces that already work for `codex`.
+- Upstream streaming is real passthrough or faithful chunked forwarding, not synthetic whole-response replay.
+- Tool/function calling survives translation layers without dropping fields or corrupting names.
+- Multi-turn and session semantics are stable across `chat/completions`, `responses`, and Claude-compatible routes.
+- Model exposure stays current from GitLab metadata or gateway discovery without hardcoded stale model tables.
+- `go test ./...` stays green and at least one real manual end-to-end client flow is documented.
+
+## Sprint 1: Contract And Gap Closure
+**Goal**: Replace assumptions with a hard compatibility contract between current `codex` behavior and what GitLab Duo can actually support.
+
+**Demo/Validation**:
+- Written matrix showing `codex` features vs current GitLab Duo behavior.
+- One checked-in developer note or test fixture for real GitLab Duo payload examples.
+
+### Task 1.1: Freeze Codex Parity Checklist
+- **Location**: [internal/runtime/executor/codex_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_executor.go), [internal/runtime/executor/codex_websockets_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_websockets_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go)
+- **Description**: Produce a concrete feature matrix for `codex`: HTTP execute, SSE execute, `/v1/responses`, websocket downstream path, tool calling, request IDs, session close semantics, and model registration behavior.
+- **Dependencies**: None
+- **Acceptance Criteria**:
+  - A checklist exists in repo docs or issue notes.
+  - Each capability is marked `required`, `optional`, or `not possible` for GitLab Duo.
+- **Validation**:
+  - Review against current `codex` code paths.
+
+### Task 1.2: Lock GitLab Duo Runtime Contract
+- **Location**: [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
+- **Description**: Validate the exact upstream contract we can rely on:
+  - `direct_access` fields and refresh cadence
+  - whether AI gateway path is usable directly
+  - when `chat/completions` is available vs when fallback is required
+  - what streaming shape is returned by `code_suggestions/completions?stream=true`
+- **Dependencies**: Task 1.1
+- **Acceptance Criteria**:
+  - GitLab transport decision is explicit: `gateway-first`, `REST-first`, or `hybrid`.
+  - Unknown areas are isolated behind feature flags, not spread across executor logic.
+- **Validation**:
+  - Official docs + captured real responses from a Duo account.
+
+### Task 1.3: Define Client-Facing Compatibility Targets
+- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md), [gitlab-duo-codex-parity-plan.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/gitlab-duo-codex-parity-plan.md)
+- **Description**: Define exactly which external flows must work to call GitLab Duo support "like codex".
+- **Dependencies**: Task 1.2
+- **Acceptance Criteria**:
+  - Required surfaces are listed:
+    - Claude-compatible route
+    - OpenAI `chat/completions`
+    - OpenAI `responses`
+    - optional downstream websocket path
+  - Non-goals are explicit if GitLab upstream cannot support them.
+- **Validation**:
+  - Maintainer review of stated scope.
+
+## Sprint 2: Primary Transport Parity
+**Goal**: Move GitLab Duo execution onto a transport that supports codex-like runtime behavior.
+
+**Demo/Validation**:
+- A GitLab Duo model works over real streaming through `/v1/chat/completions`.
+- No synthetic "collect full body then fake stream" path remains on the primary flow.
+
+### Task 2.1: Refactor GitLab Executor Into Strategy Layers
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
+- **Description**: Split current executor into explicit strategies:
+  - auth refresh/direct access refresh
+  - gateway transport
+  - GitLab REST fallback transport
+  - downstream translation helpers
+- **Dependencies**: Sprint 1
+- **Acceptance Criteria**:
+  - Executor no longer mixes discovery, refresh, fallback selection, and response synthesis in one path.
+  - Transport choice is testable in isolation.
+- **Validation**:
+  - Unit tests for strategy selection and fallback boundaries.
+
+### Task 2.2: Implement Real Streaming Path
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go)
+- **Description**: Replace synthetic streaming with true upstream incremental forwarding:
+  - use gateway stream if available
+  - otherwise consume GitLab Code Suggestions streaming response and map chunks incrementally
+- **Dependencies**: Task 2.1
+- **Acceptance Criteria**:
+  - `ExecuteStream` emits chunks before upstream completion.
+  - error handling preserves status and early failure semantics.
+- **Validation**:
+  - tests with chunked upstream server
+  - manual curl check against `/v1/chat/completions` with `stream=true`
+
+### Task 2.3: Preserve Upstream Auth And Headers Correctly
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go)
+- **Description**: Use `direct_access` connection details as first-class transport state:
+  - gateway token
+  - expiry
+  - mandatory forwarded headers
+  - model metadata
+- **Dependencies**: Task 2.1
+- **Acceptance Criteria**:
+  - executor stops ignoring gateway headers/token when transport requires them
+  - refresh logic never over-fetches `direct_access`
+- **Validation**:
+  - tests verifying propagated headers and refresh interval behavior
+
+## Sprint 3: Request/Response Semantics Parity
+**Goal**: Make GitLab Duo behave correctly under the same request shapes that current `codex` consumers send.
+
+**Demo/Validation**:
+- OpenAI and Claude-compatible clients can do non-streaming and streaming conversations without losing structure.
+
+### Task 3.1: Normalize Multi-Turn Message Mapping
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/translator](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/translator)
+- **Description**: Replace the current "flatten prompt into one instruction" behavior with stable multi-turn mapping:
+  - preserve system context
+  - preserve user/assistant ordering
+  - maintain bounded context truncation
+- **Dependencies**: Sprint 2
+- **Acceptance Criteria**:
+  - multi-turn requests are not collapsed into a lossy single string unless fallback mode explicitly requires it
+  - truncation policy is deterministic and tested
+- **Validation**:
+  - golden tests for request mapping
+
+### Task 3.2: Tool Calling Compatibility Layer
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go)
+- **Description**: Decide and implement one of two paths:
+  - native pass-through if GitLab gateway supports tool/function structures
+  - strict downgrade path with explicit unsupported errors instead of silent field loss
+- **Dependencies**: Task 3.1
+- **Acceptance Criteria**:
+  - tool-related fields are either preserved correctly or rejected explicitly
+  - no silent corruption of tool names, tool calls, or tool results
+- **Validation**:
+  - table-driven tests for tool payloads
+  - one manual client scenario using tools
+
+### Task 3.3: Token Counting And Usage Reporting Fidelity
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/runtime/executor/usage_helpers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/usage_helpers.go)
+- **Description**: Improve token/usage reporting so GitLab models behave like first-class providers in logs and scheduling.
+- **Dependencies**: Sprint 2
+- **Acceptance Criteria**:
+  - `CountTokens` uses the closest supported estimation path
+  - usage logging distinguishes prompt vs completion when possible
+- **Validation**:
+  - unit tests for token estimation outputs
+
+## Sprint 4: Responses And Session Parity
+**Goal**: Reach codex-level support for OpenAI Responses clients and long-lived sessions where GitLab upstream permits it.
+
+**Demo/Validation**:
+- `/v1/responses` works with GitLab Duo in a realistic client flow.
+- If websocket parity is not possible, the code explicitly declines it and keeps HTTP paths stable.
+
+### Task 4.1: Make GitLab Compatible With `/v1/responses`
+- **Location**: [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
+- **Description**: Ensure GitLab transport can safely back the Responses API path, including compact responses if applicable.
+- **Dependencies**: Sprint 3
+- **Acceptance Criteria**:
+  - GitLab Duo can be selected behind `/v1/responses`
+  - response IDs and follow-up semantics are defined
+- **Validation**:
+  - handler tests analogous to codex/openai responses tests
+
+### Task 4.2: Evaluate Downstream Websocket Parity
+- **Location**: [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
+- **Description**: Decide whether GitLab Duo can support downstream websocket sessions like codex:
+  - if yes, add session-aware execution path
+  - if no, mark GitLab auth as websocket-ineligible and keep HTTP routes first-class
+- **Dependencies**: Task 4.1
+- **Acceptance Criteria**:
+  - websocket behavior is explicit, not accidental
+  - no route claims websocket support when the upstream cannot honor it
+- **Validation**:
+  - websocket handler tests or explicit capability tests
+
+### Task 4.3: Add Session Cleanup And Failure Recovery Semantics
+- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/cliproxy/auth/conductor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/auth/conductor.go)
+- **Description**: Add codex-like session cleanup, retry boundaries, and model suspension/resume behavior for GitLab failures and quota events.
+- **Dependencies**: Sprint 2
+- **Acceptance Criteria**:
+  - auth/model cooldown behavior is predictable on GitLab 4xx/5xx/quota responses
+  - executor cleans up per-session resources if any are introduced
+- **Validation**:
+  - tests for quota and retry behavior
+
+## Sprint 5: Client UX, Model UX, And Manual E2E
+**Goal**: Make GitLab Duo feel like a normal built-in provider to operators and downstream clients.
+
+**Demo/Validation**:
+- A documented setup exists for "login once, point Claude Code at CLIProxyAPI, use GitLab Duo-backed model".
+
+### Task 5.1: Model Alias And Provider UX Cleanup
+- **Location**: [sdk/cliproxy/service.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/service.go), [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md)
+- **Description**: Normalize what users see:
+  - stable alias such as `gitlab-duo`
+  - discovered upstream model names
+  - optional prefix behavior
+  - account labels that clearly distinguish OAuth vs PAT
+- **Dependencies**: Sprint 3
+- **Acceptance Criteria**:
+  - users can select a stable GitLab alias even when upstream model changes
+  - dynamic model discovery does not cause confusing model churn
+- **Validation**:
+  - registry tests and manual `/v1/models` inspection
+
+### Task 5.2: Add Real End-To-End Acceptance Tests
+- **Location**: [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go), [sdk/api/handlers/openai](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai)
+- **Description**: Add higher-level tests covering the actual proxy surfaces:
+  - OpenAI `chat/completions`
+  - OpenAI `responses`
+  - Claude-compatible request path if GitLab is routed there
+- **Dependencies**: Sprint 4
+- **Acceptance Criteria**:
+  - tests fail if streaming regresses into synthetic buffering again
+  - tests cover at least one tool-related request and one multi-turn request
+- **Validation**:
+  - `go test ./...`
+
+### Task 5.3: Publish Operator Documentation
+- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md)
+- **Description**: Document:
+  - OAuth setup requirements
+  - PAT requirements
+  - current capability matrix
+  - known limitations if websocket/tool parity is partial
+- **Dependencies**: Sprint 5.1
+- **Acceptance Criteria**:
+  - setup instructions are enough for a new user to reproduce the GitLab Duo flow
+  - limitations are explicit
+- **Validation**:
+  - dry-run docs review from a clean environment
+
+## Testing Strategy
+- Keep `go test ./...` green after every committable task.
+- Add table-driven tests first for request mapping, refresh behavior, and dynamic model registration.
+- Add transport tests with `httptest.Server` for:
+  - real chunked streaming
+  - header propagation from `direct_access`
+  - upstream fallback rules
+- Add at least one manual acceptance checklist:
+  - login via OAuth
+  - login via PAT
+  - list models
+  - run one streaming prompt via OpenAI route
+  - run one prompt from the target downstream client
+
+## Potential Risks & Gotchas
+- GitLab public docs expose `direct_access`, but do not fully document every possible AI gateway path. We should isolate any empirically discovered gateway assumptions behind one transport layer and feature flags.
+- `chat/completions` availability differs by GitLab offering and version. The executor must not assume it always exists.
+- Code Suggestions is completion-oriented; lossy mapping from rich chat/tool payloads will make GitLab Duo feel worse than codex unless explicitly handled.
+- Synthetic streaming is not good enough for codex parity and will cause regressions in interactive clients.
+- Dynamic model discovery can create unstable UX if the stable alias and discovered model IDs are not separated cleanly.
+- PAT auth may validate successfully while still lacking effective Duo permissions. Error reporting must surface this explicitly.
+
+## Rollback Plan
+- Keep the current basic GitLab executor behind a fallback mode until the new transport path is stable.
+- If parity work destabilizes existing providers, revert only GitLab-specific executor changes and leave auth support intact.
+- Preserve the stable `gitlab-duo` alias so rollback does not break client configuration.
--- a/go.mod
+++ b/go.mod
@@ -83,6 +83,7 @@ require (
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
 	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pierrec/xxHash v0.1.5
 	github.com/pjbgf/sha1cd v0.5.0 // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/rs/xid v1.5.0 // indirect
@@ -91,8 +92,8 @@ require (
 	github.com/tidwall/pretty v1.2.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/ugorji/go/codec v1.2.12 // indirect
-	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	github.com/x448/float16 v0.8.4 // indirect
+	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	golang.org/x/arch v0.8.0 // indirect
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -154,6 +154,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc
 github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
 github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
 github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pierrec/xxHash v0.1.5 h1:n/jBpwTHiER4xYvK3/CdPVnLDPchj8eTJFFLUb4QHBo=
+github.com/pierrec/xxHash v0.1.5/go.mod h1:w2waW5Zoa/Wc4Yqe0wgrIYAGKqRMf7czn2HNKXmuL+I=
 github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0=
 github.com/pjbgf/sha1cd v0.5.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM=
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -6,7 +6,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"net"
 	"net/http"
 	"net/url"
 	"strings"
@@ -14,13 +13,12 @@ import (

 	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
-	log "github.com/sirupsen/logrus"
-	"golang.org/x/net/proxy"
-	"golang.org/x/oauth2"
-	"golang.org/x/oauth2/google"
-
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/oauth2"
+	"golang.org/x/oauth2/google"
 )

 const defaultAPICallTimeout = 60 * time.Second
@@ -725,47 +723,12 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
 }

 func buildProxyTransport(proxyStr string) *http.Transport {
-	proxyStr = strings.TrimSpace(proxyStr)
-	if proxyStr == "" {
+	transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr)
+	if errBuild != nil {
+		log.WithError(errBuild).Debug("build proxy transport failed")
 		return nil
 	}
-
-	proxyURL, errParse := url.Parse(proxyStr)
-	if errParse != nil {
-		log.WithError(errParse).Debug("parse proxy URL failed")
-		return nil
-	}
-	if proxyURL.Scheme == "" || proxyURL.Host == "" {
-		log.Debug("proxy URL missing scheme/host")
-		return nil
-	}
-
-	if proxyURL.Scheme == "socks5" {
-		var proxyAuth *proxy.Auth
-		if proxyURL.User != nil {
-			username := proxyURL.User.Username()
-			password, _ := proxyURL.User.Password()
-			proxyAuth = &proxy.Auth{User: username, Password: password}
-		}
-		dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, proxyAuth, proxy.Direct)
-		if errSOCKS5 != nil {
-			log.WithError(errSOCKS5).Debug("create SOCKS5 dialer failed")
-			return nil
-		}
-		return &http.Transport{
-			Proxy: nil,
-			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
-				return dialer.Dial(network, addr)
-			},
-		}
-	}
-
-	if proxyURL.Scheme == "http" || proxyURL.Scheme == "https" {
-		return &http.Transport{Proxy: http.ProxyURL(proxyURL)}
-	}
-
-	log.Debugf("unsupported proxy scheme: %s", proxyURL.Scheme)
-	return nil
+	return transport
 }

 // headerContainsValue checks whether a header map contains a target value (case-insensitive key and value).
--- a/internal/api/handlers/management/api_tools_test.go
+++ b/internal/api/handlers/management/api_tools_test.go
@@ -2,172 +2,112 @@ package management

 import (
 	"context"
-	"encoding/json"
-	"io"
 	"net/http"
-	"net/http/httptest"
-	"net/url"
-	"strings"
-	"sync"
 	"testing"
-	"time"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 )

-type memoryAuthStore struct {
-	mu    sync.Mutex
-	items map[string]*coreauth.Auth
-}
+func TestAPICallTransportDirectBypassesGlobalProxy(t *testing.T) {
+	t.Parallel()

-func (s *memoryAuthStore) List(ctx context.Context) ([]*coreauth.Auth, error) {
-	_ = ctx
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	out := make([]*coreauth.Auth, 0, len(s.items))
-	for _, a := range s.items {
-		out = append(out, a.Clone())
-	}
-	return out, nil
-}
-
-func (s *memoryAuthStore) Save(ctx context.Context, auth *coreauth.Auth) (string, error) {
-	_ = ctx
-	if auth == nil {
-		return "", nil
-	}
-	s.mu.Lock()
-	if s.items == nil {
-		s.items = make(map[string]*coreauth.Auth)
-	}
-	s.items[auth.ID] = auth.Clone()
-	s.mu.Unlock()
-	return auth.ID, nil
-}
-
-func (s *memoryAuthStore) Delete(ctx context.Context, id string) error {
-	_ = ctx
-	s.mu.Lock()
-	delete(s.items, id)
-	s.mu.Unlock()
-	return nil
-}
-
-func TestResolveTokenForAuth_Antigravity_RefreshesExpiredToken(t *testing.T) {
-	var callCount int
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		callCount++
-		if r.Method != http.MethodPost {
-			t.Fatalf("expected POST, got %s", r.Method)
-		}
-		if ct := r.Header.Get("Content-Type"); !strings.HasPrefix(ct, "application/x-www-form-urlencoded") {
-			t.Fatalf("unexpected content-type: %s", ct)
-		}
-		bodyBytes, _ := io.ReadAll(r.Body)
-		_ = r.Body.Close()
-		values, err := url.ParseQuery(string(bodyBytes))
-		if err != nil {
-			t.Fatalf("parse form: %v", err)
-		}
-		if values.Get("grant_type") != "refresh_token" {
-			t.Fatalf("unexpected grant_type: %s", values.Get("grant_type"))
-		}
-		if values.Get("refresh_token") != "rt" {
-			t.Fatalf("unexpected refresh_token: %s", values.Get("refresh_token"))
-		}
-		if values.Get("client_id") != antigravityOAuthClientID {
-			t.Fatalf("unexpected client_id: %s", values.Get("client_id"))
-		}
-		if values.Get("client_secret") != antigravityOAuthClientSecret {
-			t.Fatalf("unexpected client_secret")
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		_ = json.NewEncoder(w).Encode(map[string]any{
-			"access_token":  "new-token",
-			"refresh_token": "rt2",
-			"expires_in":    int64(3600),
-			"token_type":    "Bearer",
-		})
-	}))
-	t.Cleanup(srv.Close)
-
-	originalURL := antigravityOAuthTokenURL
-	antigravityOAuthTokenURL = srv.URL
-	t.Cleanup(func() { antigravityOAuthTokenURL = originalURL })
-
-	store := &memoryAuthStore{}
-	manager := coreauth.NewManager(store, nil, nil)
-
-	auth := &coreauth.Auth{
-		ID:       "antigravity-test.json",
-		FileName: "antigravity-test.json",
-		Provider: "antigravity",
-		Metadata: map[string]any{
-			"type":          "antigravity",
-			"access_token":  "old-token",
-			"refresh_token": "rt",
-			"expires_in":    int64(3600),
-			"timestamp":     time.Now().Add(-2 * time.Hour).UnixMilli(),
-			"expired":       time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
+	h := &Handler{
+		cfg: &config.Config{
+			SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"},
 		},
 	}
-	if _, err := manager.Register(context.Background(), auth); err != nil {
-		t.Fatalf("register auth: %v", err)
+
+	transport := h.apiCallTransport(&coreauth.Auth{ProxyURL: "direct"})
+	httpTransport, ok := transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport type = %T, want *http.Transport", transport)
+	}
+	if httpTransport.Proxy != nil {
+		t.Fatal("expected direct transport to disable proxy function")
+	}
+}
+
+func TestAPICallTransportInvalidAuthFallsBackToGlobalProxy(t *testing.T) {
+	t.Parallel()
+
+	h := &Handler{
+		cfg: &config.Config{
+			SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"},
+		},
+	}
+
+	transport := h.apiCallTransport(&coreauth.Auth{ProxyURL: "bad-value"})
+	httpTransport, ok := transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport type = %T, want *http.Transport", transport)
+	}
+
+	req, errRequest := http.NewRequest(http.MethodGet, "https://example.com", nil)
+	if errRequest != nil {
+		t.Fatalf("http.NewRequest returned error: %v", errRequest)
+	}
+
+	proxyURL, errProxy := httpTransport.Proxy(req)
+	if errProxy != nil {
+		t.Fatalf("httpTransport.Proxy returned error: %v", errProxy)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://global-proxy.example.com:8080" {
+		t.Fatalf("proxy URL = %v, want http://global-proxy.example.com:8080", proxyURL)
+	}
+}
+
+func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) {
+	t.Parallel()
+
+	manager := coreauth.NewManager(nil, nil, nil)
+	geminiAuth := &coreauth.Auth{
+		ID:       "gemini:apikey:123",
+		Provider: "gemini",
+		Attributes: map[string]string{
+			"api_key": "shared-key",
+		},
+	}
+	compatAuth := &coreauth.Auth{
+		ID:       "openai-compatibility:bohe:456",
+		Provider: "bohe",
+		Label:    "bohe",
+		Attributes: map[string]string{
+			"api_key":      "shared-key",
+			"compat_name":  "bohe",
+			"provider_key": "bohe",
+		},
+	}
+
+	if _, errRegister := manager.Register(context.Background(), geminiAuth); errRegister != nil {
+		t.Fatalf("register gemini auth: %v", errRegister)
+	}
+	if _, errRegister := manager.Register(context.Background(), compatAuth); errRegister != nil {
+		t.Fatalf("register compat auth: %v", errRegister)
+	}
+
+	geminiIndex := geminiAuth.EnsureIndex()
+	compatIndex := compatAuth.EnsureIndex()
+	if geminiIndex == compatIndex {
+		t.Fatalf("shared api key produced duplicate auth_index %q", geminiIndex)
 	}

 	h := &Handler{authManager: manager}
-	token, err := h.resolveTokenForAuth(context.Background(), auth)
-	if err != nil {
-		t.Fatalf("resolveTokenForAuth: %v", err)
+
+	gotGemini := h.authByIndex(geminiIndex)
+	if gotGemini == nil {
+		t.Fatal("expected gemini auth by index")
 	}
-	if token != "new-token" {
-		t.Fatalf("expected refreshed token, got %q", token)
-	}
-	if callCount != 1 {
-		t.Fatalf("expected 1 refresh call, got %d", callCount)
+	if gotGemini.ID != geminiAuth.ID {
+		t.Fatalf("authByIndex(gemini) returned %q, want %q", gotGemini.ID, geminiAuth.ID)
 	}

-	updated, ok := manager.GetByID(auth.ID)
-	if !ok || updated == nil {
-		t.Fatalf("expected auth in manager after update")
+	gotCompat := h.authByIndex(compatIndex)
+	if gotCompat == nil {
+		t.Fatal("expected compat auth by index")
 	}
-	if got := tokenValueFromMetadata(updated.Metadata); got != "new-token" {
-		t.Fatalf("expected manager metadata updated, got %q", got)
-	}
-}
-
-func TestResolveTokenForAuth_Antigravity_SkipsRefreshWhenTokenValid(t *testing.T) {
-	var callCount int
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		callCount++
-		w.WriteHeader(http.StatusInternalServerError)
-	}))
-	t.Cleanup(srv.Close)
-
-	originalURL := antigravityOAuthTokenURL
-	antigravityOAuthTokenURL = srv.URL
-	t.Cleanup(func() { antigravityOAuthTokenURL = originalURL })
-
-	auth := &coreauth.Auth{
-		ID:       "antigravity-valid.json",
-		FileName: "antigravity-valid.json",
-		Provider: "antigravity",
-		Metadata: map[string]any{
-			"type":         "antigravity",
-			"access_token": "ok-token",
-			"expired":      time.Now().Add(30 * time.Minute).Format(time.RFC3339),
-		},
-	}
-	h := &Handler{}
-	token, err := h.resolveTokenForAuth(context.Background(), auth)
-	if err != nil {
-		t.Fatalf("resolveTokenForAuth: %v", err)
-	}
-	if token != "ok-token" {
-		t.Fatalf("expected existing token, got %q", token)
-	}
-	if callCount != 0 {
-		t.Fatalf("expected no refresh calls, got %d", callCount)
+	if gotCompat.ID != compatAuth.ID {
+		t.Fatalf("authByIndex(compat) returned %q, want %q", gotCompat.ID, compatAuth.ID)
 	}
 }
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
--- a/internal/api/handlers/management/auth_files_batch_test.go
+++ b/internal/api/handlers/management/auth_files_batch_test.go
@@ -0,0 +1,197 @@
+package management
+
+import (
+	"bytes"
+	"encoding/json"
+	"mime/multipart"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+func TestUploadAuthFile_BatchMultipart(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	manager := coreauth.NewManager(nil, nil, nil)
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, manager)
+
+	files := []struct {
+		name    string
+		content string
+	}{
+		{name: "alpha.json", content: `{"type":"codex","email":"alpha@example.com"}`},
+		{name: "beta.json", content: `{"type":"claude","email":"beta@example.com"}`},
+	}
+
+	var body bytes.Buffer
+	writer := multipart.NewWriter(&body)
+	for _, file := range files {
+		part, err := writer.CreateFormFile("file", file.name)
+		if err != nil {
+			t.Fatalf("failed to create multipart file: %v", err)
+		}
+		if _, err = part.Write([]byte(file.content)); err != nil {
+			t.Fatalf("failed to write multipart content: %v", err)
+		}
+	}
+	if err := writer.Close(); err != nil {
+		t.Fatalf("failed to close multipart writer: %v", err)
+	}
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	req := httptest.NewRequest(http.MethodPost, "/v0/management/auth-files", &body)
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+	ctx.Request = req
+
+	h.UploadAuthFile(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected upload status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	var payload map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+	if got, ok := payload["uploaded"].(float64); !ok || int(got) != len(files) {
+		t.Fatalf("expected uploaded=%d, got %#v", len(files), payload["uploaded"])
+	}
+
+	for _, file := range files {
+		fullPath := filepath.Join(authDir, file.name)
+		data, err := os.ReadFile(fullPath)
+		if err != nil {
+			t.Fatalf("expected uploaded file %s to exist: %v", file.name, err)
+		}
+		if string(data) != file.content {
+			t.Fatalf("expected file %s content %q, got %q", file.name, file.content, string(data))
+		}
+	}
+
+	auths := manager.List()
+	if len(auths) != len(files) {
+		t.Fatalf("expected %d auth entries, got %d", len(files), len(auths))
+	}
+}
+
+func TestUploadAuthFile_BatchMultipart_InvalidJSONDoesNotOverwriteExistingFile(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	manager := coreauth.NewManager(nil, nil, nil)
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, manager)
+
+	existingName := "alpha.json"
+	existingContent := `{"type":"codex","email":"alpha@example.com"}`
+	if err := os.WriteFile(filepath.Join(authDir, existingName), []byte(existingContent), 0o600); err != nil {
+		t.Fatalf("failed to seed existing auth file: %v", err)
+	}
+
+	files := []struct {
+		name    string
+		content string
+	}{
+		{name: existingName, content: `{"type":"codex"`},
+		{name: "beta.json", content: `{"type":"claude","email":"beta@example.com"}`},
+	}
+
+	var body bytes.Buffer
+	writer := multipart.NewWriter(&body)
+	for _, file := range files {
+		part, err := writer.CreateFormFile("file", file.name)
+		if err != nil {
+			t.Fatalf("failed to create multipart file: %v", err)
+		}
+		if _, err = part.Write([]byte(file.content)); err != nil {
+			t.Fatalf("failed to write multipart content: %v", err)
+		}
+	}
+	if err := writer.Close(); err != nil {
+		t.Fatalf("failed to close multipart writer: %v", err)
+	}
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	req := httptest.NewRequest(http.MethodPost, "/v0/management/auth-files", &body)
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+	ctx.Request = req
+
+	h.UploadAuthFile(ctx)
+
+	if rec.Code != http.StatusMultiStatus {
+		t.Fatalf("expected upload status %d, got %d with body %s", http.StatusMultiStatus, rec.Code, rec.Body.String())
+	}
+
+	data, err := os.ReadFile(filepath.Join(authDir, existingName))
+	if err != nil {
+		t.Fatalf("expected existing auth file to remain readable: %v", err)
+	}
+	if string(data) != existingContent {
+		t.Fatalf("expected existing auth file to remain %q, got %q", existingContent, string(data))
+	}
+
+	betaData, err := os.ReadFile(filepath.Join(authDir, "beta.json"))
+	if err != nil {
+		t.Fatalf("expected valid auth file to be created: %v", err)
+	}
+	if string(betaData) != files[1].content {
+		t.Fatalf("expected beta auth file content %q, got %q", files[1].content, string(betaData))
+	}
+}
+
+func TestDeleteAuthFile_BatchQuery(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	files := []string{"alpha.json", "beta.json"}
+	for _, name := range files {
+		if err := os.WriteFile(filepath.Join(authDir, name), []byte(`{"type":"codex"}`), 0o600); err != nil {
+			t.Fatalf("failed to write auth file %s: %v", name, err)
+		}
+	}
+
+	manager := coreauth.NewManager(nil, nil, nil)
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, manager)
+	h.tokenStore = &memoryAuthStore{}
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	req := httptest.NewRequest(
+		http.MethodDelete,
+		"/v0/management/auth-files?name="+url.QueryEscape(files[0])+"&name="+url.QueryEscape(files[1]),
+		nil,
+	)
+	ctx.Request = req
+
+	h.DeleteAuthFile(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected delete status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	var payload map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+	if got, ok := payload["deleted"].(float64); !ok || int(got) != len(files) {
+		t.Fatalf("expected deleted=%d, got %#v", len(files), payload["deleted"])
+	}
+
+	for _, name := range files {
+		if _, err := os.Stat(filepath.Join(authDir, name)); !os.IsNotExist(err) {
+			t.Fatalf("expected auth file %s to be removed, stat err: %v", name, err)
+		}
+	}
+}
--- a/internal/api/handlers/management/auth_files_delete_test.go
+++ b/internal/api/handlers/management/auth_files_delete_test.go
@@ -0,0 +1,129 @@
+package management
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+func TestDeleteAuthFile_UsesAuthPathFromManager(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	tempDir := t.TempDir()
+	authDir := filepath.Join(tempDir, "auth")
+	externalDir := filepath.Join(tempDir, "external")
+	if errMkdirAuth := os.MkdirAll(authDir, 0o700); errMkdirAuth != nil {
+		t.Fatalf("failed to create auth dir: %v", errMkdirAuth)
+	}
+	if errMkdirExternal := os.MkdirAll(externalDir, 0o700); errMkdirExternal != nil {
+		t.Fatalf("failed to create external dir: %v", errMkdirExternal)
+	}
+
+	fileName := "codex-user@example.com-plus.json"
+	shadowPath := filepath.Join(authDir, fileName)
+	realPath := filepath.Join(externalDir, fileName)
+	if errWriteShadow := os.WriteFile(shadowPath, []byte(`{"type":"codex","email":"shadow@example.com"}`), 0o600); errWriteShadow != nil {
+		t.Fatalf("failed to write shadow file: %v", errWriteShadow)
+	}
+	if errWriteReal := os.WriteFile(realPath, []byte(`{"type":"codex","email":"real@example.com"}`), 0o600); errWriteReal != nil {
+		t.Fatalf("failed to write real file: %v", errWriteReal)
+	}
+
+	manager := coreauth.NewManager(nil, nil, nil)
+	record := &coreauth.Auth{
+		ID:          "legacy/" + fileName,
+		FileName:    fileName,
+		Provider:    "codex",
+		Status:      coreauth.StatusError,
+		Unavailable: true,
+		Attributes: map[string]string{
+			"path": realPath,
+		},
+		Metadata: map[string]any{
+			"type":  "codex",
+			"email": "real@example.com",
+		},
+	}
+	if _, errRegister := manager.Register(context.Background(), record); errRegister != nil {
+		t.Fatalf("failed to register auth record: %v", errRegister)
+	}
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, manager)
+	h.tokenStore = &memoryAuthStore{}
+
+	deleteRec := httptest.NewRecorder()
+	deleteCtx, _ := gin.CreateTestContext(deleteRec)
+	deleteReq := httptest.NewRequest(http.MethodDelete, "/v0/management/auth-files?name="+url.QueryEscape(fileName), nil)
+	deleteCtx.Request = deleteReq
+	h.DeleteAuthFile(deleteCtx)
+
+	if deleteRec.Code != http.StatusOK {
+		t.Fatalf("expected delete status %d, got %d with body %s", http.StatusOK, deleteRec.Code, deleteRec.Body.String())
+	}
+	if _, errStatReal := os.Stat(realPath); !os.IsNotExist(errStatReal) {
+		t.Fatalf("expected managed auth file to be removed, stat err: %v", errStatReal)
+	}
+	if _, errStatShadow := os.Stat(shadowPath); errStatShadow != nil {
+		t.Fatalf("expected shadow auth file to remain, stat err: %v", errStatShadow)
+	}
+
+	listRec := httptest.NewRecorder()
+	listCtx, _ := gin.CreateTestContext(listRec)
+	listReq := httptest.NewRequest(http.MethodGet, "/v0/management/auth-files", nil)
+	listCtx.Request = listReq
+	h.ListAuthFiles(listCtx)
+
+	if listRec.Code != http.StatusOK {
+		t.Fatalf("expected list status %d, got %d with body %s", http.StatusOK, listRec.Code, listRec.Body.String())
+	}
+	var listPayload map[string]any
+	if errUnmarshal := json.Unmarshal(listRec.Body.Bytes(), &listPayload); errUnmarshal != nil {
+		t.Fatalf("failed to decode list payload: %v", errUnmarshal)
+	}
+	filesRaw, ok := listPayload["files"].([]any)
+	if !ok {
+		t.Fatalf("expected files array, payload: %#v", listPayload)
+	}
+	if len(filesRaw) != 0 {
+		t.Fatalf("expected removed auth to be hidden from list, got %d entries", len(filesRaw))
+	}
+}
+
+func TestDeleteAuthFile_FallbackToAuthDirPath(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	fileName := "fallback-user.json"
+	filePath := filepath.Join(authDir, fileName)
+	if errWrite := os.WriteFile(filePath, []byte(`{"type":"codex"}`), 0o600); errWrite != nil {
+		t.Fatalf("failed to write auth file: %v", errWrite)
+	}
+
+	manager := coreauth.NewManager(nil, nil, nil)
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, manager)
+	h.tokenStore = &memoryAuthStore{}
+
+	deleteRec := httptest.NewRecorder()
+	deleteCtx, _ := gin.CreateTestContext(deleteRec)
+	deleteReq := httptest.NewRequest(http.MethodDelete, "/v0/management/auth-files?name="+url.QueryEscape(fileName), nil)
+	deleteCtx.Request = deleteReq
+	h.DeleteAuthFile(deleteCtx)
+
+	if deleteRec.Code != http.StatusOK {
+		t.Fatalf("expected delete status %d, got %d with body %s", http.StatusOK, deleteRec.Code, deleteRec.Body.String())
+	}
+	if _, errStat := os.Stat(filePath); !os.IsNotExist(errStat) {
+		t.Fatalf("expected auth file to be removed from auth dir, stat err: %v", errStat)
+	}
+}
--- a/internal/api/handlers/management/auth_files_download_test.go
+++ b/internal/api/handlers/management/auth_files_download_test.go
@@ -0,0 +1,62 @@
+package management
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+)
+
+func TestDownloadAuthFile_ReturnsFile(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	fileName := "download-user.json"
+	expected := []byte(`{"type":"codex"}`)
+	if err := os.WriteFile(filepath.Join(authDir, fileName), expected, 0o600); err != nil {
+		t.Fatalf("failed to write auth file: %v", err)
+	}
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, nil)
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	ctx.Request = httptest.NewRequest(http.MethodGet, "/v0/management/auth-files/download?name="+url.QueryEscape(fileName), nil)
+	h.DownloadAuthFile(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected download status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+	if got := rec.Body.Bytes(); string(got) != string(expected) {
+		t.Fatalf("unexpected download content: %q", string(got))
+	}
+}
+
+func TestDownloadAuthFile_RejectsPathSeparators(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, nil)
+
+	for _, name := range []string{
+		"../external/secret.json",
+		`..\\external\\secret.json`,
+		"nested/secret.json",
+		`nested\\secret.json`,
+	} {
+		rec := httptest.NewRecorder()
+		ctx, _ := gin.CreateTestContext(rec)
+		ctx.Request = httptest.NewRequest(http.MethodGet, "/v0/management/auth-files/download?name="+url.QueryEscape(name), nil)
+		h.DownloadAuthFile(ctx)
+
+		if rec.Code != http.StatusBadRequest {
+			t.Fatalf("expected %d for name %q, got %d with body %s", http.StatusBadRequest, name, rec.Code, rec.Body.String())
+		}
+	}
+}
--- a/internal/api/handlers/management/auth_files_download_windows_test.go
+++ b/internal/api/handlers/management/auth_files_download_windows_test.go
@@ -0,0 +1,51 @@
+//go:build windows
+
+package management
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+)
+
+func TestDownloadAuthFile_PreventsWindowsSlashTraversal(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	tempDir := t.TempDir()
+	authDir := filepath.Join(tempDir, "auth")
+	externalDir := filepath.Join(tempDir, "external")
+	if err := os.MkdirAll(authDir, 0o700); err != nil {
+		t.Fatalf("failed to create auth dir: %v", err)
+	}
+	if err := os.MkdirAll(externalDir, 0o700); err != nil {
+		t.Fatalf("failed to create external dir: %v", err)
+	}
+
+	secretName := "secret.json"
+	secretPath := filepath.Join(externalDir, secretName)
+	if err := os.WriteFile(secretPath, []byte(`{"secret":true}`), 0o600); err != nil {
+		t.Fatalf("failed to write external file: %v", err)
+	}
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, nil)
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	ctx.Request = httptest.NewRequest(
+		http.MethodGet,
+		"/v0/management/auth-files/download?name="+url.QueryEscape("../external/"+secretName),
+		nil,
+	)
+	h.DownloadAuthFile(ctx)
+
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected status %d, got %d with body %s", http.StatusBadRequest, rec.Code, rec.Body.String())
+	}
+}
--- a/internal/api/handlers/management/auth_files_gitlab_test.go
+++ b/internal/api/handlers/management/auth_files_gitlab_test.go
@@ -0,0 +1,164 @@
+package management
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+func TestRequestGitLabPATToken_SavesAuthRecord(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if got := r.Header.Get("Authorization"); got != "Bearer glpat-test-token" {
+			t.Fatalf("authorization header = %q, want Bearer glpat-test-token", got)
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		switch r.URL.Path {
+		case "/api/v4/user":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"id":       42,
+				"username": "gitlab-user",
+				"name":     "GitLab User",
+				"email":    "gitlab@example.com",
+			})
+		case "/api/v4/personal_access_tokens/self":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"id":      7,
+				"name":    "management-center",
+				"scopes":  []string{"api", "read_user"},
+				"user_id": 42,
+			})
+		case "/api/v4/code_suggestions/direct_access":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"base_url":   "https://cloud.gitlab.example.com",
+				"token":      "gateway-token",
+				"expires_at": 1893456000,
+				"headers": map[string]string{
+					"X-Gitlab-Realm": "saas",
+				},
+				"model_details": map[string]any{
+					"model_provider": "anthropic",
+					"model_name":     "claude-sonnet-4-5",
+				},
+			})
+		default:
+			http.NotFound(w, r)
+		}
+	}))
+	defer upstream.Close()
+
+	store := &memoryAuthStore{}
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, coreauth.NewManager(nil, nil, nil))
+	h.tokenStore = store
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	ctx.Request = httptest.NewRequest(http.MethodPost, "/v0/management/gitlab-auth-url", strings.NewReader(`{"base_url":"`+upstream.URL+`","personal_access_token":"glpat-test-token"}`))
+	ctx.Request.Header.Set("Content-Type", "application/json")
+
+	h.RequestGitLabPATToken(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	var resp map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("decode response: %v", err)
+	}
+	if got := resp["status"]; got != "ok" {
+		t.Fatalf("status = %#v, want ok", got)
+	}
+	if got := resp["model_provider"]; got != "anthropic" {
+		t.Fatalf("model_provider = %#v, want anthropic", got)
+	}
+	if got := resp["model_name"]; got != "claude-sonnet-4-5" {
+		t.Fatalf("model_name = %#v, want claude-sonnet-4-5", got)
+	}
+
+	store.mu.Lock()
+	defer store.mu.Unlock()
+	if len(store.items) != 1 {
+		t.Fatalf("expected 1 saved auth record, got %d", len(store.items))
+	}
+	var saved *coreauth.Auth
+	for _, item := range store.items {
+		saved = item
+	}
+	if saved == nil {
+		t.Fatal("expected saved auth record")
+	}
+	if saved.Provider != "gitlab" {
+		t.Fatalf("provider = %q, want gitlab", saved.Provider)
+	}
+	if got := saved.Metadata["auth_kind"]; got != "personal_access_token" {
+		t.Fatalf("auth_kind = %#v, want personal_access_token", got)
+	}
+	if got := saved.Metadata["model_provider"]; got != "anthropic" {
+		t.Fatalf("saved model_provider = %#v, want anthropic", got)
+	}
+	if got := saved.Metadata["duo_gateway_token"]; got != "gateway-token" {
+		t.Fatalf("saved duo_gateway_token = %#v, want gateway-token", got)
+	}
+}
+
+func TestPostOAuthCallback_GitLabWritesPendingCallbackFile(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	authDir := t.TempDir()
+	state := "gitlab-state-123"
+	RegisterOAuthSession(state, "gitlab")
+	t.Cleanup(func() { CompleteOAuthSession(state) })
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: authDir}, coreauth.NewManager(nil, nil, nil))
+
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	ctx.Request = httptest.NewRequest(http.MethodPost, "/v0/management/oauth-callback", strings.NewReader(`{"provider":"gitlab","redirect_url":"http://localhost:17171/auth/callback?code=test-code&state=`+state+`"}`))
+	ctx.Request.Header.Set("Content-Type", "application/json")
+
+	h.PostOAuthCallback(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	filePath := filepath.Join(authDir, ".oauth-gitlab-"+state+".oauth")
+	data, err := os.ReadFile(filePath)
+	if err != nil {
+		t.Fatalf("read callback file: %v", err)
+	}
+
+	var payload map[string]string
+	if err := json.Unmarshal(data, &payload); err != nil {
+		t.Fatalf("decode callback payload: %v", err)
+	}
+	if got := payload["code"]; got != "test-code" {
+		t.Fatalf("callback code = %q, want test-code", got)
+	}
+	if got := payload["state"]; got != state {
+		t.Fatalf("callback state = %q, want %q", got, state)
+	}
+}
+
+func TestNormalizeOAuthProvider_GitLab(t *testing.T) {
+	provider, err := NormalizeOAuthProvider("gitlab")
+	if err != nil {
+		t.Fatalf("NormalizeOAuthProvider returned error: %v", err)
+	}
+	if provider != "gitlab" {
+		t.Fatalf("provider = %q, want gitlab", provider)
+	}
+}
--- a/internal/api/handlers/management/auth_files_patch_fields_test.go
+++ b/internal/api/handlers/management/auth_files_patch_fields_test.go
@@ -0,0 +1,164 @@
+package management
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+func TestPatchAuthFileFields_MergeHeadersAndDeleteEmptyValues(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	store := &memoryAuthStore{}
+	manager := coreauth.NewManager(store, nil, nil)
+	record := &coreauth.Auth{
+		ID:       "test.json",
+		FileName: "test.json",
+		Provider: "claude",
+		Attributes: map[string]string{
+			"path":            "/tmp/test.json",
+			"header:X-Old":    "old",
+			"header:X-Remove": "gone",
+		},
+		Metadata: map[string]any{
+			"type": "claude",
+			"headers": map[string]any{
+				"X-Old":    "old",
+				"X-Remove": "gone",
+			},
+		},
+	}
+	if _, errRegister := manager.Register(context.Background(), record); errRegister != nil {
+		t.Fatalf("failed to register auth record: %v", errRegister)
+	}
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, manager)
+
+	body := `{"name":"test.json","prefix":"p1","proxy_url":"http://proxy.local","headers":{"X-Old":"new","X-New":"v","X-Remove":"  ","X-Nope":""}}`
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	req := httptest.NewRequest(http.MethodPatch, "/v0/management/auth-files/fields", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	ctx.Request = req
+	h.PatchAuthFileFields(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	updated, ok := manager.GetByID("test.json")
+	if !ok || updated == nil {
+		t.Fatalf("expected auth record to exist after patch")
+	}
+
+	if updated.Prefix != "p1" {
+		t.Fatalf("prefix = %q, want %q", updated.Prefix, "p1")
+	}
+	if updated.ProxyURL != "http://proxy.local" {
+		t.Fatalf("proxy_url = %q, want %q", updated.ProxyURL, "http://proxy.local")
+	}
+
+	if updated.Metadata == nil {
+		t.Fatalf("expected metadata to be non-nil")
+	}
+	if got, _ := updated.Metadata["prefix"].(string); got != "p1" {
+		t.Fatalf("metadata.prefix = %q, want %q", got, "p1")
+	}
+	if got, _ := updated.Metadata["proxy_url"].(string); got != "http://proxy.local" {
+		t.Fatalf("metadata.proxy_url = %q, want %q", got, "http://proxy.local")
+	}
+
+	headersMeta, ok := updated.Metadata["headers"].(map[string]any)
+	if !ok {
+		raw, _ := json.Marshal(updated.Metadata["headers"])
+		t.Fatalf("metadata.headers = %T (%s), want map[string]any", updated.Metadata["headers"], string(raw))
+	}
+	if got := headersMeta["X-Old"]; got != "new" {
+		t.Fatalf("metadata.headers.X-Old = %#v, want %q", got, "new")
+	}
+	if got := headersMeta["X-New"]; got != "v" {
+		t.Fatalf("metadata.headers.X-New = %#v, want %q", got, "v")
+	}
+	if _, ok := headersMeta["X-Remove"]; ok {
+		t.Fatalf("expected metadata.headers.X-Remove to be deleted")
+	}
+	if _, ok := headersMeta["X-Nope"]; ok {
+		t.Fatalf("expected metadata.headers.X-Nope to be absent")
+	}
+
+	if got := updated.Attributes["header:X-Old"]; got != "new" {
+		t.Fatalf("attrs header:X-Old = %q, want %q", got, "new")
+	}
+	if got := updated.Attributes["header:X-New"]; got != "v" {
+		t.Fatalf("attrs header:X-New = %q, want %q", got, "v")
+	}
+	if _, ok := updated.Attributes["header:X-Remove"]; ok {
+		t.Fatalf("expected attrs header:X-Remove to be deleted")
+	}
+	if _, ok := updated.Attributes["header:X-Nope"]; ok {
+		t.Fatalf("expected attrs header:X-Nope to be absent")
+	}
+}
+
+func TestPatchAuthFileFields_HeadersEmptyMapIsNoop(t *testing.T) {
+	t.Setenv("MANAGEMENT_PASSWORD", "")
+	gin.SetMode(gin.TestMode)
+
+	store := &memoryAuthStore{}
+	manager := coreauth.NewManager(store, nil, nil)
+	record := &coreauth.Auth{
+		ID:       "noop.json",
+		FileName: "noop.json",
+		Provider: "claude",
+		Attributes: map[string]string{
+			"path":         "/tmp/noop.json",
+			"header:X-Kee": "1",
+		},
+		Metadata: map[string]any{
+			"type": "claude",
+			"headers": map[string]any{
+				"X-Kee": "1",
+			},
+		},
+	}
+	if _, errRegister := manager.Register(context.Background(), record); errRegister != nil {
+		t.Fatalf("failed to register auth record: %v", errRegister)
+	}
+
+	h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, manager)
+
+	body := `{"name":"noop.json","note":"hello","headers":{}}`
+	rec := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(rec)
+	req := httptest.NewRequest(http.MethodPatch, "/v0/management/auth-files/fields", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	ctx.Request = req
+	h.PatchAuthFileFields(ctx)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
+	}
+
+	updated, ok := manager.GetByID("noop.json")
+	if !ok || updated == nil {
+		t.Fatalf("expected auth record to exist after patch")
+	}
+	if got := updated.Attributes["header:X-Kee"]; got != "1" {
+		t.Fatalf("attrs header:X-Kee = %q, want %q", got, "1")
+	}
+	headersMeta, ok := updated.Metadata["headers"].(map[string]any)
+	if !ok {
+		t.Fatalf("expected metadata.headers to remain a map, got %T", updated.Metadata["headers"])
+	}
+	if got := headersMeta["X-Kee"]; got != "1" {
+		t.Fatalf("metadata.headers.X-Kee = %#v, want %q", got, "1")
+	}
+}
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -509,19 +509,24 @@ func (h *Handler) PutVertexCompatKeys(c *gin.Context) {
 	}
 	for i := range arr {
 		normalizeVertexCompatKey(&arr[i])
+		if arr[i].APIKey == "" {
+			c.JSON(400, gin.H{"error": fmt.Sprintf("vertex-api-key[%d].api-key is required", i)})
+			return
+		}
 	}
-	h.cfg.VertexCompatAPIKey = arr
+	h.cfg.VertexCompatAPIKey = append([]config.VertexCompatKey(nil), arr...)
 	h.cfg.SanitizeVertexCompatKeys()
 	h.persist(c)
 }
 func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
 	type vertexCompatPatch struct {
-		APIKey   *string                     `json:"api-key"`
-		Prefix   *string                     `json:"prefix"`
-		BaseURL  *string                     `json:"base-url"`
-		ProxyURL *string                     `json:"proxy-url"`
-		Headers  *map[string]string          `json:"headers"`
-		Models   *[]config.VertexCompatModel `json:"models"`
+		APIKey         *string                     `json:"api-key"`
+		Prefix         *string                     `json:"prefix"`
+		BaseURL        *string                     `json:"base-url"`
+		ProxyURL       *string                     `json:"proxy-url"`
+		Headers        *map[string]string          `json:"headers"`
+		Models         *[]config.VertexCompatModel `json:"models"`
+		ExcludedModels *[]string                   `json:"excluded-models"`
 	}
 	var body struct {
 		Index *int               `json:"index"`
@@ -585,6 +590,9 @@ func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
 	if body.Value.Models != nil {
 		entry.Models = append([]config.VertexCompatModel(nil), (*body.Value.Models)...)
 	}
+	if body.Value.ExcludedModels != nil {
+		entry.ExcludedModels = config.NormalizeExcludedModels(*body.Value.ExcludedModels)
+	}
 	normalizeVertexCompatKey(&entry)
 	h.cfg.VertexCompatAPIKey[targetIndex] = entry
 	h.cfg.SanitizeVertexCompatKeys()
@@ -1029,6 +1037,7 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
 	entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 	entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 	entry.Headers = config.NormalizeHeaders(entry.Headers)
+	entry.ExcludedModels = config.NormalizeExcludedModels(entry.ExcludedModels)
 	if len(entry.Models) == 0 {
 		return
 	}
--- a/internal/api/handlers/management/oauth_sessions.go
+++ b/internal/api/handlers/management/oauth_sessions.go
@@ -228,6 +228,8 @@ func NormalizeOAuthProvider(provider string) (string, error) {
 		return "anthropic", nil
 	case "codex", "openai":
 		return "codex", nil
+	case "gitlab":
+		return "gitlab", nil
 	case "gemini", "google":
 		return "gemini", nil
 	case "iflow", "i-flow":
--- a/internal/api/handlers/management/test_store_test.go
+++ b/internal/api/handlers/management/test_store_test.go
@@ -0,0 +1,49 @@
+package management
+
+import (
+	"context"
+	"sync"
+
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+type memoryAuthStore struct {
+	mu    sync.Mutex
+	items map[string]*coreauth.Auth
+}
+
+func (s *memoryAuthStore) List(_ context.Context) ([]*coreauth.Auth, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	out := make([]*coreauth.Auth, 0, len(s.items))
+	for _, item := range s.items {
+		out = append(out, item)
+	}
+	return out, nil
+}
+
+func (s *memoryAuthStore) Save(_ context.Context, auth *coreauth.Auth) (string, error) {
+	if auth == nil {
+		return "", nil
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if s.items == nil {
+		s.items = make(map[string]*coreauth.Auth)
+	}
+	s.items[auth.ID] = auth
+	return auth.ID, nil
+}
+
+func (s *memoryAuthStore) Delete(_ context.Context, id string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	delete(s.items, id)
+	return nil
+}
+
+func (s *memoryAuthStore) SetBaseDir(string) {}
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -15,6 +15,8 @@ import (
 )

 const requestBodyOverrideContextKey = "REQUEST_BODY_OVERRIDE"
+const responseBodyOverrideContextKey = "RESPONSE_BODY_OVERRIDE"
+const websocketTimelineOverrideContextKey = "WEBSOCKET_TIMELINE_OVERRIDE"

 // RequestInfo holds essential details of an incoming HTTP request for logging purposes.
 type RequestInfo struct {
@@ -304,6 +306,10 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		if len(apiResponse) > 0 {
 			_ = w.streamWriter.WriteAPIResponse(apiResponse)
 		}
+		apiWebsocketTimeline := w.extractAPIWebsocketTimeline(c)
+		if len(apiWebsocketTimeline) > 0 {
+			_ = w.streamWriter.WriteAPIWebsocketTimeline(apiWebsocketTimeline)
+		}
 		if err := w.streamWriter.Close(); err != nil {
 			w.streamWriter = nil
 			return err
@@ -312,7 +318,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		return nil
 	}

-	return w.logRequest(w.extractRequestBody(c), finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
+	return w.logRequest(w.extractRequestBody(c), finalStatusCode, w.cloneHeaders(), w.extractResponseBody(c), w.extractWebsocketTimeline(c), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIWebsocketTimeline(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
 }

 func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -352,6 +358,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
 	return data
 }

+func (w *ResponseWriterWrapper) extractAPIWebsocketTimeline(c *gin.Context) []byte {
+	apiTimeline, isExist := c.Get("API_WEBSOCKET_TIMELINE")
+	if !isExist {
+		return nil
+	}
+	data, ok := apiTimeline.([]byte)
+	if !ok || len(data) == 0 {
+		return nil
+	}
+	return bytes.Clone(data)
+}
+
 func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
 	ts, isExist := c.Get("API_RESPONSE_TIMESTAMP")
 	if !isExist {
@@ -364,19 +382,8 @@ func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time
 }

 func (w *ResponseWriterWrapper) extractRequestBody(c *gin.Context) []byte {
-	if c != nil {
-		if bodyOverride, isExist := c.Get(requestBodyOverrideContextKey); isExist {
-			switch value := bodyOverride.(type) {
-			case []byte:
-				if len(value) > 0 {
-					return bytes.Clone(value)
-				}
-			case string:
-				if strings.TrimSpace(value) != "" {
-					return []byte(value)
-				}
-			}
-		}
+	if body := extractBodyOverride(c, requestBodyOverrideContextKey); len(body) > 0 {
+		return body
 	}
 	if w.requestInfo != nil && len(w.requestInfo.Body) > 0 {
 		return w.requestInfo.Body
@@ -384,13 +391,48 @@ func (w *ResponseWriterWrapper) extractRequestBody(c *gin.Context) []byte {
 	return nil
 }

-func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+func (w *ResponseWriterWrapper) extractResponseBody(c *gin.Context) []byte {
+	if body := extractBodyOverride(c, responseBodyOverrideContextKey); len(body) > 0 {
+		return body
+	}
+	if w.body == nil || w.body.Len() == 0 {
+		return nil
+	}
+	return bytes.Clone(w.body.Bytes())
+}
+
+func (w *ResponseWriterWrapper) extractWebsocketTimeline(c *gin.Context) []byte {
+	return extractBodyOverride(c, websocketTimelineOverrideContextKey)
+}
+
+func extractBodyOverride(c *gin.Context, key string) []byte {
+	if c == nil {
+		return nil
+	}
+	bodyOverride, isExist := c.Get(key)
+	if !isExist {
+		return nil
+	}
+	switch value := bodyOverride.(type) {
+	case []byte:
+		if len(value) > 0 {
+			return bytes.Clone(value)
+		}
+	case string:
+		if strings.TrimSpace(value) != "" {
+			return []byte(value)
+		}
+	}
+	return nil
+}
+
+func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, headers map[string][]string, body, websocketTimeline, apiRequestBody, apiResponseBody, apiWebsocketTimeline []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
 	if w.requestInfo == nil {
 		return nil
 	}

 	if loggerWithOptions, ok := w.logger.(interface {
-		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
 	}); ok {
 		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
@@ -400,8 +442,10 @@ func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, h
 			statusCode,
 			headers,
 			body,
+			websocketTimeline,
 			apiRequestBody,
 			apiResponseBody,
+			apiWebsocketTimeline,
 			apiResponseErrors,
 			forceLog,
 			w.requestInfo.RequestID,
@@ -418,8 +462,10 @@ func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, h
 		statusCode,
 		headers,
 		body,
+		websocketTimeline,
 		apiRequestBody,
 		apiResponseBody,
+		apiWebsocketTimeline,
 		apiResponseErrors,
 		w.requestInfo.RequestID,
 		w.requestInfo.Timestamp,
--- a/internal/api/middleware/response_writer_test.go
+++ b/internal/api/middleware/response_writer_test.go
@@ -1,10 +1,14 @@
 package middleware

 import (
+	"bytes"
 	"net/http/httptest"
 	"testing"
+	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )

 func TestExtractRequestBodyPrefersOverride(t *testing.T) {
@@ -33,7 +37,7 @@ func TestExtractRequestBodySupportsStringOverride(t *testing.T) {
 	recorder := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(recorder)

-	wrapper := &ResponseWriterWrapper{}
+	wrapper := &ResponseWriterWrapper{body: &bytes.Buffer{}}
 	c.Set(requestBodyOverrideContextKey, "override-as-string")

 	body := wrapper.extractRequestBody(c)
@@ -41,3 +45,158 @@ func TestExtractRequestBodySupportsStringOverride(t *testing.T) {
 		t.Fatalf("request body = %q, want %q", string(body), "override-as-string")
 	}
 }
+
+func TestExtractResponseBodyPrefersOverride(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(recorder)
+
+	wrapper := &ResponseWriterWrapper{body: &bytes.Buffer{}}
+	wrapper.body.WriteString("original-response")
+
+	body := wrapper.extractResponseBody(c)
+	if string(body) != "original-response" {
+		t.Fatalf("response body = %q, want %q", string(body), "original-response")
+	}
+
+	c.Set(responseBodyOverrideContextKey, []byte("override-response"))
+	body = wrapper.extractResponseBody(c)
+	if string(body) != "override-response" {
+		t.Fatalf("response body = %q, want %q", string(body), "override-response")
+	}
+
+	body[0] = 'X'
+	if got := wrapper.extractResponseBody(c); string(got) != "override-response" {
+		t.Fatalf("response override should be cloned, got %q", string(got))
+	}
+}
+
+func TestExtractResponseBodySupportsStringOverride(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(recorder)
+
+	wrapper := &ResponseWriterWrapper{}
+	c.Set(responseBodyOverrideContextKey, "override-response-as-string")
+
+	body := wrapper.extractResponseBody(c)
+	if string(body) != "override-response-as-string" {
+		t.Fatalf("response body = %q, want %q", string(body), "override-response-as-string")
+	}
+}
+
+func TestExtractBodyOverrideClonesBytes(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(recorder)
+
+	override := []byte("body-override")
+	c.Set(requestBodyOverrideContextKey, override)
+
+	body := extractBodyOverride(c, requestBodyOverrideContextKey)
+	if !bytes.Equal(body, override) {
+		t.Fatalf("body override = %q, want %q", string(body), string(override))
+	}
+
+	body[0] = 'X'
+	if !bytes.Equal(override, []byte("body-override")) {
+		t.Fatalf("override mutated: %q", string(override))
+	}
+}
+
+func TestExtractWebsocketTimelineUsesOverride(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(recorder)
+
+	wrapper := &ResponseWriterWrapper{}
+	if got := wrapper.extractWebsocketTimeline(c); got != nil {
+		t.Fatalf("expected nil websocket timeline, got %q", string(got))
+	}
+
+	c.Set(websocketTimelineOverrideContextKey, []byte("timeline"))
+	body := wrapper.extractWebsocketTimeline(c)
+	if string(body) != "timeline" {
+		t.Fatalf("websocket timeline = %q, want %q", string(body), "timeline")
+	}
+}
+
+func TestFinalizeStreamingWritesAPIWebsocketTimeline(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(recorder)
+
+	streamWriter := &testStreamingLogWriter{}
+	wrapper := &ResponseWriterWrapper{
+		ResponseWriter: c.Writer,
+		logger:         &testRequestLogger{enabled: true},
+		requestInfo: &RequestInfo{
+			URL:       "/v1/responses",
+			Method:    "POST",
+			Headers:   map[string][]string{"Content-Type": {"application/json"}},
+			RequestID: "req-1",
+			Timestamp: time.Date(2026, time.April, 1, 12, 0, 0, 0, time.UTC),
+		},
+		isStreaming:  true,
+		streamWriter: streamWriter,
+	}
+
+	c.Set("API_WEBSOCKET_TIMELINE", []byte("Timestamp: 2026-04-01T12:00:00Z\nEvent: api.websocket.request\n{}"))
+
+	if err := wrapper.Finalize(c); err != nil {
+		t.Fatalf("Finalize error: %v", err)
+	}
+	if string(streamWriter.apiWebsocketTimeline) != "Timestamp: 2026-04-01T12:00:00Z\nEvent: api.websocket.request\n{}" {
+		t.Fatalf("stream writer websocket timeline = %q", string(streamWriter.apiWebsocketTimeline))
+	}
+	if !streamWriter.closed {
+		t.Fatal("expected stream writer to be closed")
+	}
+}
+
+type testRequestLogger struct {
+	enabled bool
+}
+
+func (l *testRequestLogger) LogRequest(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []byte, []byte, []*interfaces.ErrorMessage, string, time.Time, time.Time) error {
+	return nil
+}
+
+func (l *testRequestLogger) LogStreamingRequest(string, string, map[string][]string, []byte, string) (logging.StreamingLogWriter, error) {
+	return &testStreamingLogWriter{}, nil
+}
+
+func (l *testRequestLogger) IsEnabled() bool {
+	return l.enabled
+}
+
+type testStreamingLogWriter struct {
+	apiWebsocketTimeline []byte
+	closed               bool
+}
+
+func (w *testStreamingLogWriter) WriteChunkAsync([]byte) {}
+
+func (w *testStreamingLogWriter) WriteStatus(int, map[string][]string) error {
+	return nil
+}
+
+func (w *testStreamingLogWriter) WriteAPIRequest([]byte) error {
+	return nil
+}
+
+func (w *testStreamingLogWriter) WriteAPIResponse([]byte) error {
+	return nil
+}
+
+func (w *testStreamingLogWriter) WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error {
+	w.apiWebsocketTimeline = bytes.Clone(apiWebsocketTimeline)
+	return nil
+}
+
+func (w *testStreamingLogWriter) SetFirstChunkTimestamp(time.Time) {}
+
+func (w *testStreamingLogWriter) Close() error {
+	w.closed = true
+	return nil
+}
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -123,6 +123,10 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			return
 		}

+		// Sanitize request body: remove thinking blocks with invalid signatures
+		// to prevent upstream API 400 errors
+		bodyBytes = SanitizeAmpRequestBody(bodyBytes)
+
 		// Restore the body for the handler to read
 		c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))

@@ -249,6 +253,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel)
 			logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath)
 			rewriter := NewResponseRewriter(c.Writer, modelName)
+			rewriter.suppressThinking = true
 			c.Writer = rewriter
 			// Filter Anthropic-Beta header only for local handling paths
 			filterAntropicBetaHeader(c)
@@ -259,10 +264,17 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 		} else if len(providers) > 0 {
 			// Log: Using local provider (free)
 			logAmpRouting(RouteTypeLocalProvider, modelName, resolvedModel, providerName, requestPath)
+			// Wrap with ResponseRewriter for local providers too, because upstream
+			// proxies (e.g. NewAPI) may return a different model name and lack
+			// Amp-required fields like thinking.signature.
+			rewriter := NewResponseRewriter(c.Writer, modelName)
+			rewriter.suppressThinking = providerName != "claude"
+			c.Writer = rewriter
 			// Filter Anthropic-Beta header only for local handling paths
 			filterAntropicBetaHeader(c)
 			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 			handler(c)
+			rewriter.Flush()
 		} else {
 			// No provider, no mapping, no proxy: fall back to the wrapped handler so it can return an error response
 			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
--- a/internal/api/modules/amp/response_rewriter.go
+++ b/internal/api/modules/amp/response_rewriter.go
@@ -2,6 +2,7 @@ package amp

 import (
 	"bytes"
+	"fmt"
 	"net/http"
 	"strings"

@@ -12,15 +13,17 @@ import (
 )

 // ResponseRewriter wraps a gin.ResponseWriter to intercept and modify the response body
-// It's used to rewrite model names in responses when model mapping is used
+// It is used to rewrite model names in responses when model mapping is used
+// and to keep Amp-compatible response shapes.
 type ResponseRewriter struct {
 	gin.ResponseWriter
-	body          *bytes.Buffer
-	originalModel string
-	isStreaming   bool
+	body             *bytes.Buffer
+	originalModel    string
+	isStreaming      bool
+	suppressThinking bool
 }

-// NewResponseRewriter creates a new response rewriter for model name substitution
+// NewResponseRewriter creates a new response rewriter for model name substitution.
 func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter {
 	return &ResponseRewriter{
 		ResponseWriter: w,
@@ -33,15 +36,15 @@ const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap

 func looksLikeSSEChunk(data []byte) bool {
 	// Fallback detection: some upstreams may omit/lie about Content-Type, causing SSE to be buffered.
-	// Heuristics are intentionally simple and cheap.
-	return bytes.Contains(data, []byte("data:")) ||
-		bytes.Contains(data, []byte("event:")) ||
-		bytes.Contains(data, []byte("message_start")) ||
-		bytes.Contains(data, []byte("message_delta")) ||
-		bytes.Contains(data, []byte("content_block_start")) ||
-		bytes.Contains(data, []byte("content_block_delta")) ||
-		bytes.Contains(data, []byte("content_block_stop")) ||
-		bytes.Contains(data, []byte("\n\n"))
+	// We conservatively detect SSE by checking for "data:" / "event:" at the start of any line.
+	for _, line := range bytes.Split(data, []byte("\n")) {
+		trimmed := bytes.TrimSpace(line)
+		if bytes.HasPrefix(trimmed, []byte("data:")) ||
+			bytes.HasPrefix(trimmed, []byte("event:")) {
+			return true
+		}
+	}
+	return false
 }

 func (rw *ResponseRewriter) enableStreaming(reason string) error {
@@ -95,7 +98,8 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
 	}

 	if rw.isStreaming {
-		n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+		rewritten := rw.rewriteStreamChunk(data)
+		n, err := rw.ResponseWriter.Write(rewritten)
 		if err == nil {
 			if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
 				flusher.Flush()
@@ -106,7 +110,6 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
 	return rw.body.Write(data)
 }

-// Flush writes the buffered response with model names rewritten
 func (rw *ResponseRewriter) Flush() {
 	if rw.isStreaming {
 		if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
@@ -115,40 +118,79 @@ func (rw *ResponseRewriter) Flush() {
 		return
 	}
 	if rw.body.Len() > 0 {
-		if _, err := rw.ResponseWriter.Write(rw.rewriteModelInResponse(rw.body.Bytes())); err != nil {
+		rewritten := rw.rewriteModelInResponse(rw.body.Bytes())
+		// Update Content-Length to match the rewritten body size, since
+		// signature injection and model name changes alter the payload length.
+		rw.ResponseWriter.Header().Set("Content-Length", fmt.Sprintf("%d", len(rewritten)))
+		if _, err := rw.ResponseWriter.Write(rewritten); err != nil {
 			log.Warnf("amp response rewriter: failed to write rewritten response: %v", err)
 		}
 	}
 }

-// modelFieldPaths lists all JSON paths where model name may appear
 var modelFieldPaths = []string{"message.model", "model", "modelVersion", "response.model", "response.modelVersion"}

-// rewriteModelInResponse replaces all occurrences of the mapped model with the original model in JSON
-// It also suppresses "thinking" blocks if "tool_use" is present to ensure Amp client compatibility
-func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte {
-	// 1. Amp Compatibility: Suppress thinking blocks if tool use is detected
-	// The Amp client struggles when both thinking and tool_use blocks are present
+// ensureAmpSignature injects empty signature fields into tool_use/thinking blocks
+// in API responses so that the Amp TUI does not crash on P.signature.length.
+func ensureAmpSignature(data []byte) []byte {
+	for index, block := range gjson.GetBytes(data, "content").Array() {
+		blockType := block.Get("type").String()
+		if blockType != "tool_use" && blockType != "thinking" {
+			continue
+		}
+		signaturePath := fmt.Sprintf("content.%d.signature", index)
+		if gjson.GetBytes(data, signaturePath).Exists() {
+			continue
+		}
+		var err error
+		data, err = sjson.SetBytes(data, signaturePath, "")
+		if err != nil {
+			log.Warnf("Amp ResponseRewriter: failed to add empty signature to %s block: %v", blockType, err)
+			break
+		}
+	}
+
+	contentBlockType := gjson.GetBytes(data, "content_block.type").String()
+	if (contentBlockType == "tool_use" || contentBlockType == "thinking") && !gjson.GetBytes(data, "content_block.signature").Exists() {
+		var err error
+		data, err = sjson.SetBytes(data, "content_block.signature", "")
+		if err != nil {
+			log.Warnf("Amp ResponseRewriter: failed to add empty signature to streaming %s block: %v", contentBlockType, err)
+		}
+	}
+
+	return data
+}
+
+func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
+	if !rw.suppressThinking {
+		return data
+	}
 	if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() {
 		filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`)
 		if filtered.Exists() {
 			originalCount := gjson.GetBytes(data, "content.#").Int()
 			filteredCount := filtered.Get("#").Int()
-
 			if originalCount > filteredCount {
 				var err error
 				data, err = sjson.SetBytes(data, "content", filtered.Value())
 				if err != nil {
 					log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err)
-				} else {
-					log.Debugf("Amp ResponseRewriter: Suppressed %d thinking blocks due to tool usage", originalCount-filteredCount)
-					// Log the result for verification
-					log.Debugf("Amp ResponseRewriter: Resulting content: %s", gjson.GetBytes(data, "content").String())
 				}
 			}
 		}
 	}

+	return data
+}
+
+func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte {
+	data = ensureAmpSignature(data)
+	data = rw.suppressAmpThinking(data)
+	if len(data) == 0 {
+		return data
+	}
+
 	if rw.originalModel == "" {
 		return data
 	}
@@ -160,24 +202,154 @@ func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte {
 	return data
 }

-// rewriteStreamChunk rewrites model names in SSE stream chunks
 func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
-	if rw.originalModel == "" {
-		return chunk
+	lines := bytes.Split(chunk, []byte("\n"))
+	var out [][]byte
+
+	i := 0
+	for i < len(lines) {
+		line := lines[i]
+		trimmed := bytes.TrimSpace(line)
+
+		// Case 1: "event:" line - look ahead for its "data:" line
+		if bytes.HasPrefix(trimmed, []byte("event: ")) {
+			// Scan forward past blank lines to find the data: line
+			dataIdx := -1
+			for j := i + 1; j < len(lines); j++ {
+				t := bytes.TrimSpace(lines[j])
+				if len(t) == 0 {
+					continue
+				}
+				if bytes.HasPrefix(t, []byte("data: ")) {
+					dataIdx = j
+				}
+				break
+			}
+
+			if dataIdx >= 0 {
+				// Found event+data pair - process through rewriter
+				jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: "))
+				if len(jsonData) > 0 && jsonData[0] == '{' {
+					rewritten := rw.rewriteStreamEvent(jsonData)
+					if rewritten == nil {
+						i = dataIdx + 1
+						continue
+					}
+					// Emit event line
+					out = append(out, line)
+					// Emit blank lines between event and data
+					for k := i + 1; k < dataIdx; k++ {
+						out = append(out, lines[k])
+					}
+					// Emit rewritten data
+					out = append(out, append([]byte("data: "), rewritten...))
+					i = dataIdx + 1
+					continue
+				}
+			}
+
+			// No data line found (orphan event from cross-chunk split)
+			// Pass it through as-is - the data will arrive in the next chunk
+			out = append(out, line)
+			i++
+			continue
+		}
+
+		// Case 2: standalone "data:" line (no preceding event: in this chunk)
+		if bytes.HasPrefix(trimmed, []byte("data: ")) {
+			jsonData := bytes.TrimPrefix(trimmed, []byte("data: "))
+			if len(jsonData) > 0 && jsonData[0] == '{' {
+				rewritten := rw.rewriteStreamEvent(jsonData)
+				if rewritten != nil {
+					out = append(out, append([]byte("data: "), rewritten...))
+				}
+				i++
+				continue
+			}
+		}
+
+		// Case 3: everything else
+		out = append(out, line)
+		i++
 	}

-	// SSE format: "data: {json}\n\n"
-	lines := bytes.Split(chunk, []byte("\n"))
-	for i, line := range lines {
-		if bytes.HasPrefix(line, []byte("data: ")) {
-			jsonData := bytes.TrimPrefix(line, []byte("data: "))
-			if len(jsonData) > 0 && jsonData[0] == '{' {
-				// Rewrite JSON in the data line
-				rewritten := rw.rewriteModelInResponse(jsonData)
-				lines[i] = append([]byte("data: "), rewritten...)
+	return bytes.Join(out, []byte("\n"))
+}
+
+// rewriteStreamEvent processes a single JSON event in the SSE stream.
+// It rewrites model names and ensures signature fields exist.
+// NOTE: streaming mode does NOT suppress thinking blocks - they are
+// passed through with signature injection to avoid breaking SSE index
+// alignment and TUI rendering.
+func (rw *ResponseRewriter) rewriteStreamEvent(data []byte) []byte {
+	// Inject empty signature where needed
+	data = ensureAmpSignature(data)
+
+	// Rewrite model name
+	if rw.originalModel != "" {
+		for _, path := range modelFieldPaths {
+			if gjson.GetBytes(data, path).Exists() {
+				data, _ = sjson.SetBytes(data, path, rw.originalModel)
 			}
 		}
 	}

-	return bytes.Join(lines, []byte("\n"))
+	return data
+}
+
+// SanitizeAmpRequestBody removes thinking blocks with empty/missing/invalid signatures
+// from the messages array in a request body before forwarding to the upstream API.
+// This prevents 400 errors from the API which requires valid signatures on thinking blocks.
+func SanitizeAmpRequestBody(body []byte) []byte {
+	messages := gjson.GetBytes(body, "messages")
+	if !messages.Exists() || !messages.IsArray() {
+		return body
+	}
+
+	modified := false
+	for msgIdx, msg := range messages.Array() {
+		if msg.Get("role").String() != "assistant" {
+			continue
+		}
+		content := msg.Get("content")
+		if !content.Exists() || !content.IsArray() {
+			continue
+		}
+
+		var keepBlocks []interface{}
+		removedCount := 0
+
+		for _, block := range content.Array() {
+			blockType := block.Get("type").String()
+			if blockType == "thinking" {
+				sig := block.Get("signature")
+				if !sig.Exists() || sig.Type != gjson.String || strings.TrimSpace(sig.String()) == "" {
+					removedCount++
+					continue
+				}
+			}
+			keepBlocks = append(keepBlocks, block.Value())
+		}
+
+		if removedCount > 0 {
+			contentPath := fmt.Sprintf("messages.%d.content", msgIdx)
+			var err error
+			if len(keepBlocks) == 0 {
+				body, err = sjson.SetBytes(body, contentPath, []interface{}{})
+			} else {
+				body, err = sjson.SetBytes(body, contentPath, keepBlocks)
+			}
+			if err != nil {
+				log.Warnf("Amp RequestSanitizer: failed to remove thinking blocks from message %d: %v", msgIdx, err)
+				continue
+			}
+			modified = true
+			log.Debugf("Amp RequestSanitizer: removed %d thinking blocks with invalid signatures from message %d", removedCount, msgIdx)
+		}
+	}
+
+	if modified {
+		log.Debugf("Amp RequestSanitizer: sanitized request body")
+	}
+	return body
 }
--- a/internal/api/modules/amp/response_rewriter_test.go
+++ b/internal/api/modules/amp/response_rewriter_test.go
@@ -1,6 +1,7 @@
 package amp

 import (
+	"strings"
 	"testing"
 )

@@ -100,6 +101,50 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) {
 	}
 }

+func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) {
+	rw := &ResponseRewriter{}
+
+	chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n")
+	result := rw.rewriteStreamChunk(chunk)
+
+	// Streaming mode preserves thinking blocks (does NOT suppress them)
+	// to avoid breaking SSE index alignment and TUI rendering
+	if !contains(result, []byte(`"content_block":{"type":"thinking"`)) {
+		t.Fatalf("expected thinking content_block_start to be preserved, got %s", string(result))
+	}
+	if !contains(result, []byte(`"delta":{"type":"thinking_delta"`)) {
+		t.Fatalf("expected thinking_delta to be preserved, got %s", string(result))
+	}
+	if !contains(result, []byte(`"type":"content_block_stop","index":0`)) {
+		t.Fatalf("expected content_block_stop for thinking block to be preserved, got %s", string(result))
+	}
+	if !contains(result, []byte(`"content_block":{"type":"tool_use"`)) {
+		t.Fatalf("expected tool_use content_block frame to remain, got %s", string(result))
+	}
+	// Signature should be injected into both thinking and tool_use blocks
+	if count := strings.Count(string(result), `"signature":""`); count != 2 {
+		t.Fatalf("expected 2 signature injections, but got %d in %s", count, string(result))
+	}
+}
+
+func TestSanitizeAmpRequestBody_RemovesWhitespaceAndNonStringSignatures(t *testing.T) {
+	input := []byte(`{"messages":[{"role":"assistant","content":[{"type":"thinking","thinking":"drop-whitespace","signature":"   "},{"type":"thinking","thinking":"drop-number","signature":123},{"type":"thinking","thinking":"keep-valid","signature":"valid-signature"},{"type":"text","text":"keep-text"}]}]}`)
+	result := SanitizeAmpRequestBody(input)
+
+	if contains(result, []byte("drop-whitespace")) {
+		t.Fatalf("expected whitespace-only signature block to be removed, got %s", string(result))
+	}
+	if contains(result, []byte("drop-number")) {
+		t.Fatalf("expected non-string signature block to be removed, got %s", string(result))
+	}
+	if !contains(result, []byte("keep-valid")) {
+		t.Fatalf("expected valid thinking block to remain, got %s", string(result))
+	}
+	if !contains(result, []byte("keep-text")) {
+		t.Fatalf("expected non-thinking content to remain, got %s", string(result))
+	}
+}
+
 func contains(data, substr []byte) bool {
 	for i := 0; i <= len(data)-len(substr); i++ {
 		if string(data[i:i+len(substr)]) == string(substr) {
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -323,6 +323,10 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 // setupRoutes configures the API routes for the server.
 // It defines the endpoints and associates them with their respective handlers.
 func (s *Server) setupRoutes() {
+	s.engine.GET("/healthz", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"status": "ok"})
+	})
+
 	s.engine.GET("/management.html", s.serveManagementControlPanel)
 	openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers)
 	geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers)
@@ -403,6 +407,20 @@ func (s *Server) setupRoutes() {
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
 	})

+	s.engine.GET("/gitlab/callback", func(c *gin.Context) {
+		code := c.Query("code")
+		state := c.Query("state")
+		errStr := c.Query("error")
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
+		if state != "" {
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "gitlab", state, code, errStr)
+		}
+		c.Header("Content-Type", "text/html; charset=utf-8")
+		c.String(http.StatusOK, oauthCallbackSuccessHTML)
+	})
+
 	s.engine.GET("/google/callback", func(c *gin.Context) {
 		code := c.Query("code")
 		state := c.Query("state")
@@ -658,6 +676,8 @@ func (s *Server) registerManagementRoutes() {

 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
 		mgmt.GET("/codex-auth-url", s.mgmt.RequestCodexToken)
+		mgmt.GET("/gitlab-auth-url", s.mgmt.RequestGitLabToken)
+		mgmt.POST("/gitlab-auth-url", s.mgmt.RequestGitLabPATToken)
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/antigravity-auth-url", s.mgmt.RequestAntigravityToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
@@ -666,6 +686,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
 		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/kiro-auth-url", s.mgmt.RequestKiroToken)
+		mgmt.GET("/cursor-auth-url", s.mgmt.RequestCursorToken)
 		mgmt.GET("/github-auth-url", s.mgmt.RequestGitHubToken)
 		mgmt.POST("/oauth-callback", s.mgmt.PostOAuthCallback)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
--- a/internal/api/server_test.go
+++ b/internal/api/server_test.go
@@ -1,6 +1,7 @@
 package api

 import (
+	"encoding/json"
 	"net/http"
 	"net/http/httptest"
 	"os"
@@ -46,6 +47,28 @@ func newTestServer(t *testing.T) *Server {
 	return NewServer(cfg, authManager, accessManager, configPath)
 }

+func TestHealthz(t *testing.T) {
+	server := newTestServer(t)
+
+	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+	rr := httptest.NewRecorder()
+	server.engine.ServeHTTP(rr, req)
+
+	if rr.Code != http.StatusOK {
+		t.Fatalf("unexpected status code: got %d want %d; body=%s", rr.Code, http.StatusOK, rr.Body.String())
+	}
+
+	var resp struct {
+		Status string `json:"status"`
+	}
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("failed to parse response JSON: %v; body=%s", err, rr.Body.String())
+	}
+	if resp.Status != "ok" {
+		t.Fatalf("unexpected response status: got %q want %q", resp.Status, "ok")
+	}
+}
+
 func TestAmpProviderModelRoutes(t *testing.T) {
 	testCases := []struct {
 		name         string
@@ -172,6 +195,8 @@ func TestDefaultRequestLoggerFactory_UsesResolvedLogDirectory(t *testing.T) {
 		nil,
 		nil,
 		nil,
+		nil,
+		nil,
 		true,
 		"issue-1711",
 		time.Now(),
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -88,7 +88,7 @@ func (o *ClaudeAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string
 		"client_id":             {ClientID},
 		"response_type":         {"code"},
 		"redirect_uri":          {RedirectURI},
-		"scope":                 {"org:create_api_key user:profile user:inference"},
+		"scope":                 {"user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload"},
 		"code_challenge":        {pkceCodes.CodeChallenge},
 		"code_challenge_method": {"S256"},
 		"state":                 {state},
--- a/internal/auth/claude/utls_transport.go
+++ b/internal/auth/claude/utls_transport.go
@@ -4,12 +4,12 @@ package claude

 import (
 	"net/http"
-	"net/url"
 	"strings"
 	"sync"

 	tls "github.com/refraction-networking/utls"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/net/http2"
 	"golang.org/x/net/proxy"
@@ -31,17 +31,12 @@ type utlsRoundTripper struct {
 // newUtlsRoundTripper creates a new utls-based round tripper with optional proxy support
 func newUtlsRoundTripper(cfg *config.SDKConfig) *utlsRoundTripper {
 	var dialer proxy.Dialer = proxy.Direct
-	if cfg != nil && cfg.ProxyURL != "" {
-		proxyURL, err := url.Parse(cfg.ProxyURL)
-		if err != nil {
-			log.Errorf("failed to parse proxy URL %q: %v", cfg.ProxyURL, err)
-		} else {
-			pDialer, err := proxy.FromURL(proxyURL, proxy.Direct)
-			if err != nil {
-				log.Errorf("failed to create proxy dialer for %q: %v", cfg.ProxyURL, err)
-			} else {
-				dialer = pDialer
-			}
+	if cfg != nil {
+		proxyDialer, mode, errBuild := proxyutil.BuildDialer(cfg.ProxyURL)
+		if errBuild != nil {
+			log.Errorf("failed to configure proxy dialer for %q: %v", cfg.ProxyURL, errBuild)
+		} else if mode != proxyutil.ModeInherit && proxyDialer != nil {
+			dialer = proxyDialer
 		}
 	}

--- a/internal/auth/codebuddy/codebuddy_auth.go
+++ b/internal/auth/codebuddy/codebuddy_auth.go
@@ -0,0 +1,335 @@
+package codebuddy
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	log "github.com/sirupsen/logrus"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+)
+
+const (
+	BaseURL       = "https://copilot.tencent.com"
+	DefaultDomain = "www.codebuddy.cn"
+	UserAgent     = "CLI/2.63.2 CodeBuddy/2.63.2"
+
+	codeBuddyStatePath   = "/v2/plugin/auth/state"
+	codeBuddyTokenPath   = "/v2/plugin/auth/token"
+	codeBuddyRefreshPath = "/v2/plugin/auth/token/refresh"
+	pollInterval         = 5 * time.Second
+	maxPollDuration      = 5 * time.Minute
+	codeLoginPending     = 11217
+	codeSuccess          = 0
+)
+
+type CodeBuddyAuth struct {
+	httpClient *http.Client
+	cfg        *config.Config
+	baseURL    string
+}
+
+func NewCodeBuddyAuth(cfg *config.Config) *CodeBuddyAuth {
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+	if cfg != nil {
+		httpClient = util.SetProxy(&cfg.SDKConfig, httpClient)
+	}
+	return &CodeBuddyAuth{httpClient: httpClient, cfg: cfg, baseURL: BaseURL}
+}
+
+// AuthState holds the state and auth URL returned by the auth state API.
+type AuthState struct {
+	State   string
+	AuthURL string
+}
+
+// FetchAuthState calls POST /v2/plugin/auth/state?platform=CLI to get the state and login URL.
+func (a *CodeBuddyAuth) FetchAuthState(ctx context.Context) (*AuthState, error) {
+	stateURL := fmt.Sprintf("%s%s?platform=CLI", a.baseURL, codeBuddyStatePath)
+	body := []byte("{}")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, stateURL, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to create auth state request: %w", err)
+	}
+
+requestID := uuid.NewString()
+	req.Header.Set("Accept", "application/json, text/plain, */*")
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	req.Header.Set("X-Domain", "copilot.tencent.com")
+	req.Header.Set("X-No-Authorization", "true")
+	req.Header.Set("X-No-User-Id", "true")
+	req.Header.Set("X-No-Enterprise-Id", "true")
+	req.Header.Set("X-No-Department-Info", "true")
+	req.Header.Set("X-Product", "SaaS")
+	req.Header.Set("User-Agent", UserAgent)
+	req.Header.Set("X-Request-ID", requestID)
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: auth state request failed: %w", err)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("codebuddy auth state: close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to read auth state response: %w", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("codebuddy: auth state request returned status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var result struct {
+		Code int    `json:"code"`
+		Msg  string `json:"msg"`
+		Data *struct {
+			State   string `json:"state"`
+			AuthURL string `json:"authUrl"`
+		} `json:"data"`
+	}
+	if err = json.Unmarshal(bodyBytes, &result); err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to parse auth state response: %w", err)
+	}
+	if result.Code != codeSuccess {
+		return nil, fmt.Errorf("codebuddy: auth state request failed with code %d: %s", result.Code, result.Msg)
+	}
+	if result.Data == nil || result.Data.State == "" || result.Data.AuthURL == "" {
+		return nil, fmt.Errorf("codebuddy: auth state response missing state or authUrl")
+	}
+
+	return &AuthState{
+		State:   result.Data.State,
+		AuthURL: result.Data.AuthURL,
+	}, nil
+}
+
+type pollResponse struct {
+	Code      int    `json:"code"`
+	Msg       string `json:"msg"`
+	RequestID string `json:"requestId"`
+	Data      *struct {
+		AccessToken  string `json:"accessToken"`
+		RefreshToken string `json:"refreshToken"`
+		ExpiresIn    int64  `json:"expiresIn"`
+		TokenType    string `json:"tokenType"`
+		Domain       string `json:"domain"`
+	} `json:"data"`
+}
+
+// doPollRequest performs a single polling request, safely reading and closing the response body
+func (a *CodeBuddyAuth) doPollRequest(ctx context.Context, pollURL string) ([]byte, int, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pollURL, nil)
+	if err != nil {
+		return nil, 0, fmt.Errorf("%w: %v", ErrTokenFetchFailed, err)
+	}
+	a.applyPollHeaders(req)
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return nil, 0, err
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("codebuddy poll: close body error: %v", errClose)
+		}
+	}()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, resp.StatusCode, fmt.Errorf("codebuddy poll: failed to read response body: %w", err)
+	}
+	return body, resp.StatusCode, nil
+}
+
+// PollForToken polls until the user completes browser authorization and returns auth data.
+func (a *CodeBuddyAuth) PollForToken(ctx context.Context, state string) (*CodeBuddyTokenStorage, error) {
+	deadline := time.Now().Add(maxPollDuration)
+	pollURL := fmt.Sprintf("%s%s?state=%s", a.baseURL, codeBuddyTokenPath, url.QueryEscape(state))
+
+	for time.Now().Before(deadline) {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(pollInterval):
+		}
+
+		body, statusCode, err := a.doPollRequest(ctx, pollURL)
+		if err != nil {
+			log.Debugf("codebuddy poll: request error: %v", err)
+			continue
+		}
+
+		if statusCode != http.StatusOK {
+			log.Debugf("codebuddy poll: unexpected status %d", statusCode)
+			continue
+		}
+
+		var result pollResponse
+		if err := json.Unmarshal(body, &result); err != nil {
+			continue
+		}
+
+		switch result.Code {
+		case codeSuccess:
+			if result.Data == nil {
+				return nil, fmt.Errorf("%w: empty data in response", ErrTokenFetchFailed)
+			}
+			userID, _ := a.DecodeUserID(result.Data.AccessToken)
+			return &CodeBuddyTokenStorage{
+				AccessToken:  result.Data.AccessToken,
+				RefreshToken: result.Data.RefreshToken,
+				ExpiresIn:    result.Data.ExpiresIn,
+				TokenType:    result.Data.TokenType,
+				Domain:       result.Data.Domain,
+				UserID:       userID,
+				Type:         "codebuddy",
+			}, nil
+		case codeLoginPending:
+			// continue polling
+		default:
+			// TODO: when the CodeBuddy API error code for user denial is known,
+			// return ErrAccessDenied here instead of ErrTokenFetchFailed.
+			return nil, fmt.Errorf("%w: server returned code %d: %s", ErrTokenFetchFailed, result.Code, result.Msg)
+		}
+	}
+	return nil, ErrPollingTimeout
+}
+
+// DecodeUserID decodes the sub field from a JWT access token as the user ID.
+func (a *CodeBuddyAuth) DecodeUserID(accessToken string) (string, error) {
+	parts := strings.Split(accessToken, ".")
+	if len(parts) < 2 {
+		return "", ErrJWTDecodeFailed
+	}
+	payload, err := base64.RawURLEncoding.DecodeString(parts[1])
+	if err != nil {
+		return "", fmt.Errorf("%w: %v", ErrJWTDecodeFailed, err)
+	}
+	var claims struct {
+		Sub string `json:"sub"`
+	}
+	if err := json.Unmarshal(payload, &claims); err != nil {
+		return "", fmt.Errorf("%w: %v", ErrJWTDecodeFailed, err)
+	}
+	if claims.Sub == "" {
+		return "", fmt.Errorf("%w: sub claim is empty", ErrJWTDecodeFailed)
+	}
+	return claims.Sub, nil
+}
+
+// RefreshToken exchanges a refresh token for a new access token.
+// It calls POST /v2/plugin/auth/token/refresh with the required headers.
+func (a *CodeBuddyAuth) RefreshToken(ctx context.Context, accessToken, refreshToken, userID, domain string) (*CodeBuddyTokenStorage, error) {
+	if domain == "" {
+		domain = DefaultDomain
+	}
+	refreshURL := fmt.Sprintf("%s%s", a.baseURL, codeBuddyRefreshPath)
+	body := []byte("{}")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, refreshURL, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to create refresh request: %w", err)
+	}
+
+	requestID := strings.ReplaceAll(uuid.New().String(), "-", "")
+	req.Header.Set("Accept", "application/json, text/plain, */*")
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	req.Header.Set("X-Domain", domain)
+	req.Header.Set("X-Refresh-Token", refreshToken)
+	req.Header.Set("X-Auth-Refresh-Source", "plugin")
+	req.Header.Set("X-Request-ID", requestID)
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("X-User-Id", userID)
+	req.Header.Set("X-Product", "SaaS")
+	req.Header.Set("User-Agent", UserAgent)
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: refresh request failed: %w", err)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("codebuddy refresh: close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to read refresh response: %w", err)
+	}
+
+	if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
+		return nil, fmt.Errorf("codebuddy: refresh token rejected (status %d)", resp.StatusCode)
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("codebuddy: refresh failed with status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var result struct {
+		Code int    `json:"code"`
+		Msg  string `json:"msg"`
+		Data *struct {
+			AccessToken      string `json:"accessToken"`
+			RefreshToken     string `json:"refreshToken"`
+			ExpiresIn        int64  `json:"expiresIn"`
+			RefreshExpiresIn int64  `json:"refreshExpiresIn"`
+			TokenType        string `json:"tokenType"`
+			Domain           string `json:"domain"`
+		} `json:"data"`
+	}
+	if err = json.Unmarshal(bodyBytes, &result); err != nil {
+		return nil, fmt.Errorf("codebuddy: failed to parse refresh response: %w", err)
+	}
+	if result.Code != codeSuccess {
+		return nil, fmt.Errorf("codebuddy: refresh failed with code %d: %s", result.Code, result.Msg)
+	}
+	if result.Data == nil {
+		return nil, fmt.Errorf("codebuddy: empty data in refresh response")
+	}
+
+	newUserID, _ := a.DecodeUserID(result.Data.AccessToken)
+	if newUserID == "" {
+		newUserID = userID
+	}
+	tokenDomain := result.Data.Domain
+	if tokenDomain == "" {
+		tokenDomain = domain
+	}
+
+	return &CodeBuddyTokenStorage{
+		AccessToken:      result.Data.AccessToken,
+		RefreshToken:     result.Data.RefreshToken,
+		ExpiresIn:        result.Data.ExpiresIn,
+		RefreshExpiresIn: result.Data.RefreshExpiresIn,
+		TokenType:        result.Data.TokenType,
+		Domain:           tokenDomain,
+		UserID:           newUserID,
+		Type:             "codebuddy",
+	}, nil
+}
+
+func (a *CodeBuddyAuth) applyPollHeaders(req *http.Request) {
+	req.Header.Set("Accept", "application/json, text/plain, */*")
+	req.Header.Set("User-Agent", UserAgent)
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+	req.Header.Set("X-No-Authorization", "true")
+	req.Header.Set("X-No-User-Id", "true")
+	req.Header.Set("X-No-Enterprise-Id", "true")
+	req.Header.Set("X-No-Department-Info", "true")
+	req.Header.Set("X-Product", "SaaS")
+}
--- a/internal/auth/codebuddy/codebuddy_auth_http_test.go
+++ b/internal/auth/codebuddy/codebuddy_auth_http_test.go
@@ -0,0 +1,285 @@
+package codebuddy
+
+import (
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+// newTestAuth creates a CodeBuddyAuth pointing at the given test server.
+func newTestAuth(serverURL string) *CodeBuddyAuth {
+	return &CodeBuddyAuth{
+		httpClient: http.DefaultClient,
+		baseURL:    serverURL,
+	}
+}
+
+// fakeJWT builds a minimal JWT with the given sub claim for testing.
+func fakeJWT(sub string) string {
+	header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"RS256"}`))
+	payload, _ := json.Marshal(map[string]any{"sub": sub, "iat": 1234567890})
+	encodedPayload := base64.RawURLEncoding.EncodeToString(payload)
+	return header + "." + encodedPayload + ".sig"
+}
+
+// --- FetchAuthState tests ---
+
+func TestFetchAuthState_Success(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("expected POST, got %s", r.Method)
+		}
+		if got := r.URL.Path; got != codeBuddyStatePath {
+			t.Errorf("expected path %s, got %s", codeBuddyStatePath, got)
+		}
+		if got := r.URL.Query().Get("platform"); got != "CLI" {
+			t.Errorf("expected platform=CLI, got %s", got)
+		}
+		if got := r.Header.Get("User-Agent"); got != UserAgent {
+			t.Errorf("expected User-Agent %s, got %s", UserAgent, got)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 0,
+			"msg":  "ok",
+			"data": map[string]any{
+				"state":   "test-state-abc",
+				"authUrl": "https://example.com/login?state=test-state-abc",
+			},
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	result, err := auth.FetchAuthState(context.Background())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result.State != "test-state-abc" {
+		t.Errorf("expected state 'test-state-abc', got '%s'", result.State)
+	}
+	if result.AuthURL != "https://example.com/login?state=test-state-abc" {
+		t.Errorf("unexpected authURL: %s", result.AuthURL)
+	}
+}
+
+func TestFetchAuthState_NonOKStatus(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+		_, _ = w.Write([]byte("internal error"))
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.FetchAuthState(context.Background())
+	if err == nil {
+		t.Fatal("expected error for non-200 status")
+	}
+}
+
+func TestFetchAuthState_APIErrorCode(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 10001,
+			"msg":  "rate limited",
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.FetchAuthState(context.Background())
+	if err == nil {
+		t.Fatal("expected error for non-zero code")
+	}
+}
+
+func TestFetchAuthState_MissingData(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 0,
+			"msg":  "ok",
+			"data": map[string]any{
+				"state":   "",
+				"authUrl": "",
+			},
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.FetchAuthState(context.Background())
+	if err == nil {
+		t.Fatal("expected error for empty state/authUrl")
+	}
+}
+
+// --- RefreshToken tests ---
+
+func TestRefreshToken_Success(t *testing.T) {
+	newAccessToken := fakeJWT("refreshed-user-456")
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("expected POST, got %s", r.Method)
+		}
+		if got := r.URL.Path; got != codeBuddyRefreshPath {
+			t.Errorf("expected path %s, got %s", codeBuddyRefreshPath, got)
+		}
+		if got := r.Header.Get("X-Refresh-Token"); got != "old-refresh-token" {
+			t.Errorf("expected X-Refresh-Token 'old-refresh-token', got '%s'", got)
+		}
+		if got := r.Header.Get("Authorization"); got != "Bearer old-access-token" {
+			t.Errorf("expected Authorization 'Bearer old-access-token', got '%s'", got)
+		}
+		if got := r.Header.Get("X-User-Id"); got != "user-123" {
+			t.Errorf("expected X-User-Id 'user-123', got '%s'", got)
+		}
+		if got := r.Header.Get("X-Domain"); got != "custom.domain.com" {
+			t.Errorf("expected X-Domain 'custom.domain.com', got '%s'", got)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 0,
+			"msg":  "ok",
+			"data": map[string]any{
+				"accessToken":      newAccessToken,
+				"refreshToken":     "new-refresh-token",
+				"expiresIn":        3600,
+				"refreshExpiresIn": 86400,
+				"tokenType":        "bearer",
+				"domain":           "custom.domain.com",
+			},
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	storage, err := auth.RefreshToken(context.Background(), "old-access-token", "old-refresh-token", "user-123", "custom.domain.com")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if storage.AccessToken != newAccessToken {
+		t.Errorf("expected new access token, got '%s'", storage.AccessToken)
+	}
+	if storage.RefreshToken != "new-refresh-token" {
+		t.Errorf("expected 'new-refresh-token', got '%s'", storage.RefreshToken)
+	}
+	if storage.UserID != "refreshed-user-456" {
+		t.Errorf("expected userID 'refreshed-user-456', got '%s'", storage.UserID)
+	}
+	if storage.ExpiresIn != 3600 {
+		t.Errorf("expected expiresIn 3600, got %d", storage.ExpiresIn)
+	}
+	if storage.RefreshExpiresIn != 86400 {
+		t.Errorf("expected refreshExpiresIn 86400, got %d", storage.RefreshExpiresIn)
+	}
+	if storage.Domain != "custom.domain.com" {
+		t.Errorf("expected domain 'custom.domain.com', got '%s'", storage.Domain)
+	}
+	if storage.Type != "codebuddy" {
+		t.Errorf("expected type 'codebuddy', got '%s'", storage.Type)
+	}
+}
+
+func TestRefreshToken_DefaultDomain(t *testing.T) {
+	var receivedDomain string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedDomain = r.Header.Get("X-Domain")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 0,
+			"msg":  "ok",
+			"data": map[string]any{
+				"accessToken":  fakeJWT("user-1"),
+				"refreshToken": "rt",
+				"expiresIn":    3600,
+				"tokenType":    "bearer",
+				"domain":       DefaultDomain,
+			},
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.RefreshToken(context.Background(), "at", "rt", "uid", "")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if receivedDomain != DefaultDomain {
+		t.Errorf("expected default domain '%s', got '%s'", DefaultDomain, receivedDomain)
+	}
+}
+
+func TestRefreshToken_Unauthorized(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusUnauthorized)
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.RefreshToken(context.Background(), "at", "rt", "uid", "d")
+	if err == nil {
+		t.Fatal("expected error for 401 response")
+	}
+}
+
+func TestRefreshToken_Forbidden(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusForbidden)
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.RefreshToken(context.Background(), "at", "rt", "uid", "d")
+	if err == nil {
+		t.Fatal("expected error for 403 response")
+	}
+}
+
+func TestRefreshToken_APIErrorCode(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 40001,
+			"msg":  "invalid refresh token",
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	_, err := auth.RefreshToken(context.Background(), "at", "rt", "uid", "d")
+	if err == nil {
+		t.Fatal("expected error for non-zero API code")
+	}
+}
+
+func TestRefreshToken_FallbackUserIDAndDomain(t *testing.T) {
+	// When the new access token cannot be decoded for userID, it should fall back to the provided one.
+	// When the response domain is empty, it should fall back to the request domain.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"code": 0,
+			"msg":  "ok",
+			"data": map[string]any{
+				"accessToken":  "not-a-valid-jwt",
+				"refreshToken": "new-rt",
+				"expiresIn":    7200,
+				"tokenType":    "bearer",
+				"domain":       "",
+			},
+		})
+	}))
+	defer srv.Close()
+
+	auth := newTestAuth(srv.URL)
+	storage, err := auth.RefreshToken(context.Background(), "at", "rt", "original-uid", "original.domain.com")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if storage.UserID != "original-uid" {
+		t.Errorf("expected fallback userID 'original-uid', got '%s'", storage.UserID)
+	}
+	if storage.Domain != "original.domain.com" {
+		t.Errorf("expected fallback domain 'original.domain.com', got '%s'", storage.Domain)
+	}
+}
--- a/internal/auth/codebuddy/codebuddy_auth_test.go
+++ b/internal/auth/codebuddy/codebuddy_auth_test.go
@@ -0,0 +1,22 @@
+package codebuddy_test
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codebuddy"
+)
+
+func TestDecodeUserID_ValidJWT(t *testing.T) {
+	// JWT payload: {"sub":"test-user-id-123","iat":1234567890}
+	// base64url encode: eyJzdWIiOiJ0ZXN0LXVzZXItaWQtMTIzIiwiaWF0IjoxMjM0NTY3ODkwfQ
+	token := "eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJ0ZXN0LXVzZXItaWQtMTIzIiwiaWF0IjoxMjM0NTY3ODkwfQ.sig"
+	auth := codebuddy.NewCodeBuddyAuth(nil)
+	userID, err := auth.DecodeUserID(token)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if userID != "test-user-id-123" {
+		t.Errorf("expected 'test-user-id-123', got '%s'", userID)
+	}
+}
+
--- a/internal/auth/codebuddy/errors.go
+++ b/internal/auth/codebuddy/errors.go
@@ -0,0 +1,25 @@
+package codebuddy
+
+import "errors"
+
+var (
+	ErrPollingTimeout   = errors.New("codebuddy: polling timeout, user did not authorize in time")
+	ErrAccessDenied     = errors.New("codebuddy: access denied by user")
+	ErrTokenFetchFailed = errors.New("codebuddy: failed to fetch token from server")
+	ErrJWTDecodeFailed  = errors.New("codebuddy: failed to decode JWT token")
+)
+
+func GetUserFriendlyMessage(err error) string {
+	switch {
+	case errors.Is(err, ErrPollingTimeout):
+		return "Authentication timed out. Please try again."
+	case errors.Is(err, ErrAccessDenied):
+		return "Access denied. Please try again and approve the login request."
+	case errors.Is(err, ErrJWTDecodeFailed):
+		return "Failed to decode token. Please try logging in again."
+	case errors.Is(err, ErrTokenFetchFailed):
+		return "Failed to fetch token from server. Please try again."
+	default:
+		return "Authentication failed: " + err.Error()
+	}
+}
--- a/internal/auth/codebuddy/token.go
+++ b/internal/auth/codebuddy/token.go
@@ -0,0 +1,65 @@
+// Package codebuddy provides authentication and token management functionality
+// for CodeBuddy AI services. It handles OAuth2 token storage, serialization,
+// and retrieval for maintaining authenticated sessions with the CodeBuddy API.
+package codebuddy
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+)
+
+// CodeBuddyTokenStorage stores OAuth token information for CodeBuddy API authentication.
+// It maintains compatibility with the existing auth system while adding CodeBuddy-specific fields
+// for managing access tokens and user account information.
+type CodeBuddyTokenStorage struct {
+	// AccessToken is the OAuth2 access token used for authenticating API requests.
+	AccessToken string `json:"access_token"`
+	// RefreshToken is the OAuth2 refresh token used to obtain new access tokens.
+	RefreshToken string `json:"refresh_token"`
+	// ExpiresIn is the number of seconds until the access token expires.
+	ExpiresIn int64 `json:"expires_in"`
+	// RefreshExpiresIn is the number of seconds until the refresh token expires.
+	RefreshExpiresIn int64 `json:"refresh_expires_in,omitempty"`
+	// TokenType is the type of token, typically "bearer".
+	TokenType string `json:"token_type"`
+	// Domain is the CodeBuddy service domain/region.
+	Domain string `json:"domain"`
+	// UserID is the user ID associated with this token.
+	UserID string `json:"user_id"`
+	// Type indicates the authentication provider type, always "codebuddy" for this storage.
+	Type string `json:"type"`
+}
+
+// SaveTokenToFile serializes the CodeBuddy token storage to a JSON file.
+// This method creates the necessary directory structure and writes the token
+// data in JSON format to the specified file path for persistent storage.
+//
+// Parameters:
+//   - authFilePath: The full path where the token file should be saved
+//
+// Returns:
+//   - error: An error if the operation fails, nil otherwise
+func (s *CodeBuddyTokenStorage) SaveTokenToFile(authFilePath string) error {
+	misc.LogSavingCredentials(authFilePath)
+	s.Type = "codebuddy"
+	if err := os.MkdirAll(filepath.Dir(authFilePath), 0700); err != nil {
+		return fmt.Errorf("failed to create directory: %w", err)
+	}
+
+	f, err := os.OpenFile(authFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
+	if err != nil {
+		return fmt.Errorf("failed to create token file: %w", err)
+	}
+	defer func() {
+		_ = f.Close()
+	}()
+
+	if err = json.NewEncoder(f).Encode(s); err != nil {
+		return fmt.Errorf("failed to write token to file: %w", err)
+	}
+	return nil
+}
--- a/internal/auth/copilot/copilot_auth.go
+++ b/internal/auth/copilot/copilot_auth.go
@@ -8,6 +8,8 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"net/url"
+	"strings"
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -222,6 +224,97 @@ func (c *CopilotAuth) MakeAuthenticatedRequest(ctx context.Context, method, url
 	return req, nil
 }

+// CopilotModelEntry represents a single model entry returned by the Copilot /models API.
+type CopilotModelEntry struct {
+	ID           string         `json:"id"`
+	Object       string         `json:"object"`
+	Created      int64          `json:"created"`
+	OwnedBy      string         `json:"owned_by"`
+	Name         string         `json:"name,omitempty"`
+	Version      string         `json:"version,omitempty"`
+	Capabilities map[string]any `json:"capabilities,omitempty"`
+}
+
+// CopilotModelsResponse represents the response from the Copilot /models endpoint.
+type CopilotModelsResponse struct {
+	Data   []CopilotModelEntry `json:"data"`
+	Object string              `json:"object"`
+}
+
+// maxModelsResponseSize is the maximum allowed response size from the /models endpoint (2 MB).
+const maxModelsResponseSize = 2 * 1024 * 1024
+
+// allowedCopilotAPIHosts is the set of hosts that are considered safe for Copilot API requests.
+var allowedCopilotAPIHosts = map[string]bool{
+	"api.githubcopilot.com":          true,
+	"api.individual.githubcopilot.com": true,
+	"api.business.githubcopilot.com":   true,
+	"copilot-proxy.githubusercontent.com": true,
+}
+
+// ListModels fetches the list of available models from the Copilot API.
+// It requires a valid Copilot API token (not the GitHub access token).
+func (c *CopilotAuth) ListModels(ctx context.Context, apiToken *CopilotAPIToken) ([]CopilotModelEntry, error) {
+	if apiToken == nil || apiToken.Token == "" {
+		return nil, fmt.Errorf("copilot: api token is required for listing models")
+	}
+
+	// Build models URL, validating the endpoint host to prevent SSRF.
+	modelsURL := copilotAPIEndpoint + "/models"
+	if ep := strings.TrimRight(apiToken.Endpoints.API, "/"); ep != "" {
+		parsed, err := url.Parse(ep)
+		if err == nil && parsed.Scheme == "https" && allowedCopilotAPIHosts[parsed.Host] {
+			modelsURL = ep + "/models"
+		} else {
+			log.Warnf("copilot: ignoring untrusted API endpoint %q, using default", ep)
+		}
+	}
+
+	req, err := c.MakeAuthenticatedRequest(ctx, http.MethodGet, modelsURL, nil, apiToken)
+	if err != nil {
+		return nil, fmt.Errorf("copilot: failed to create models request: %w", err)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("copilot: models request failed: %w", err)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("copilot list models: close body error: %v", errClose)
+		}
+	}()
+
+	// Limit response body to prevent memory exhaustion.
+	limitedReader := io.LimitReader(resp.Body, maxModelsResponseSize)
+	bodyBytes, err := io.ReadAll(limitedReader)
+	if err != nil {
+		return nil, fmt.Errorf("copilot: failed to read models response: %w", err)
+	}
+
+	if !isHTTPSuccess(resp.StatusCode) {
+		return nil, fmt.Errorf("copilot: list models failed with status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var modelsResp CopilotModelsResponse
+	if err = json.Unmarshal(bodyBytes, &modelsResp); err != nil {
+		return nil, fmt.Errorf("copilot: failed to parse models response: %w", err)
+	}
+
+	return modelsResp.Data, nil
+}
+
+// ListModelsWithGitHubToken is a convenience method that exchanges a GitHub access token
+// for a Copilot API token and then fetches the available models.
+func (c *CopilotAuth) ListModelsWithGitHubToken(ctx context.Context, githubAccessToken string) ([]CopilotModelEntry, error) {
+	apiToken, err := c.GetCopilotAPIToken(ctx, githubAccessToken)
+	if err != nil {
+		return nil, fmt.Errorf("copilot: failed to get API token for model listing: %w", err)
+	}
+
+	return c.ListModels(ctx, apiToken)
+}
+
 // buildChatCompletionURL builds the URL for chat completions API.
 func buildChatCompletionURL() string {
 	return copilotAPIEndpoint + "/chat/completions"
--- a/internal/auth/cursor/filename.go
+++ b/internal/auth/cursor/filename.go
@@ -0,0 +1,33 @@
+package cursor
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CredentialFileName returns the filename used to persist Cursor credentials.
+// Priority: explicit label > auto-generated from JWT sub hash.
+// If both label and subHash are empty, falls back to "cursor.json".
+func CredentialFileName(label, subHash string) string {
+	label = strings.TrimSpace(label)
+	subHash = strings.TrimSpace(subHash)
+	if label != "" {
+		return fmt.Sprintf("cursor.%s.json", label)
+	}
+	if subHash != "" {
+		return fmt.Sprintf("cursor.%s.json", subHash)
+	}
+	return "cursor.json"
+}
+
+// DisplayLabel returns a human-readable label for the Cursor account.
+func DisplayLabel(label, subHash string) string {
+	label = strings.TrimSpace(label)
+	if label != "" {
+		return "Cursor " + label
+	}
+	if subHash != "" {
+		return "Cursor " + subHash
+	}
+	return "Cursor User"
+}
--- a/internal/auth/cursor/oauth.go
+++ b/internal/auth/cursor/oauth.go
@@ -0,0 +1,249 @@
+// Package cursor implements Cursor OAuth PKCE authentication and token refresh.
+package cursor
+
+import (
+	"context"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+	"strings"
+	"time"
+)
+
+const (
+	CursorLoginURL   = "https://cursor.com/loginDeepControl"
+	CursorPollURL    = "https://api2.cursor.sh/auth/poll"
+	CursorRefreshURL = "https://api2.cursor.sh/auth/exchange_user_api_key"
+
+	pollMaxAttempts      = 150
+	pollBaseDelay        = 1 * time.Second
+	pollMaxDelay         = 10 * time.Second
+	pollBackoffMultiply  = 1.2
+	maxConsecutiveErrors = 10
+)
+
+// AuthParams holds the PKCE parameters for Cursor login.
+type AuthParams struct {
+	Verifier  string
+	Challenge string
+	UUID      string
+	LoginURL  string
+}
+
+// TokenPair holds the access and refresh tokens from Cursor.
+type TokenPair struct {
+	AccessToken  string `json:"accessToken"`
+	RefreshToken string `json:"refreshToken"`
+}
+
+// GeneratePKCE creates a PKCE verifier and challenge pair.
+func GeneratePKCE() (verifier, challenge string, err error) {
+	verifierBytes := make([]byte, 96)
+	if _, err = rand.Read(verifierBytes); err != nil {
+		return "", "", fmt.Errorf("cursor: failed to generate PKCE verifier: %w", err)
+	}
+	verifier = base64.RawURLEncoding.EncodeToString(verifierBytes)
+
+	h := sha256.Sum256([]byte(verifier))
+	challenge = base64.RawURLEncoding.EncodeToString(h[:])
+	return verifier, challenge, nil
+}
+
+// GenerateAuthParams creates the full set of auth params for Cursor login.
+func GenerateAuthParams() (*AuthParams, error) {
+	verifier, challenge, err := GeneratePKCE()
+	if err != nil {
+		return nil, err
+	}
+
+	uuidBytes := make([]byte, 16)
+	if _, err = rand.Read(uuidBytes); err != nil {
+		return nil, fmt.Errorf("cursor: failed to generate UUID: %w", err)
+	}
+	uuid := fmt.Sprintf("%x-%x-%x-%x-%x",
+		uuidBytes[0:4], uuidBytes[4:6], uuidBytes[6:8], uuidBytes[8:10], uuidBytes[10:16])
+
+	loginURL := fmt.Sprintf("%s?challenge=%s&uuid=%s&mode=login&redirectTarget=cli",
+		CursorLoginURL, challenge, uuid)
+
+	return &AuthParams{
+		Verifier:  verifier,
+		Challenge: challenge,
+		UUID:      uuid,
+		LoginURL:  loginURL,
+	}, nil
+}
+
+// PollForAuth polls the Cursor auth endpoint until the user completes login.
+func PollForAuth(ctx context.Context, uuid, verifier string) (*TokenPair, error) {
+	delay := pollBaseDelay
+	consecutiveErrors := 0
+
+	client := &http.Client{Timeout: 10 * time.Second}
+
+	for attempt := 0; attempt < pollMaxAttempts; attempt++ {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(delay):
+		}
+
+		url := fmt.Sprintf("%s?uuid=%s&verifier=%s", CursorPollURL, uuid, verifier)
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+		if err != nil {
+			return nil, fmt.Errorf("cursor: failed to create poll request: %w", err)
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			consecutiveErrors++
+			if consecutiveErrors >= maxConsecutiveErrors {
+				return nil, fmt.Errorf("cursor: too many consecutive poll errors (last: %v)", err)
+			}
+			delay = minDuration(time.Duration(float64(delay)*pollBackoffMultiply), pollMaxDelay)
+			continue
+		}
+
+		body, _ := io.ReadAll(resp.Body)
+		resp.Body.Close()
+
+		if resp.StatusCode == http.StatusNotFound {
+			// Still waiting for user to authorize
+			consecutiveErrors = 0
+			delay = minDuration(time.Duration(float64(delay)*pollBackoffMultiply), pollMaxDelay)
+			continue
+		}
+
+		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
+			var tokens TokenPair
+			if err := json.Unmarshal(body, &tokens); err != nil {
+				return nil, fmt.Errorf("cursor: failed to parse auth response: %w", err)
+			}
+			return &tokens, nil
+		}
+
+		return nil, fmt.Errorf("cursor: poll failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	return nil, fmt.Errorf("cursor: authentication polling timeout (waited ~%.0f seconds)",
+		float64(pollMaxAttempts)*pollMaxDelay.Seconds()/2)
+}
+
+// RefreshToken refreshes a Cursor access token using the refresh token.
+func RefreshToken(ctx context.Context, refreshToken string) (*TokenPair, error) {
+	client := &http.Client{Timeout: 10 * time.Second}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, CursorRefreshURL,
+		strings.NewReader("{}"))
+	if err != nil {
+		return nil, fmt.Errorf("cursor: failed to create refresh request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+refreshToken)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("cursor: token refresh request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, _ := io.ReadAll(resp.Body)
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("cursor: token refresh failed (status %d): %s", resp.StatusCode, string(body))
+	}
+
+	var tokens TokenPair
+	if err := json.Unmarshal(body, &tokens); err != nil {
+		return nil, fmt.Errorf("cursor: failed to parse refresh response: %w", err)
+	}
+
+	// Keep original refresh token if not returned
+	if tokens.RefreshToken == "" {
+		tokens.RefreshToken = refreshToken
+	}
+
+	return &tokens, nil
+}
+
+// ParseJWTSub extracts the "sub" claim from a Cursor JWT access token.
+// Cursor JWTs contain "sub" like "auth0|user_XXXX" which uniquely identifies
+// the account. Returns empty string if parsing fails.
+func ParseJWTSub(token string) string {
+	decoded := decodeJWTPayload(token)
+	if decoded == nil {
+		return ""
+	}
+	var claims struct {
+		Sub string `json:"sub"`
+	}
+	if err := json.Unmarshal(decoded, &claims); err != nil {
+		return ""
+	}
+	return claims.Sub
+}
+
+// SubToShortHash converts a JWT sub claim to a short hex hash for use in filenames.
+// e.g. "auth0|user_2x..." → "a3f8b2c1"
+func SubToShortHash(sub string) string {
+	if sub == "" {
+		return ""
+	}
+	h := sha256.Sum256([]byte(sub))
+	return fmt.Sprintf("%x", h[:4]) // 8 hex chars
+}
+
+// decodeJWTPayload decodes the payload (middle) part of a JWT.
+func decodeJWTPayload(token string) []byte {
+	parts := strings.Split(token, ".")
+	if len(parts) != 3 {
+		return nil
+	}
+	payload := parts[1]
+	switch len(payload) % 4 {
+	case 2:
+		payload += "=="
+	case 3:
+		payload += "="
+	}
+	payload = strings.ReplaceAll(payload, "-", "+")
+	payload = strings.ReplaceAll(payload, "_", "/")
+	decoded, err := base64.StdEncoding.DecodeString(payload)
+	if err != nil {
+		return nil
+	}
+	return decoded
+}
+
+// GetTokenExpiry extracts the JWT expiry from an access token with a 5-minute safety margin.
+// Falls back to 1 hour from now if the token can't be parsed.
+func GetTokenExpiry(token string) time.Time {
+	decoded := decodeJWTPayload(token)
+	if decoded == nil {
+		return time.Now().Add(1 * time.Hour)
+	}
+
+	var claims struct {
+		Exp float64 `json:"exp"`
+	}
+	if err := json.Unmarshal(decoded, &claims); err != nil || claims.Exp == 0 {
+		return time.Now().Add(1 * time.Hour)
+	}
+
+	sec, frac := math.Modf(claims.Exp)
+	expiry := time.Unix(int64(sec), int64(frac*1e9))
+	// Subtract 5-minute safety margin
+	return expiry.Add(-5 * time.Minute)
+}
+
+func minDuration(a, b time.Duration) time.Duration {
+	if a < b {
+		return a
+	}
+	return b
+}
--- a/internal/auth/cursor/proto/connect.go
+++ b/internal/auth/cursor/proto/connect.go
@@ -0,0 +1,84 @@
+package proto
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+)
+
+const (
+	// ConnectEndStreamFlag marks the end-of-stream frame (trailers).
+	ConnectEndStreamFlag byte = 0x02
+	// ConnectCompressionFlag indicates the payload is compressed (not supported).
+	ConnectCompressionFlag byte = 0x01
+	// ConnectFrameHeaderSize is the fixed 5-byte frame header.
+	ConnectFrameHeaderSize = 5
+)
+
+// FrameConnectMessage wraps a protobuf payload in a Connect frame.
+// Frame format: [1 byte flags][4 bytes payload length (big-endian)][payload]
+func FrameConnectMessage(data []byte, flags byte) []byte {
+	frame := make([]byte, ConnectFrameHeaderSize+len(data))
+	frame[0] = flags
+	binary.BigEndian.PutUint32(frame[1:5], uint32(len(data)))
+	copy(frame[5:], data)
+	return frame
+}
+
+// ParseConnectFrame extracts one frame from a buffer.
+// Returns (flags, payload, bytesConsumed, ok).
+// ok is false when the buffer is too short for a complete frame.
+func ParseConnectFrame(buf []byte) (flags byte, payload []byte, consumed int, ok bool) {
+	if len(buf) < ConnectFrameHeaderSize {
+		return 0, nil, 0, false
+	}
+	flags = buf[0]
+	length := binary.BigEndian.Uint32(buf[1:5])
+	total := ConnectFrameHeaderSize + int(length)
+	if len(buf) < total {
+		return 0, nil, 0, false
+	}
+	return flags, buf[5:total], total, true
+}
+
+// ConnectError is a structured error from the Connect protocol end-of-stream trailer.
+// The Code field contains the server-defined error code (e.g. gRPC standard codes
+// like "resource_exhausted", "unauthenticated", "permission_denied", "unavailable").
+type ConnectError struct {
+	Code    string // server-defined error code
+	Message string // human-readable error description
+}
+
+func (e *ConnectError) Error() string {
+	return fmt.Sprintf("Connect error %s: %s", e.Code, e.Message)
+}
+
+// ParseConnectEndStream parses a Connect end-of-stream frame payload (JSON).
+// Returns nil if there is no error in the trailer.
+// On error, returns a *ConnectError with the server's error code and message.
+func ParseConnectEndStream(data []byte) error {
+	if len(data) == 0 {
+		return nil
+	}
+	var trailer struct {
+		Error *struct {
+			Code    string `json:"code"`
+			Message string `json:"message"`
+		} `json:"error"`
+	}
+	if err := json.Unmarshal(data, &trailer); err != nil {
+		return fmt.Errorf("failed to parse Connect end stream: %w", err)
+	}
+	if trailer.Error != nil {
+		code := trailer.Error.Code
+		if code == "" {
+			code = "unknown"
+		}
+		msg := trailer.Error.Message
+		if msg == "" {
+			msg = "Unknown error"
+		}
+		return &ConnectError{Code: code, Message: msg}
+	}
+	return nil
+}
--- a/internal/auth/cursor/proto/decode.go
+++ b/internal/auth/cursor/proto/decode.go
@@ -0,0 +1,564 @@
+package proto
+
+import (
+	"encoding/hex"
+	"fmt"
+
+	log "github.com/sirupsen/logrus"
+	"google.golang.org/protobuf/encoding/protowire"
+)
+
+// ServerMessageType identifies the kind of decoded server message.
+type ServerMessageType int
+
+const (
+	ServerMsgUnknown           ServerMessageType = iota
+	ServerMsgTextDelta                           // Text content delta
+	ServerMsgThinkingDelta                       // Thinking/reasoning delta
+	ServerMsgThinkingCompleted                   // Thinking completed
+	ServerMsgKvGetBlob                           // Server wants a blob
+	ServerMsgKvSetBlob                           // Server wants to store a blob
+	ServerMsgExecRequestCtx                      // Server requests context (tools, etc.)
+	ServerMsgExecMcpArgs                         // Server wants MCP tool execution
+	ServerMsgExecShellArgs                       // Rejected: shell command
+	ServerMsgExecReadArgs                        // Rejected: file read
+	ServerMsgExecWriteArgs                       // Rejected: file write
+	ServerMsgExecDeleteArgs                      // Rejected: file delete
+	ServerMsgExecLsArgs                          // Rejected: directory listing
+	ServerMsgExecGrepArgs                        // Rejected: grep search
+	ServerMsgExecFetchArgs                       // Rejected: HTTP fetch
+	ServerMsgExecDiagnostics                     // Respond with empty diagnostics
+	ServerMsgExecShellStream                     // Rejected: shell stream
+	ServerMsgExecBgShellSpawn                    // Rejected: background shell
+	ServerMsgExecWriteShellStdin                 // Rejected: write shell stdin
+	ServerMsgExecOther                           // Other exec types (respond with empty)
+	ServerMsgTurnEnded                           // Turn has ended (no more output)
+	ServerMsgHeartbeat                           // Server heartbeat
+	ServerMsgTokenDelta                          // Token usage delta
+	ServerMsgCheckpoint                          // Conversation checkpoint update
+)
+
+// DecodedServerMessage holds parsed data from an AgentServerMessage.
+type DecodedServerMessage struct {
+	Type ServerMessageType
+
+	// For text/thinking deltas
+	Text string
+
+	// For KV messages
+	KvId     uint32
+	BlobId   []byte // hex-encoded blob ID
+	BlobData []byte // for setBlobArgs
+
+	// For exec messages
+	ExecMsgId uint32
+	ExecId    string
+
+	// For MCP args
+	McpToolName   string
+	McpToolCallId string
+	McpArgs       map[string][]byte // arg name -> protobuf-encoded value
+
+	// For rejection context
+	Path             string
+	Command          string
+	WorkingDirectory string
+	Url              string
+
+	// For other exec - the raw field number for building a response
+	ExecFieldNumber int
+
+	// For TokenDeltaUpdate
+	TokenDelta int64
+
+	// For conversation checkpoint update (raw bytes, not decoded)
+	CheckpointData []byte
+}
+
+// DecodeAgentServerMessage parses an AgentServerMessage and returns
+// a structured representation of the first meaningful message found.
+func DecodeAgentServerMessage(data []byte) (*DecodedServerMessage, error) {
+	msg := &DecodedServerMessage{Type: ServerMsgUnknown}
+
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return msg, fmt.Errorf("invalid tag")
+		}
+		data = data[n:]
+
+		switch typ {
+		case protowire.BytesType:
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return msg, fmt.Errorf("invalid bytes field %d", num)
+			}
+			data = data[n:]
+
+			// Debug: log top-level ASM fields
+			log.Debugf("DecodeAgentServerMessage: found ASM field %d, len=%d", num, len(val))
+
+			switch num {
+			case ASM_InteractionUpdate:
+				log.Debugf("DecodeAgentServerMessage: calling decodeInteractionUpdate")
+				decodeInteractionUpdate(val, msg)
+			case ASM_ExecServerMessage:
+				log.Debugf("DecodeAgentServerMessage: calling decodeExecServerMessage")
+				decodeExecServerMessage(val, msg)
+			case ASM_KvServerMessage:
+				decodeKvServerMessage(val, msg)
+			case ASM_ConversationCheckpoint:
+				msg.Type = ServerMsgCheckpoint
+				msg.CheckpointData = append([]byte(nil), val...) // copy raw bytes
+				log.Debugf("DecodeAgentServerMessage: captured checkpoint %d bytes", len(val))
+			}
+
+		case protowire.VarintType:
+			_, n := protowire.ConsumeVarint(data)
+			if n < 0 {
+				return msg, fmt.Errorf("invalid varint field %d", num)
+			}
+			data = data[n:]
+
+		default:
+			// Skip unknown wire types
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return msg, fmt.Errorf("invalid field %d", num)
+			}
+			data = data[n:]
+		}
+	}
+
+	return msg, nil
+}
+
+func decodeInteractionUpdate(data []byte, msg *DecodedServerMessage) {
+	log.Debugf("decodeInteractionUpdate: input len=%d, hex=%x", len(data), data)
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			log.Debugf("decodeInteractionUpdate: invalid tag, remaining=%x", data)
+			return
+		}
+		data = data[n:]
+		log.Debugf("decodeInteractionUpdate: field=%d wire=%d remaining=%d bytes", num, typ, len(data))
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				log.Debugf("decodeInteractionUpdate: invalid bytes field %d", num)
+				return
+			}
+			data = data[n:]
+			log.Debugf("decodeInteractionUpdate: field %d content len=%d, first 20 bytes: %x", num, len(val), val[:min(20, len(val))])
+
+			switch num {
+			case IU_TextDelta:
+				msg.Type = ServerMsgTextDelta
+				msg.Text = decodeStringField(val, TDU_Text)
+				log.Debugf("decodeInteractionUpdate: TextDelta text=%q", msg.Text)
+			case IU_ThinkingDelta:
+				msg.Type = ServerMsgThinkingDelta
+				msg.Text = decodeStringField(val, TKD_Text)
+				log.Debugf("decodeInteractionUpdate: ThinkingDelta text=%q", msg.Text)
+			case IU_ThinkingCompleted:
+				msg.Type = ServerMsgThinkingCompleted
+				log.Debugf("decodeInteractionUpdate: ThinkingCompleted")
+			case 2:
+				// tool_call_started - ignore but log
+				log.Debugf("decodeInteractionUpdate: ToolCallStarted (ignored)")
+			case 3:
+				// tool_call_completed - ignore but log
+				log.Debugf("decodeInteractionUpdate: ToolCallCompleted (ignored)")
+			case 8:
+				// token_delta - extract token count
+				msg.Type = ServerMsgTokenDelta
+				msg.TokenDelta = decodeVarintField(val, 1)
+				log.Debugf("decodeInteractionUpdate: TokenDeltaUpdate tokens=%d", msg.TokenDelta)
+			case 13:
+				// heartbeat from server
+				msg.Type = ServerMsgHeartbeat
+			case 14:
+				// turn_ended - critical: model finished generating
+				msg.Type = ServerMsgTurnEnded
+				log.Debugf("decodeInteractionUpdate: TurnEndedUpdate - stream should end")
+			case 16:
+				// step_started - ignore
+				log.Debugf("decodeInteractionUpdate: StepStartedUpdate (ignored)")
+			case 17:
+				// step_completed - ignore
+				log.Debugf("decodeInteractionUpdate: StepCompletedUpdate (ignored)")
+			default:
+				log.Debugf("decodeInteractionUpdate: unknown field %d", num)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+func decodeKvServerMessage(data []byte, msg *DecodedServerMessage) {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		switch typ {
+		case protowire.VarintType:
+			val, n := protowire.ConsumeVarint(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+			if num == KSM_Id {
+				msg.KvId = uint32(val)
+			}
+
+		case protowire.BytesType:
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+
+			switch num {
+			case KSM_GetBlobArgs:
+				msg.Type = ServerMsgKvGetBlob
+				msg.BlobId = decodeBytesField(val, GBA_BlobId)
+			case KSM_SetBlobArgs:
+				msg.Type = ServerMsgKvSetBlob
+				decodeSetBlobArgs(val, msg)
+			}
+
+		default:
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+func decodeSetBlobArgs(data []byte, msg *DecodedServerMessage) {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+			switch num {
+			case SBA_BlobId:
+				msg.BlobId = val
+			case SBA_BlobData:
+				msg.BlobData = val
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+func decodeExecServerMessage(data []byte, msg *DecodedServerMessage) {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		switch typ {
+		case protowire.VarintType:
+			val, n := protowire.ConsumeVarint(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+			if num == ESM_Id {
+				msg.ExecMsgId = uint32(val)
+				log.Debugf("decodeExecServerMessage: ESM_Id = %d", val)
+			}
+
+		case protowire.BytesType:
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+
+			// Debug: log all fields found in ExecServerMessage
+			log.Debugf("decodeExecServerMessage: found field %d, len=%d, first 20 bytes: %x", num, len(val), val[:min(20, len(val))])
+
+			switch num {
+			case ESM_ExecId:
+				msg.ExecId = string(val)
+				log.Debugf("decodeExecServerMessage: ESM_ExecId = %q", msg.ExecId)
+			case ESM_RequestContextArgs:
+				msg.Type = ServerMsgExecRequestCtx
+			case ESM_McpArgs:
+				msg.Type = ServerMsgExecMcpArgs
+				decodeMcpArgs(val, msg)
+			case ESM_ShellArgs:
+				msg.Type = ServerMsgExecShellArgs
+				decodeShellArgs(val, msg)
+			case ESM_ShellStreamArgs:
+				msg.Type = ServerMsgExecShellStream
+				decodeShellArgs(val, msg)
+			case ESM_ReadArgs:
+				msg.Type = ServerMsgExecReadArgs
+				msg.Path = decodeStringField(val, RA_Path)
+			case ESM_WriteArgs:
+				msg.Type = ServerMsgExecWriteArgs
+				msg.Path = decodeStringField(val, WA_Path)
+			case ESM_DeleteArgs:
+				msg.Type = ServerMsgExecDeleteArgs
+				msg.Path = decodeStringField(val, DA_Path)
+			case ESM_LsArgs:
+				msg.Type = ServerMsgExecLsArgs
+				msg.Path = decodeStringField(val, LA_Path)
+			case ESM_GrepArgs:
+				msg.Type = ServerMsgExecGrepArgs
+			case ESM_FetchArgs:
+				msg.Type = ServerMsgExecFetchArgs
+				msg.Url = decodeStringField(val, FA_Url)
+			case ESM_DiagnosticsArgs:
+				msg.Type = ServerMsgExecDiagnostics
+			case ESM_BackgroundShellSpawn:
+				msg.Type = ServerMsgExecBgShellSpawn
+				decodeShellArgs(val, msg) // same structure
+			case ESM_WriteShellStdinArgs:
+				msg.Type = ServerMsgExecWriteShellStdin
+			default:
+				// Unknown exec types - only set if we haven't identified the type yet
+				// (other fields like span_context (19) come after the exec type field)
+				if msg.Type == ServerMsgUnknown {
+					msg.Type = ServerMsgExecOther
+					msg.ExecFieldNumber = int(num)
+				}
+			}
+
+		default:
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+func decodeMcpArgs(data []byte, msg *DecodedServerMessage) {
+	msg.McpArgs = make(map[string][]byte)
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+
+			switch num {
+			case MCA_Name:
+				msg.McpToolName = string(val)
+			case MCA_Args:
+				// Map entries are encoded as submessages with key=1, value=2
+				decodeMapEntry(val, msg.McpArgs)
+			case MCA_ToolCallId:
+				msg.McpToolCallId = string(val)
+			case MCA_ToolName:
+				// ToolName takes precedence if present
+				if msg.McpToolName == "" || string(val) != "" {
+					msg.McpToolName = string(val)
+				}
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+func decodeMapEntry(data []byte, m map[string][]byte) {
+	var key string
+	var value []byte
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+			if num == 1 {
+				key = string(val)
+			} else if num == 2 {
+				value = append([]byte(nil), val...)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+	if key != "" {
+		m[key] = value
+	}
+}
+
+func decodeShellArgs(data []byte, msg *DecodedServerMessage) {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+			switch num {
+			case SHA_Command:
+				msg.Command = string(val)
+			case SHA_WorkingDirectory:
+				msg.WorkingDirectory = string(val)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return
+			}
+			data = data[n:]
+		}
+	}
+}
+
+// --- Helper decoders ---
+
+// decodeStringField extracts a string from the first matching field in a submessage.
+func decodeStringField(data []byte, targetField protowire.Number) string {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return ""
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return ""
+			}
+			data = data[n:]
+			if num == targetField {
+				return string(val)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return ""
+			}
+			data = data[n:]
+		}
+	}
+	return ""
+}
+
+// decodeBytesField extracts bytes from the first matching field in a submessage.
+func decodeBytesField(data []byte, targetField protowire.Number) []byte {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return nil
+		}
+		data = data[n:]
+
+		if typ == protowire.BytesType {
+			val, n := protowire.ConsumeBytes(data)
+			if n < 0 {
+				return nil
+			}
+			data = data[n:]
+			if num == targetField {
+				return append([]byte(nil), val...)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return nil
+			}
+			data = data[n:]
+		}
+	}
+	return nil
+}
+
+// decodeVarintField extracts an int64 from the first matching varint field in a submessage.
+func decodeVarintField(data []byte, targetField protowire.Number) int64 {
+	for len(data) > 0 {
+		num, typ, n := protowire.ConsumeTag(data)
+		if n < 0 {
+			return 0
+		}
+		data = data[n:]
+		if typ == protowire.VarintType {
+			val, n := protowire.ConsumeVarint(data)
+			if n < 0 {
+				return 0
+			}
+			data = data[n:]
+			if num == targetField {
+				return int64(val)
+			}
+		} else {
+			n := protowire.ConsumeFieldValue(num, typ, data)
+			if n < 0 {
+				return 0
+			}
+			data = data[n:]
+		}
+	}
+	return 0
+}
+
+// BlobIdHex returns the hex string of a blob ID for use as a map key.
+func BlobIdHex(blobId []byte) string {
+	return hex.EncodeToString(blobId)
+}
+
--- a/internal/auth/cursor/proto/descriptor.go
+++ b/internal/auth/cursor/proto/descriptor.go
--- a/internal/auth/cursor/proto/encode.go
+++ b/internal/auth/cursor/proto/encode.go
@@ -0,0 +1,664 @@
+// Package proto provides protobuf encoding for Cursor's gRPC API,
+// using dynamicpb with the embedded FileDescriptorProto from agent.proto.
+// This mirrors the cursor-auth TS plugin's use of @bufbuild/protobuf create()+toBinary().
+package proto
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+
+	log "github.com/sirupsen/logrus"
+	"google.golang.org/protobuf/encoding/protowire"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/reflect/protoreflect"
+	"google.golang.org/protobuf/types/dynamicpb"
+	"google.golang.org/protobuf/types/known/structpb"
+)
+
+// --- Public types ---
+
+// RunRequestParams holds all data needed to build an AgentRunRequest.
+type RunRequestParams struct {
+	ModelId        string
+	SystemPrompt   string
+	UserText       string
+	MessageId      string
+	ConversationId string
+	Images         []ImageData
+	Turns          []TurnData
+	McpTools       []McpToolDef
+	BlobStore      map[string][]byte // hex(sha256) -> data, populated during encoding
+	RawCheckpoint  []byte            // if non-nil, use as conversation_state directly (from server checkpoint)
+}
+
+type ImageData struct {
+	MimeType string
+	Data     []byte
+}
+
+type TurnData struct {
+	UserText      string
+	AssistantText string
+}
+
+type McpToolDef struct {
+	Name        string
+	Description string
+	InputSchema json.RawMessage
+}
+
+// --- Helper: create a dynamic message and set fields ---
+
+func newMsg(name string) *dynamicpb.Message {
+	return dynamicpb.NewMessage(Msg(name))
+}
+
+func field(msg *dynamicpb.Message, name string) protoreflect.FieldDescriptor {
+	return msg.Descriptor().Fields().ByName(protoreflect.Name(name))
+}
+
+func setStr(msg *dynamicpb.Message, name, val string) {
+	if val != "" {
+		msg.Set(field(msg, name), protoreflect.ValueOfString(val))
+	}
+}
+
+func setBytes(msg *dynamicpb.Message, name string, val []byte) {
+	if len(val) > 0 {
+		msg.Set(field(msg, name), protoreflect.ValueOfBytes(val))
+	}
+}
+
+func setUint32(msg *dynamicpb.Message, name string, val uint32) {
+	msg.Set(field(msg, name), protoreflect.ValueOfUint32(val))
+}
+
+func setBool(msg *dynamicpb.Message, name string, val bool) {
+	msg.Set(field(msg, name), protoreflect.ValueOfBool(val))
+}
+
+func setMsg(msg *dynamicpb.Message, name string, sub *dynamicpb.Message) {
+	msg.Set(field(msg, name), protoreflect.ValueOfMessage(sub.ProtoReflect()))
+}
+
+func marshal(msg *dynamicpb.Message) []byte {
+	b, err := proto.Marshal(msg)
+	if err != nil {
+		panic("cursor proto marshal: " + err.Error())
+	}
+	return b
+}
+
+// --- Encode functions mirroring cursor-fetch.ts ---
+
+// EncodeHeartbeat returns an encoded AgentClientMessage with clientHeartbeat.
+// Mirrors: create(AgentClientMessageSchema, { message: { case: 'clientHeartbeat', value: create(ClientHeartbeatSchema, {}) } })
+func EncodeHeartbeat() []byte {
+	hb := newMsg("ClientHeartbeat")
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "client_heartbeat", hb)
+	return marshal(acm)
+}
+
+// EncodeRunRequest builds a full AgentClientMessage wrapping an AgentRunRequest.
+// Mirrors buildCursorRequest() in cursor-fetch.ts.
+// If p.RawCheckpoint is set, it is used directly as the conversation_state bytes
+// (from a previous conversation_checkpoint_update), skipping manual turn construction.
+func EncodeRunRequest(p *RunRequestParams) []byte {
+	if p.RawCheckpoint != nil {
+		return encodeRunRequestWithCheckpoint(p)
+	}
+
+	if p.BlobStore == nil {
+		p.BlobStore = make(map[string][]byte)
+	}
+
+	// --- Conversation turns ---
+	// Each turn is serialized as bytes (ConversationTurnStructure → bytes)
+	var turnBytes [][]byte
+	for _, turn := range p.Turns {
+		// UserMessage for this turn
+		um := newMsg("UserMessage")
+		setStr(um, "text", turn.UserText)
+		setStr(um, "message_id", generateId())
+		umBytes := marshal(um)
+
+		// Steps (assistant response)
+		var stepBytes [][]byte
+		if turn.AssistantText != "" {
+			am := newMsg("AssistantMessage")
+			setStr(am, "text", turn.AssistantText)
+			step := newMsg("ConversationStep")
+			setMsg(step, "assistant_message", am)
+			stepBytes = append(stepBytes, marshal(step))
+		}
+
+		// AgentConversationTurnStructure (fields are bytes, not submessages)
+		agentTurn := newMsg("AgentConversationTurnStructure")
+		setBytes(agentTurn, "user_message", umBytes)
+		for _, sb := range stepBytes {
+			stepsField := field(agentTurn, "steps")
+			list := agentTurn.Mutable(stepsField).List()
+			list.Append(protoreflect.ValueOfBytes(sb))
+		}
+
+		// ConversationTurnStructure (oneof turn → agentConversationTurn)
+		cts := newMsg("ConversationTurnStructure")
+		setMsg(cts, "agent_conversation_turn", agentTurn)
+		turnBytes = append(turnBytes, marshal(cts))
+	}
+
+	// --- System prompt blob ---
+	systemJSON, _ := json.Marshal(map[string]string{"role": "system", "content": p.SystemPrompt})
+	blobId := sha256Sum(systemJSON)
+	p.BlobStore[hex.EncodeToString(blobId)] = systemJSON
+
+	// --- ConversationStateStructure ---
+	css := newMsg("ConversationStateStructure")
+	// rootPromptMessagesJson: repeated bytes
+	rootField := field(css, "root_prompt_messages_json")
+	rootList := css.Mutable(rootField).List()
+	rootList.Append(protoreflect.ValueOfBytes(blobId))
+	// turns: repeated bytes (field 8) + turns_old (field 2) for compatibility
+	turnsField := field(css, "turns")
+	turnsList := css.Mutable(turnsField).List()
+	for _, tb := range turnBytes {
+		turnsList.Append(protoreflect.ValueOfBytes(tb))
+	}
+	turnsOldField := field(css, "turns_old")
+	if turnsOldField != nil {
+		turnsOldList := css.Mutable(turnsOldField).List()
+		for _, tb := range turnBytes {
+			turnsOldList.Append(protoreflect.ValueOfBytes(tb))
+		}
+	}
+
+	// --- UserMessage (current) ---
+	userMessage := newMsg("UserMessage")
+	setStr(userMessage, "text", p.UserText)
+	setStr(userMessage, "message_id", p.MessageId)
+
+	// Images via SelectedContext
+	if len(p.Images) > 0 {
+		sc := newMsg("SelectedContext")
+		imgsField := field(sc, "selected_images")
+		imgsList := sc.Mutable(imgsField).List()
+		for _, img := range p.Images {
+			si := newMsg("SelectedImage")
+			setStr(si, "uuid", generateId())
+			setStr(si, "mime_type", img.MimeType)
+			setBytes(si, "data", img.Data)
+			imgsList.Append(protoreflect.ValueOfMessage(si.ProtoReflect()))
+		}
+		setMsg(userMessage, "selected_context", sc)
+	}
+
+	// --- UserMessageAction ---
+	uma := newMsg("UserMessageAction")
+	setMsg(uma, "user_message", userMessage)
+
+	// --- ConversationAction ---
+	ca := newMsg("ConversationAction")
+	setMsg(ca, "user_message_action", uma)
+
+	// --- ModelDetails ---
+	md := newMsg("ModelDetails")
+	setStr(md, "model_id", p.ModelId)
+	setStr(md, "display_model_id", p.ModelId)
+	setStr(md, "display_name", p.ModelId)
+
+	// --- AgentRunRequest ---
+	arr := newMsg("AgentRunRequest")
+	setMsg(arr, "conversation_state", css)
+	setMsg(arr, "action", ca)
+	setMsg(arr, "model_details", md)
+	setStr(arr, "conversation_id", p.ConversationId)
+
+	// McpTools
+	if len(p.McpTools) > 0 {
+		mcpTools := newMsg("McpTools")
+		toolsField := field(mcpTools, "mcp_tools")
+		toolsList := mcpTools.Mutable(toolsField).List()
+		for _, tool := range p.McpTools {
+			td := newMsg("McpToolDefinition")
+			setStr(td, "name", tool.Name)
+			setStr(td, "description", tool.Description)
+			if len(tool.InputSchema) > 0 {
+				setBytes(td, "input_schema", jsonToProtobufValueBytes(tool.InputSchema))
+			}
+			setStr(td, "provider_identifier", "proxy")
+			setStr(td, "tool_name", tool.Name)
+			toolsList.Append(protoreflect.ValueOfMessage(td.ProtoReflect()))
+		}
+		setMsg(arr, "mcp_tools", mcpTools)
+	}
+
+	// --- AgentClientMessage ---
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "run_request", arr)
+
+	return marshal(acm)
+}
+
+// encodeRunRequestWithCheckpoint builds an AgentClientMessage using a raw checkpoint
+// as conversation_state. The checkpoint bytes are embedded directly without deserialization.
+func encodeRunRequestWithCheckpoint(p *RunRequestParams) []byte {
+	// Build UserMessage
+	userMessage := newMsg("UserMessage")
+	setStr(userMessage, "text", p.UserText)
+	setStr(userMessage, "message_id", p.MessageId)
+	if len(p.Images) > 0 {
+		sc := newMsg("SelectedContext")
+		imgsField := field(sc, "selected_images")
+		imgsList := sc.Mutable(imgsField).List()
+		for _, img := range p.Images {
+			si := newMsg("SelectedImage")
+			setStr(si, "uuid", generateId())
+			setStr(si, "mime_type", img.MimeType)
+			setBytes(si, "data", img.Data)
+			imgsList.Append(protoreflect.ValueOfMessage(si.ProtoReflect()))
+		}
+		setMsg(userMessage, "selected_context", sc)
+	}
+
+	// Build ConversationAction with UserMessageAction
+	uma := newMsg("UserMessageAction")
+	setMsg(uma, "user_message", userMessage)
+	ca := newMsg("ConversationAction")
+	setMsg(ca, "user_message_action", uma)
+	caBytes := marshal(ca)
+
+	// Build ModelDetails
+	md := newMsg("ModelDetails")
+	setStr(md, "model_id", p.ModelId)
+	setStr(md, "display_model_id", p.ModelId)
+	setStr(md, "display_name", p.ModelId)
+	mdBytes := marshal(md)
+
+	// Build McpTools
+	var mcpToolsBytes []byte
+	if len(p.McpTools) > 0 {
+		mcpTools := newMsg("McpTools")
+		toolsField := field(mcpTools, "mcp_tools")
+		toolsList := mcpTools.Mutable(toolsField).List()
+		for _, tool := range p.McpTools {
+			td := newMsg("McpToolDefinition")
+			setStr(td, "name", tool.Name)
+			setStr(td, "description", tool.Description)
+			if len(tool.InputSchema) > 0 {
+				setBytes(td, "input_schema", jsonToProtobufValueBytes(tool.InputSchema))
+			}
+			setStr(td, "provider_identifier", "proxy")
+			setStr(td, "tool_name", tool.Name)
+			toolsList.Append(protoreflect.ValueOfMessage(td.ProtoReflect()))
+		}
+		mcpToolsBytes = marshal(mcpTools)
+	}
+
+	// Manually assemble AgentRunRequest using protowire to embed raw checkpoint
+	var arrBuf []byte
+	// field 1: conversation_state = raw checkpoint bytes (length-delimited)
+	arrBuf = protowire.AppendTag(arrBuf, ARR_ConversationState, protowire.BytesType)
+	arrBuf = protowire.AppendBytes(arrBuf, p.RawCheckpoint)
+	// field 2: action = ConversationAction
+	arrBuf = protowire.AppendTag(arrBuf, ARR_Action, protowire.BytesType)
+	arrBuf = protowire.AppendBytes(arrBuf, caBytes)
+	// field 3: model_details = ModelDetails
+	arrBuf = protowire.AppendTag(arrBuf, ARR_ModelDetails, protowire.BytesType)
+	arrBuf = protowire.AppendBytes(arrBuf, mdBytes)
+	// field 4: mcp_tools = McpTools
+	if len(mcpToolsBytes) > 0 {
+		arrBuf = protowire.AppendTag(arrBuf, ARR_McpTools, protowire.BytesType)
+		arrBuf = protowire.AppendBytes(arrBuf, mcpToolsBytes)
+	}
+	// field 5: conversation_id = string
+	if p.ConversationId != "" {
+		arrBuf = protowire.AppendTag(arrBuf, ARR_ConversationId, protowire.BytesType)
+		arrBuf = protowire.AppendString(arrBuf, p.ConversationId)
+	}
+
+	// Wrap in AgentClientMessage field 1 (run_request)
+	var acmBuf []byte
+	acmBuf = protowire.AppendTag(acmBuf, ACM_RunRequest, protowire.BytesType)
+	acmBuf = protowire.AppendBytes(acmBuf, arrBuf)
+
+	log.Debugf("cursor encode: built RunRequest with checkpoint (%d bytes), total=%d bytes", len(p.RawCheckpoint), len(acmBuf))
+	return acmBuf
+}
+
+// ResumeRequestParams holds data for a ResumeAction request.
+type ResumeRequestParams struct {
+	ModelId        string
+	ConversationId string
+	McpTools       []McpToolDef
+}
+
+// EncodeResumeRequest builds an AgentClientMessage with ResumeAction.
+// Used to resume a conversation by conversation_id without re-sending full history.
+func EncodeResumeRequest(p *ResumeRequestParams) []byte {
+	// RequestContext with tools
+	rc := newMsg("RequestContext")
+	if len(p.McpTools) > 0 {
+		toolsField := field(rc, "tools")
+		toolsList := rc.Mutable(toolsField).List()
+		for _, tool := range p.McpTools {
+			td := newMsg("McpToolDefinition")
+			setStr(td, "name", tool.Name)
+			setStr(td, "description", tool.Description)
+			if len(tool.InputSchema) > 0 {
+				setBytes(td, "input_schema", jsonToProtobufValueBytes(tool.InputSchema))
+			}
+			setStr(td, "provider_identifier", "proxy")
+			setStr(td, "tool_name", tool.Name)
+			toolsList.Append(protoreflect.ValueOfMessage(td.ProtoReflect()))
+		}
+	}
+
+	// ResumeAction
+	ra := newMsg("ResumeAction")
+	setMsg(ra, "request_context", rc)
+
+	// ConversationAction with resume_action
+	ca := newMsg("ConversationAction")
+	setMsg(ca, "resume_action", ra)
+
+	// ModelDetails
+	md := newMsg("ModelDetails")
+	setStr(md, "model_id", p.ModelId)
+	setStr(md, "display_model_id", p.ModelId)
+	setStr(md, "display_name", p.ModelId)
+
+	// AgentRunRequest — no conversation_state needed for resume
+	arr := newMsg("AgentRunRequest")
+	setMsg(arr, "action", ca)
+	setMsg(arr, "model_details", md)
+	setStr(arr, "conversation_id", p.ConversationId)
+
+	// McpTools at top level
+	if len(p.McpTools) > 0 {
+		mcpTools := newMsg("McpTools")
+		toolsField := field(mcpTools, "mcp_tools")
+		toolsList := mcpTools.Mutable(toolsField).List()
+		for _, tool := range p.McpTools {
+			td := newMsg("McpToolDefinition")
+			setStr(td, "name", tool.Name)
+			setStr(td, "description", tool.Description)
+			if len(tool.InputSchema) > 0 {
+				setBytes(td, "input_schema", jsonToProtobufValueBytes(tool.InputSchema))
+			}
+			setStr(td, "provider_identifier", "proxy")
+			setStr(td, "tool_name", tool.Name)
+			toolsList.Append(protoreflect.ValueOfMessage(td.ProtoReflect()))
+		}
+		setMsg(arr, "mcp_tools", mcpTools)
+	}
+
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "run_request", arr)
+	return marshal(acm)
+}
+
+// --- KV response encoders ---
+// Mirrors handleKvMessage() in cursor-fetch.ts
+
+// EncodeKvGetBlobResult responds to a getBlobArgs request.
+func EncodeKvGetBlobResult(kvId uint32, blobData []byte) []byte {
+	result := newMsg("GetBlobResult")
+	if blobData != nil {
+		setBytes(result, "blob_data", blobData)
+	}
+
+	kvc := newMsg("KvClientMessage")
+	setUint32(kvc, "id", kvId)
+	setMsg(kvc, "get_blob_result", result)
+
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "kv_client_message", kvc)
+	return marshal(acm)
+}
+
+// EncodeKvSetBlobResult responds to a setBlobArgs request.
+func EncodeKvSetBlobResult(kvId uint32) []byte {
+	result := newMsg("SetBlobResult")
+
+	kvc := newMsg("KvClientMessage")
+	setUint32(kvc, "id", kvId)
+	setMsg(kvc, "set_blob_result", result)
+
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "kv_client_message", kvc)
+	return marshal(acm)
+}
+
+// --- Exec response encoders ---
+// Mirrors handleExecMessage() and sendExec() in cursor-fetch.ts
+
+// EncodeExecRequestContextResult responds to requestContextArgs with tool definitions.
+func EncodeExecRequestContextResult(execMsgId uint32, execId string, tools []McpToolDef) []byte {
+	// RequestContext with tools
+	rc := newMsg("RequestContext")
+	if len(tools) > 0 {
+		toolsField := field(rc, "tools")
+		toolsList := rc.Mutable(toolsField).List()
+		for _, tool := range tools {
+			td := newMsg("McpToolDefinition")
+			setStr(td, "name", tool.Name)
+			setStr(td, "description", tool.Description)
+			if len(tool.InputSchema) > 0 {
+				setBytes(td, "input_schema", jsonToProtobufValueBytes(tool.InputSchema))
+			}
+			setStr(td, "provider_identifier", "proxy")
+			setStr(td, "tool_name", tool.Name)
+			toolsList.Append(protoreflect.ValueOfMessage(td.ProtoReflect()))
+		}
+	}
+
+	// RequestContextSuccess
+	rcs := newMsg("RequestContextSuccess")
+	setMsg(rcs, "request_context", rc)
+
+	// RequestContextResult (oneof success)
+	rcr := newMsg("RequestContextResult")
+	setMsg(rcr, "success", rcs)
+
+	return encodeExecClientMsg(execMsgId, execId, "request_context_result", rcr)
+}
+
+// EncodeExecMcpResult responds with MCP tool result.
+func EncodeExecMcpResult(execMsgId uint32, execId string, content string, isError bool) []byte {
+	textContent := newMsg("McpTextContent")
+	setStr(textContent, "text", content)
+
+	contentItem := newMsg("McpToolResultContentItem")
+	setMsg(contentItem, "text", textContent)
+
+	success := newMsg("McpSuccess")
+	contentField := field(success, "content")
+	contentList := success.Mutable(contentField).List()
+	contentList.Append(protoreflect.ValueOfMessage(contentItem.ProtoReflect()))
+	setBool(success, "is_error", isError)
+
+	result := newMsg("McpResult")
+	setMsg(result, "success", success)
+
+	return encodeExecClientMsg(execMsgId, execId, "mcp_result", result)
+}
+
+// EncodeExecMcpError responds with MCP error.
+func EncodeExecMcpError(execMsgId uint32, execId string, errMsg string) []byte {
+	mcpErr := newMsg("McpError")
+	setStr(mcpErr, "error", errMsg)
+
+	result := newMsg("McpResult")
+	setMsg(result, "error", mcpErr)
+
+	return encodeExecClientMsg(execMsgId, execId, "mcp_result", result)
+}
+
+// --- Rejection encoders (mirror handleExecMessage rejections) ---
+
+func EncodeExecReadRejected(execMsgId uint32, execId string, path, reason string) []byte {
+	rej := newMsg("ReadRejected")
+	setStr(rej, "path", path)
+	setStr(rej, "reason", reason)
+	result := newMsg("ReadResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "read_result", result)
+}
+
+func EncodeExecShellRejected(execMsgId uint32, execId string, command, workDir, reason string) []byte {
+	rej := newMsg("ShellRejected")
+	setStr(rej, "command", command)
+	setStr(rej, "working_directory", workDir)
+	setStr(rej, "reason", reason)
+	result := newMsg("ShellResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "shell_result", result)
+}
+
+func EncodeExecWriteRejected(execMsgId uint32, execId string, path, reason string) []byte {
+	rej := newMsg("WriteRejected")
+	setStr(rej, "path", path)
+	setStr(rej, "reason", reason)
+	result := newMsg("WriteResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "write_result", result)
+}
+
+func EncodeExecDeleteRejected(execMsgId uint32, execId string, path, reason string) []byte {
+	rej := newMsg("DeleteRejected")
+	setStr(rej, "path", path)
+	setStr(rej, "reason", reason)
+	result := newMsg("DeleteResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "delete_result", result)
+}
+
+func EncodeExecLsRejected(execMsgId uint32, execId string, path, reason string) []byte {
+	rej := newMsg("LsRejected")
+	setStr(rej, "path", path)
+	setStr(rej, "reason", reason)
+	result := newMsg("LsResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "ls_result", result)
+}
+
+func EncodeExecGrepError(execMsgId uint32, execId string, errMsg string) []byte {
+	grepErr := newMsg("GrepError")
+	setStr(grepErr, "error", errMsg)
+	result := newMsg("GrepResult")
+	setMsg(result, "error", grepErr)
+	return encodeExecClientMsg(execMsgId, execId, "grep_result", result)
+}
+
+func EncodeExecFetchError(execMsgId uint32, execId string, url, errMsg string) []byte {
+	fetchErr := newMsg("FetchError")
+	setStr(fetchErr, "url", url)
+	setStr(fetchErr, "error", errMsg)
+	result := newMsg("FetchResult")
+	setMsg(result, "error", fetchErr)
+	return encodeExecClientMsg(execMsgId, execId, "fetch_result", result)
+}
+
+func EncodeExecDiagnosticsResult(execMsgId uint32, execId string) []byte {
+	result := newMsg("DiagnosticsResult")
+	return encodeExecClientMsg(execMsgId, execId, "diagnostics_result", result)
+}
+
+func EncodeExecBackgroundShellSpawnRejected(execMsgId uint32, execId string, command, workDir, reason string) []byte {
+	rej := newMsg("ShellRejected")
+	setStr(rej, "command", command)
+	setStr(rej, "working_directory", workDir)
+	setStr(rej, "reason", reason)
+	result := newMsg("BackgroundShellSpawnResult")
+	setMsg(result, "rejected", rej)
+	return encodeExecClientMsg(execMsgId, execId, "background_shell_spawn_result", result)
+}
+
+func EncodeExecWriteShellStdinError(execMsgId uint32, execId string, errMsg string) []byte {
+	wsErr := newMsg("WriteShellStdinError")
+	setStr(wsErr, "error", errMsg)
+	result := newMsg("WriteShellStdinResult")
+	setMsg(result, "error", wsErr)
+	return encodeExecClientMsg(execMsgId, execId, "write_shell_stdin_result", result)
+}
+
+// encodeExecClientMsg wraps an exec result in AgentClientMessage.
+// Mirrors sendExec() in cursor-fetch.ts.
+func encodeExecClientMsg(id uint32, execId string, resultFieldName string, resultMsg *dynamicpb.Message) []byte {
+	ecm := newMsg("ExecClientMessage")
+	setUint32(ecm, "id", id)
+	// Force set exec_id even if empty - Cursor requires this field to be set
+	ecm.Set(field(ecm, "exec_id"), protoreflect.ValueOfString(execId))
+
+	// Debug: check if field exists
+	fd := field(ecm, resultFieldName)
+	if fd == nil {
+		panic(fmt.Sprintf("field %q NOT FOUND in ExecClientMessage! Available fields: %v", resultFieldName, listFields(ecm)))
+	}
+
+	// Debug: log the actual field being set
+	log.Debugf("encodeExecClientMsg: setting field %q (number=%d, kind=%s)", fd.Name(), fd.Number(), fd.Kind())
+
+	ecm.Set(fd, protoreflect.ValueOfMessage(resultMsg.ProtoReflect()))
+
+	acm := newMsg("AgentClientMessage")
+	setMsg(acm, "exec_client_message", ecm)
+	return marshal(acm)
+}
+
+func listFields(msg *dynamicpb.Message) []string {
+	var names []string
+	for i := 0; i < msg.Descriptor().Fields().Len(); i++ {
+		names = append(names, string(msg.Descriptor().Fields().Get(i).Name()))
+	}
+	return names
+}
+
+// --- Utilities ---
+
+// jsonToProtobufValueBytes converts a JSON schema (json.RawMessage) to protobuf Value binary.
+// This mirrors the TS pattern: toBinary(ValueSchema, fromJson(ValueSchema, jsonSchema))
+func jsonToProtobufValueBytes(jsonData json.RawMessage) []byte {
+	if len(jsonData) == 0 {
+		return nil
+	}
+	var v interface{}
+	if err := json.Unmarshal(jsonData, &v); err != nil {
+		return jsonData // fallback to raw JSON if parsing fails
+	}
+	pbVal, err := structpb.NewValue(v)
+	if err != nil {
+		return jsonData // fallback
+	}
+	b, err := proto.Marshal(pbVal)
+	if err != nil {
+		return jsonData // fallback
+	}
+	return b
+}
+
+// ProtobufValueBytesToJSON converts protobuf Value binary back to JSON.
+// This mirrors the TS pattern: toJson(ValueSchema, fromBinary(ValueSchema, value))
+func ProtobufValueBytesToJSON(data []byte) (interface{}, error) {
+	val := &structpb.Value{}
+	if err := proto.Unmarshal(data, val); err != nil {
+		return nil, err
+	}
+	return val.AsInterface(), nil
+}
+
+func sha256Sum(data []byte) []byte {
+	h := sha256.Sum256(data)
+	return h[:]
+}
+
+var idCounter uint64
+
+func generateId() string {
+	idCounter++
+	h := sha256.Sum256([]byte{byte(idCounter), byte(idCounter >> 8), byte(idCounter >> 16)})
+	return hex.EncodeToString(h[:16])
+}
--- a/internal/auth/cursor/proto/fieldnumbers.go
+++ b/internal/auth/cursor/proto/fieldnumbers.go
@@ -0,0 +1,332 @@
+// Package proto provides hand-rolled protobuf encode/decode for Cursor's gRPC API.
+// Field numbers are extracted from the TypeScript generated proto/agent_pb.ts in alma-plugins/cursor-auth.
+package proto
+
+// AgentClientMessage (msg 118) oneof "message"
+const (
+	ACM_RunRequest              = 1 // AgentRunRequest
+	ACM_ExecClientMessage       = 2 // ExecClientMessage
+	ACM_KvClientMessage         = 3 // KvClientMessage
+	ACM_ConversationAction      = 4 // ConversationAction
+	ACM_ExecClientControlMsg    = 5 // ExecClientControlMessage
+	ACM_InteractionResponse     = 6 // InteractionResponse
+	ACM_ClientHeartbeat         = 7 // ClientHeartbeat
+)
+
+// AgentServerMessage (msg 119) oneof "message"
+const (
+	ASM_InteractionUpdate         = 1 // InteractionUpdate
+	ASM_ExecServerMessage         = 2 // ExecServerMessage
+	ASM_ConversationCheckpoint    = 3 // ConversationStateStructure
+	ASM_KvServerMessage           = 4 // KvServerMessage
+	ASM_ExecServerControlMessage  = 5 // ExecServerControlMessage
+	ASM_InteractionQuery          = 7 // InteractionQuery
+)
+
+// AgentRunRequest (msg 91)
+const (
+	ARR_ConversationState = 1 // ConversationStateStructure
+	ARR_Action            = 2 // ConversationAction
+	ARR_ModelDetails      = 3 // ModelDetails
+	ARR_McpTools          = 4 // McpTools
+	ARR_ConversationId    = 5 // string (optional)
+)
+
+// ConversationStateStructure (msg 83)
+const (
+	CSS_RootPromptMessagesJson = 1  // repeated bytes
+	CSS_TurnsOld               = 2  // repeated bytes (deprecated)
+	CSS_Todos                  = 3  // repeated bytes
+	CSS_PendingToolCalls       = 4  // repeated string
+	CSS_Turns                  = 8  // repeated bytes (CURRENT field for turns)
+	CSS_PreviousWorkspaceUris  = 9  // repeated string
+	CSS_SelfSummaryCount       = 17 // uint32
+	CSS_ReadPaths              = 18 // repeated string
+)
+
+// ConversationAction (msg 54) oneof "action"
+const (
+	CA_UserMessageAction = 1 // UserMessageAction
+)
+
+// UserMessageAction (msg 55)
+const (
+	UMA_UserMessage = 1 // UserMessage
+)
+
+// UserMessage (msg 63)
+const (
+	UM_Text            = 1 // string
+	UM_MessageId       = 2 // string
+	UM_SelectedContext = 3 // SelectedContext (optional)
+)
+
+// SelectedContext
+const (
+	SC_SelectedImages = 1 // repeated SelectedImage
+)
+
+// SelectedImage
+const (
+	SI_BlobId   = 1 // bytes (oneof dataOrBlobId)
+	SI_Uuid     = 2 // string
+	SI_Path     = 3 // string
+	SI_MimeType = 7 // string
+	SI_Data     = 8 // bytes (oneof dataOrBlobId)
+)
+
+// ModelDetails (msg 88)
+const (
+	MD_ModelId        = 1 // string
+	MD_ThinkingDetails = 2 // ThinkingDetails (optional)
+	MD_DisplayModelId = 3 // string
+	MD_DisplayName    = 4 // string
+)
+
+// McpTools (msg 307)
+const (
+	MT_McpTools = 1 // repeated McpToolDefinition
+)
+
+// McpToolDefinition (msg 306)
+const (
+	MTD_Name               = 1 // string
+	MTD_Description        = 2 // string
+	MTD_InputSchema        = 3 // bytes
+	MTD_ProviderIdentifier = 4 // string
+	MTD_ToolName           = 5 // string
+)
+
+// ConversationTurnStructure (msg 70) oneof "turn"
+const (
+	CTS_AgentConversationTurn = 1 // AgentConversationTurnStructure
+)
+
+// AgentConversationTurnStructure (msg 72)
+const (
+	ACTS_UserMessage = 1 // bytes (serialized UserMessage)
+	ACTS_Steps       = 2 // repeated bytes (serialized ConversationStep)
+)
+
+// ConversationStep (msg 53) oneof "message"
+const (
+	CS_AssistantMessage = 1 // AssistantMessage
+)
+
+// AssistantMessage
+const (
+	AM_Text = 1 // string
+)
+
+// --- Server-side message fields ---
+
+// InteractionUpdate oneof "message"
+const (
+	IU_TextDelta         = 1  // TextDeltaUpdate
+	IU_ThinkingDelta     = 4  // ThinkingDeltaUpdate
+	IU_ThinkingCompleted = 5  // ThinkingCompletedUpdate
+)
+
+// TextDeltaUpdate (msg 92)
+const (
+	TDU_Text = 1 // string
+)
+
+// ThinkingDeltaUpdate (msg 97)
+const (
+	TKD_Text = 1 // string
+)
+
+// KvServerMessage (msg 271)
+const (
+	KSM_Id          = 1 // uint32
+	KSM_GetBlobArgs = 2 // GetBlobArgs
+	KSM_SetBlobArgs = 3 // SetBlobArgs
+)
+
+// GetBlobArgs (msg 267)
+const (
+	GBA_BlobId = 1 // bytes
+)
+
+// SetBlobArgs (msg 269)
+const (
+	SBA_BlobId   = 1 // bytes
+	SBA_BlobData = 2 // bytes
+)
+
+// KvClientMessage (msg 272)
+const (
+	KCM_Id            = 1 // uint32
+	KCM_GetBlobResult = 2 // GetBlobResult
+	KCM_SetBlobResult = 3 // SetBlobResult
+)
+
+// GetBlobResult (msg 268)
+const (
+	GBR_BlobData = 1 // bytes (optional)
+)
+
+// ExecServerMessage
+const (
+	ESM_Id      = 1  // uint32
+	ESM_ExecId  = 15 // string
+	// oneof message:
+	ESM_ShellArgs              = 2  // ShellArgs
+	ESM_WriteArgs              = 3  // WriteArgs
+	ESM_DeleteArgs             = 4  // DeleteArgs
+	ESM_GrepArgs               = 5  // GrepArgs
+	ESM_ReadArgs               = 7  // ReadArgs (NOTE: 6 is skipped)
+	ESM_LsArgs                 = 8  // LsArgs
+	ESM_DiagnosticsArgs        = 9  // DiagnosticsArgs
+	ESM_RequestContextArgs     = 10 // RequestContextArgs
+	ESM_McpArgs                = 11 // McpArgs
+	ESM_ShellStreamArgs        = 14 // ShellArgs (stream variant)
+	ESM_BackgroundShellSpawn   = 16 // BackgroundShellSpawnArgs
+	ESM_FetchArgs              = 20 // FetchArgs
+	ESM_WriteShellStdinArgs    = 23 // WriteShellStdinArgs
+)
+
+// ExecClientMessage
+const (
+	ECM_Id     = 1  // uint32
+	ECM_ExecId = 15 // string
+	// oneof message (mirrors server fields):
+	ECM_ShellResult              = 2
+	ECM_WriteResult              = 3
+	ECM_DeleteResult             = 4
+	ECM_GrepResult               = 5
+	ECM_ReadResult               = 7
+	ECM_LsResult                 = 8
+	ECM_DiagnosticsResult        = 9
+	ECM_RequestContextResult     = 10
+	ECM_McpResult                = 11
+	ECM_ShellStream              = 14
+	ECM_BackgroundShellSpawnRes  = 16
+	ECM_FetchResult              = 20
+	ECM_WriteShellStdinResult    = 23
+)
+
+// McpArgs
+const (
+	MCA_Name               = 1 // string
+	MCA_Args               = 2 // map<string, bytes>
+	MCA_ToolCallId         = 3 // string
+	MCA_ProviderIdentifier = 4 // string
+	MCA_ToolName           = 5 // string
+)
+
+// RequestContextResult oneof "result"
+const (
+	RCR_Success = 1 // RequestContextSuccess
+	RCR_Error   = 2 // RequestContextError
+)
+
+// RequestContextSuccess (msg 337)
+const (
+	RCS_RequestContext = 1 // RequestContext
+)
+
+// RequestContext
+const (
+	RC_Rules = 2 // repeated CursorRule
+	RC_Tools = 7 // repeated McpToolDefinition
+)
+
+// McpResult oneof "result"
+const (
+	MCR_Success  = 1 // McpSuccess
+	MCR_Error    = 2 // McpError
+	MCR_Rejected = 3 // McpRejected
+)
+
+// McpSuccess (msg 290)
+const (
+	MCS_Content = 1 // repeated McpToolResultContentItem
+	MCS_IsError = 2 // bool
+)
+
+// McpToolResultContentItem oneof "content"
+const (
+	MTRCI_Text = 1 // McpTextContent
+)
+
+// McpTextContent (msg 287)
+const (
+	MTC_Text = 1 // string
+)
+
+// McpError (msg 291)
+const (
+	MCE_Error = 1 // string
+)
+
+// --- Rejection messages ---
+
+// ReadRejected: path=1, reason=2
+// ShellRejected: command=1, workingDirectory=2, reason=3, isReadonly=4
+// WriteRejected: path=1, reason=2
+// DeleteRejected: path=1, reason=2
+// LsRejected: path=1, reason=2
+// GrepError: error=1
+// FetchError: url=1, error=2
+// WriteShellStdinError: error=1
+
+// ReadResult oneof: success=1, error=2, rejected=3
+// ShellResult oneof: success=1 (+ various), rejected=?
+// The TS code uses specific result field numbers from the oneof:
+const (
+	RR_Rejected = 3 // ReadResult.rejected
+	SR_Rejected = 5 // ShellResult.rejected (from TS: ShellResult has success/various/rejected)
+	WR_Rejected = 5 // WriteResult.rejected
+	DR_Rejected = 3 // DeleteResult.rejected
+	LR_Rejected = 3 // LsResult.rejected
+	GR_Error    = 2 // GrepResult.error
+	FR_Error    = 2 // FetchResult.error
+	BSSR_Rejected = 2 // BackgroundShellSpawnResult.rejected (error field)
+	WSSR_Error    = 2 // WriteShellStdinResult.error
+)
+
+// --- Rejection struct fields ---
+const (
+	REJ_Path             = 1
+	REJ_Reason           = 2
+	SREJ_Command         = 1
+	SREJ_WorkingDir      = 2
+	SREJ_Reason          = 3
+	SREJ_IsReadonly      = 4
+	GERR_Error           = 1
+	FERR_Url             = 1
+	FERR_Error           = 2
+)
+
+// ReadArgs
+const (
+	RA_Path = 1 // string
+)
+
+// WriteArgs
+const (
+	WA_Path = 1 // string
+)
+
+// DeleteArgs
+const (
+	DA_Path = 1 // string
+)
+
+// LsArgs
+const (
+	LA_Path = 1 // string
+)
+
+// ShellArgs
+const (
+	SHA_Command          = 1 // string
+	SHA_WorkingDirectory = 2 // string
+)
+
+// FetchArgs
+const (
+	FA_Url = 1 // string
+)
--- a/internal/auth/cursor/proto/h2stream.go
+++ b/internal/auth/cursor/proto/h2stream.go
@@ -0,0 +1,313 @@
+package proto
+
+import (
+	"crypto/tls"
+	"fmt"
+	"io"
+	"net"
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/net/http2"
+	"golang.org/x/net/http2/hpack"
+)
+
+const (
+	defaultInitialWindowSize = 65535 // HTTP/2 default
+	maxFramePayload          = 16384 // HTTP/2 default max frame size
+)
+
+// H2Stream provides bidirectional HTTP/2 streaming for the Connect protocol.
+// Go's net/http does not support full-duplex HTTP/2, so we use the low-level framer.
+type H2Stream struct {
+	framer   *http2.Framer
+	conn     net.Conn
+	streamID uint32
+	mu       sync.Mutex
+	id       string // unique identifier for debugging
+	frameNum int64  // sequential frame counter for debugging
+
+	dataCh chan []byte
+	doneCh chan struct{}
+	err    error
+
+	// Send-side flow control
+	sendWindow   int32      // available bytes we can send on this stream
+	connWindow   int32      // available bytes on the connection level
+	windowCond   *sync.Cond // signaled when window is updated
+	windowMu     sync.Mutex // protects sendWindow, connWindow
+}
+
+// ID returns the unique identifier for this stream (for logging).
+func (s *H2Stream) ID() string { return s.id }
+
+// FrameNum returns the current frame number for debugging.
+func (s *H2Stream) FrameNum() int64 {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.frameNum
+}
+
+// DialH2Stream establishes a TLS+HTTP/2 connection and opens a new stream.
+func DialH2Stream(host string, headers map[string]string) (*H2Stream, error) {
+	tlsConn, err := tls.Dial("tcp", host+":443", &tls.Config{
+		NextProtos: []string{"h2"},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("h2: TLS dial failed: %w", err)
+	}
+	if tlsConn.ConnectionState().NegotiatedProtocol != "h2" {
+		tlsConn.Close()
+		return nil, fmt.Errorf("h2: server did not negotiate h2")
+	}
+
+	framer := http2.NewFramer(tlsConn, tlsConn)
+
+	// Client connection preface
+	if _, err := tlsConn.Write([]byte(http2.ClientPreface)); err != nil {
+		tlsConn.Close()
+		return nil, fmt.Errorf("h2: preface write failed: %w", err)
+	}
+
+	// Send initial SETTINGS (tell server how much WE can receive)
+	if err := framer.WriteSettings(
+		http2.Setting{ID: http2.SettingInitialWindowSize, Val: 4 * 1024 * 1024},
+		http2.Setting{ID: http2.SettingMaxConcurrentStreams, Val: 100},
+	); err != nil {
+		tlsConn.Close()
+		return nil, fmt.Errorf("h2: settings write failed: %w", err)
+	}
+
+	// Connection-level window update (for receiving)
+	if err := framer.WriteWindowUpdate(0, 3*1024*1024); err != nil {
+		tlsConn.Close()
+		return nil, fmt.Errorf("h2: window update failed: %w", err)
+	}
+
+	// Read and handle initial server frames (SETTINGS, WINDOW_UPDATE)
+	// Track server's initial window size (how much WE can send)
+	serverInitialWindowSize := int32(defaultInitialWindowSize)
+	connWindowSize := int32(defaultInitialWindowSize) // connection-level send window
+	for i := 0; i < 10; i++ {
+		f, err := framer.ReadFrame()
+		if err != nil {
+			tlsConn.Close()
+			return nil, fmt.Errorf("h2: initial frame read failed: %w", err)
+		}
+		switch sf := f.(type) {
+		case *http2.SettingsFrame:
+			if !sf.IsAck() {
+				sf.ForeachSetting(func(s http2.Setting) error {
+					if s.ID == http2.SettingInitialWindowSize {
+						serverInitialWindowSize = int32(s.Val)
+						log.Debugf("h2: server initial window size: %d", s.Val)
+					}
+					return nil
+				})
+				framer.WriteSettingsAck()
+			} else {
+				goto handshakeDone
+			}
+		case *http2.WindowUpdateFrame:
+			if sf.StreamID == 0 {
+				connWindowSize += int32(sf.Increment)
+				log.Debugf("h2: initial conn window update: +%d, total=%d", sf.Increment, connWindowSize)
+			}
+		default:
+			// unexpected but continue
+		}
+	}
+handshakeDone:
+
+	// Build HEADERS
+	streamID := uint32(1)
+	var hdrBuf []byte
+	enc := hpack.NewEncoder(&sliceWriter{buf: &hdrBuf})
+	enc.WriteField(hpack.HeaderField{Name: ":method", Value: "POST"})
+	enc.WriteField(hpack.HeaderField{Name: ":scheme", Value: "https"})
+	enc.WriteField(hpack.HeaderField{Name: ":authority", Value: host})
+	if p, ok := headers[":path"]; ok {
+		enc.WriteField(hpack.HeaderField{Name: ":path", Value: p})
+	}
+	for k, v := range headers {
+		if len(k) > 0 && k[0] == ':' {
+			continue
+		}
+		enc.WriteField(hpack.HeaderField{Name: k, Value: v})
+	}
+
+	if err := framer.WriteHeaders(http2.HeadersFrameParam{
+		StreamID:      streamID,
+		BlockFragment: hdrBuf,
+		EndStream:     false,
+		EndHeaders:    true,
+	}); err != nil {
+		tlsConn.Close()
+		return nil, fmt.Errorf("h2: headers write failed: %w", err)
+	}
+
+	s := &H2Stream{
+		framer:     framer,
+		conn:       tlsConn,
+		streamID:   streamID,
+		dataCh:     make(chan []byte, 256),
+		doneCh:     make(chan struct{}),
+		id:         fmt.Sprintf("%d-%s", streamID, time.Now().Format("150405.000")),
+		frameNum:   0,
+		sendWindow: serverInitialWindowSize,
+		connWindow: connWindowSize,
+	}
+	s.windowCond = sync.NewCond(&s.windowMu)
+	go s.readLoop()
+	return s, nil
+}
+
+// Write sends a DATA frame on the stream, respecting flow control.
+func (s *H2Stream) Write(data []byte) error {
+	for len(data) > 0 {
+		chunk := data
+		if len(chunk) > maxFramePayload {
+			chunk = data[:maxFramePayload]
+		}
+
+		// Wait for flow control window
+		s.windowMu.Lock()
+		for s.sendWindow <= 0 || s.connWindow <= 0 {
+			s.windowCond.Wait()
+		}
+		// Limit chunk to available window
+		allowed := int(s.sendWindow)
+		if int(s.connWindow) < allowed {
+			allowed = int(s.connWindow)
+		}
+		if len(chunk) > allowed {
+			chunk = chunk[:allowed]
+		}
+		s.sendWindow -= int32(len(chunk))
+		s.connWindow -= int32(len(chunk))
+		s.windowMu.Unlock()
+
+		s.mu.Lock()
+		err := s.framer.WriteData(s.streamID, false, chunk)
+		s.mu.Unlock()
+		if err != nil {
+			return err
+		}
+		data = data[len(chunk):]
+	}
+	return nil
+}
+
+// Data returns the channel of received data chunks.
+func (s *H2Stream) Data() <-chan []byte { return s.dataCh }
+
+// Done returns a channel closed when the stream ends.
+func (s *H2Stream) Done() <-chan struct{} { return s.doneCh }
+
+// Err returns the error (if any) that caused the stream to close.
+// Returns nil for a clean shutdown (EOF / StreamEnded).
+func (s *H2Stream) Err() error { return s.err }
+
+// Close tears down the connection.
+func (s *H2Stream) Close() {
+	s.conn.Close()
+	// Unblock any writers waiting on flow control
+	s.windowCond.Broadcast()
+}
+
+func (s *H2Stream) readLoop() {
+	defer close(s.doneCh)
+	defer close(s.dataCh)
+
+	for {
+		f, err := s.framer.ReadFrame()
+		if err != nil {
+			if err != io.EOF {
+				s.err = err
+				log.Debugf("h2stream[%s]: readLoop error: %v", s.id, err)
+			}
+			return
+		}
+
+		// Increment frame counter
+		s.mu.Lock()
+		s.frameNum++
+		s.mu.Unlock()
+
+		switch frame := f.(type) {
+		case *http2.DataFrame:
+			if frame.StreamID == s.streamID && len(frame.Data()) > 0 {
+				cp := make([]byte, len(frame.Data()))
+				copy(cp, frame.Data())
+				s.dataCh <- cp
+
+				// Flow control: send WINDOW_UPDATE for received data
+				s.mu.Lock()
+				s.framer.WriteWindowUpdate(0, uint32(len(cp)))
+				s.framer.WriteWindowUpdate(s.streamID, uint32(len(cp)))
+				s.mu.Unlock()
+			}
+			if frame.StreamEnded() {
+				return
+			}
+
+		case *http2.HeadersFrame:
+			if frame.StreamEnded() {
+				return
+			}
+
+		case *http2.RSTStreamFrame:
+			s.err = fmt.Errorf("h2: RST_STREAM code=%d", frame.ErrCode)
+			log.Debugf("h2stream[%s]: received RST_STREAM code=%d", s.id, frame.ErrCode)
+			return
+
+		case *http2.GoAwayFrame:
+			s.err = fmt.Errorf("h2: GOAWAY code=%d", frame.ErrCode)
+			return
+
+		case *http2.PingFrame:
+			if !frame.IsAck() {
+				s.mu.Lock()
+				s.framer.WritePing(true, frame.Data)
+				s.mu.Unlock()
+			}
+
+		case *http2.SettingsFrame:
+			if !frame.IsAck() {
+				// Check for window size changes
+				frame.ForeachSetting(func(setting http2.Setting) error {
+					if setting.ID == http2.SettingInitialWindowSize {
+						s.windowMu.Lock()
+						delta := int32(setting.Val) - s.sendWindow
+						s.sendWindow += delta
+						s.windowMu.Unlock()
+						s.windowCond.Broadcast()
+					}
+					return nil
+				})
+				s.mu.Lock()
+				s.framer.WriteSettingsAck()
+				s.mu.Unlock()
+			}
+
+		case *http2.WindowUpdateFrame:
+			// Update send-side flow control window
+			s.windowMu.Lock()
+			if frame.StreamID == 0 {
+				s.connWindow += int32(frame.Increment)
+			} else if frame.StreamID == s.streamID {
+				s.sendWindow += int32(frame.Increment)
+			}
+			s.windowMu.Unlock()
+			s.windowCond.Broadcast()
+		}
+	}
+}
+
+type sliceWriter struct{ buf *[]byte }
+
+func (w *sliceWriter) Write(p []byte) (int, error) {
+	*w.buf = append(*w.buf, p...)
+	return len(p), nil
+}
--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -10,9 +10,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"net"
 	"net/http"
-	"net/url"
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
@@ -20,9 +18,9 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
-	"golang.org/x/net/proxy"

 	"golang.org/x/oauth2"
 	"golang.org/x/oauth2/google"
@@ -80,36 +78,16 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
 	}
 	callbackURL := fmt.Sprintf("http://localhost:%d/oauth2callback", callbackPort)

-	// Configure proxy settings for the HTTP client if a proxy URL is provided.
-	proxyURL, err := url.Parse(cfg.ProxyURL)
-	if err == nil {
-		var transport *http.Transport
-		if proxyURL.Scheme == "socks5" {
-			// Handle SOCKS5 proxy.
-			username := proxyURL.User.Username()
-			password, _ := proxyURL.User.Password()
-			auth := &proxy.Auth{User: username, Password: password}
-			dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, auth, proxy.Direct)
-			if errSOCKS5 != nil {
-				log.Errorf("create SOCKS5 dialer failed: %v", errSOCKS5)
-				return nil, fmt.Errorf("create SOCKS5 dialer failed: %w", errSOCKS5)
-			}
-			transport = &http.Transport{
-				DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
-					return dialer.Dial(network, addr)
-				},
-			}
-		} else if proxyURL.Scheme == "http" || proxyURL.Scheme == "https" {
-			// Handle HTTP/HTTPS proxy.
-			transport = &http.Transport{Proxy: http.ProxyURL(proxyURL)}
-		}
-
-		if transport != nil {
-			proxyClient := &http.Client{Transport: transport}
-			ctx = context.WithValue(ctx, oauth2.HTTPClient, proxyClient)
-		}
+	transport, _, errBuild := proxyutil.BuildHTTPTransport(cfg.ProxyURL)
+	if errBuild != nil {
+		log.Errorf("%v", errBuild)
+	} else if transport != nil {
+		proxyClient := &http.Client{Transport: transport}
+		ctx = context.WithValue(ctx, oauth2.HTTPClient, proxyClient)
 	}

+	var err error
+
 	// Configure the OAuth2 client.
 	conf := &oauth2.Config{
 		ClientID:     ClientID,
@@ -327,6 +305,9 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
 		defer manualPromptTimer.Stop()
 	}

+	var manualInputCh <-chan string
+	var manualInputErrCh <-chan error
+
 waitForCallback:
 	for {
 		select {
@@ -348,13 +329,14 @@ waitForCallback:
 				return nil, err
 			default:
 			}
-			input, err := opts.Prompt("Paste the Gemini callback URL (or press Enter to keep waiting): ")
-			if err != nil {
-				return nil, err
-			}
-			parsed, err := misc.ParseOAuthCallback(input)
-			if err != nil {
-				return nil, err
+			manualInputCh, manualInputErrCh = misc.AsyncPrompt(opts.Prompt, "Paste the Gemini callback URL (or press Enter to keep waiting): ")
+			continue
+		case input := <-manualInputCh:
+			manualInputCh = nil
+			manualInputErrCh = nil
+			parsed, errParse := misc.ParseOAuthCallback(input)
+			if errParse != nil {
+				return nil, errParse
 			}
 			if parsed == nil {
 				continue
@@ -367,6 +349,8 @@ waitForCallback:
 			}
 			authCode = parsed.Code
 			break waitForCallback
+		case errManual := <-manualInputErrCh:
+			return nil, errManual
 		case <-timeoutTimer.C:
 			return nil, fmt.Errorf("oauth flow timed out")
 		}
--- a/internal/auth/gitlab/gitlab.go
+++ b/internal/auth/gitlab/gitlab.go
@@ -0,0 +1,492 @@
+package gitlab
+
+import (
+	"context"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	DefaultBaseURL      = "https://gitlab.com"
+	DefaultCallbackPort = 17171
+	defaultOAuthScope   = "api read_user"
+)
+
+type PKCECodes struct {
+	CodeVerifier  string
+	CodeChallenge string
+}
+
+type OAuthResult struct {
+	Code  string
+	State string
+	Error string
+}
+
+type OAuthServer struct {
+	server     *http.Server
+	port       int
+	resultChan chan *OAuthResult
+	errorChan  chan error
+	mu         sync.Mutex
+	running    bool
+}
+
+type TokenResponse struct {
+	AccessToken  string `json:"access_token"`
+	TokenType    string `json:"token_type"`
+	RefreshToken string `json:"refresh_token"`
+	Scope        string `json:"scope"`
+	CreatedAt    int64  `json:"created_at"`
+	ExpiresIn    int    `json:"expires_in"`
+}
+
+type User struct {
+	ID          int64  `json:"id"`
+	Username    string `json:"username"`
+	Name        string `json:"name"`
+	Email       string `json:"email"`
+	PublicEmail string `json:"public_email"`
+}
+
+type PersonalAccessTokenSelf struct {
+	ID     int64    `json:"id"`
+	Name   string   `json:"name"`
+	Scopes []string `json:"scopes"`
+	UserID int64    `json:"user_id"`
+}
+
+type ModelDetails struct {
+	ModelProvider string `json:"model_provider"`
+	ModelName     string `json:"model_name"`
+}
+
+type DirectAccessResponse struct {
+	BaseURL      string            `json:"base_url"`
+	Token        string            `json:"token"`
+	ExpiresAt    int64             `json:"expires_at"`
+	Headers      map[string]string `json:"headers"`
+	ModelDetails *ModelDetails     `json:"model_details,omitempty"`
+}
+
+type DiscoveredModel struct {
+	ModelProvider string
+	ModelName     string
+}
+
+type AuthClient struct {
+	httpClient *http.Client
+}
+
+func NewAuthClient(cfg *config.Config) *AuthClient {
+	client := &http.Client{}
+	if cfg != nil {
+		client = util.SetProxy(&cfg.SDKConfig, client)
+	}
+	return &AuthClient{httpClient: client}
+}
+
+func NormalizeBaseURL(raw string) string {
+	value := strings.TrimSpace(raw)
+	if value == "" {
+		return DefaultBaseURL
+	}
+	if !strings.Contains(value, "://") {
+		value = "https://" + value
+	}
+	value = strings.TrimRight(value, "/")
+	return value
+}
+
+func TokenExpiry(now time.Time, token *TokenResponse) time.Time {
+	if token == nil {
+		return time.Time{}
+	}
+	if token.CreatedAt > 0 && token.ExpiresIn > 0 {
+		return time.Unix(token.CreatedAt+int64(token.ExpiresIn), 0).UTC()
+	}
+	if token.ExpiresIn > 0 {
+		return now.UTC().Add(time.Duration(token.ExpiresIn) * time.Second)
+	}
+	return time.Time{}
+}
+
+func GeneratePKCECodes() (*PKCECodes, error) {
+	verifierBytes := make([]byte, 32)
+	if _, err := rand.Read(verifierBytes); err != nil {
+		return nil, fmt.Errorf("gitlab pkce generation failed: %w", err)
+	}
+	verifier := base64.RawURLEncoding.EncodeToString(verifierBytes)
+	sum := sha256.Sum256([]byte(verifier))
+	challenge := base64.RawURLEncoding.EncodeToString(sum[:])
+	return &PKCECodes{
+		CodeVerifier:  verifier,
+		CodeChallenge: challenge,
+	}, nil
+}
+
+func NewOAuthServer(port int) *OAuthServer {
+	return &OAuthServer{
+		port:       port,
+		resultChan: make(chan *OAuthResult, 1),
+		errorChan:  make(chan error, 1),
+	}
+}
+
+func (s *OAuthServer) Start() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if s.running {
+		return fmt.Errorf("gitlab oauth server already running")
+	}
+	if !s.isPortAvailable() {
+		return fmt.Errorf("port %d is already in use", s.port)
+	}
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/auth/callback", s.handleCallback)
+
+	s.server = &http.Server{
+		Addr:         fmt.Sprintf(":%d", s.port),
+		Handler:      mux,
+		ReadTimeout:  10 * time.Second,
+		WriteTimeout: 10 * time.Second,
+	}
+	s.running = true
+
+	go func() {
+		if err := s.server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			s.errorChan <- err
+		}
+	}()
+
+	time.Sleep(100 * time.Millisecond)
+	return nil
+}
+
+func (s *OAuthServer) Stop(ctx context.Context) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.running || s.server == nil {
+		return nil
+	}
+	defer func() {
+		s.running = false
+		s.server = nil
+	}()
+	return s.server.Shutdown(ctx)
+}
+
+func (s *OAuthServer) WaitForCallback(timeout time.Duration) (*OAuthResult, error) {
+	select {
+	case result := <-s.resultChan:
+		return result, nil
+	case err := <-s.errorChan:
+		return nil, err
+	case <-time.After(timeout):
+		return nil, fmt.Errorf("timeout waiting for OAuth callback")
+	}
+}
+
+func (s *OAuthServer) handleCallback(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	query := r.URL.Query()
+	if errParam := strings.TrimSpace(query.Get("error")); errParam != "" {
+		s.sendResult(&OAuthResult{Error: errParam})
+		http.Error(w, errParam, http.StatusBadRequest)
+		return
+	}
+	code := strings.TrimSpace(query.Get("code"))
+	state := strings.TrimSpace(query.Get("state"))
+	if code == "" || state == "" {
+		s.sendResult(&OAuthResult{Error: "missing_code_or_state"})
+		http.Error(w, "missing code or state", http.StatusBadRequest)
+		return
+	}
+	s.sendResult(&OAuthResult{Code: code, State: state})
+	_, _ = w.Write([]byte("GitLab authentication received. You can close this tab."))
+}
+
+func (s *OAuthServer) sendResult(result *OAuthResult) {
+	select {
+	case s.resultChan <- result:
+	default:
+		log.Debug("gitlab oauth result channel full, dropping callback result")
+	}
+}
+
+func (s *OAuthServer) isPortAvailable() bool {
+	listener, err := net.Listen("tcp", fmt.Sprintf(":%d", s.port))
+	if err != nil {
+		return false
+	}
+	_ = listener.Close()
+	return true
+}
+
+func RedirectURL(port int) string {
+	return fmt.Sprintf("http://localhost:%d/auth/callback", port)
+}
+
+func (c *AuthClient) GenerateAuthURL(baseURL, clientID, redirectURI, state string, pkce *PKCECodes) (string, error) {
+	if pkce == nil {
+		return "", fmt.Errorf("gitlab auth URL generation failed: PKCE codes are required")
+	}
+	if strings.TrimSpace(clientID) == "" {
+		return "", fmt.Errorf("gitlab auth URL generation failed: client ID is required")
+	}
+	baseURL = NormalizeBaseURL(baseURL)
+	params := url.Values{
+		"client_id":             {strings.TrimSpace(clientID)},
+		"response_type":         {"code"},
+		"redirect_uri":          {strings.TrimSpace(redirectURI)},
+		"scope":                 {defaultOAuthScope},
+		"state":                 {strings.TrimSpace(state)},
+		"code_challenge":        {pkce.CodeChallenge},
+		"code_challenge_method": {"S256"},
+	}
+	return fmt.Sprintf("%s/oauth/authorize?%s", baseURL, params.Encode()), nil
+}
+
+func (c *AuthClient) ExchangeCodeForTokens(ctx context.Context, baseURL, clientID, clientSecret, redirectURI, code, codeVerifier string) (*TokenResponse, error) {
+	form := url.Values{
+		"grant_type":    {"authorization_code"},
+		"client_id":     {strings.TrimSpace(clientID)},
+		"code":          {strings.TrimSpace(code)},
+		"redirect_uri":  {strings.TrimSpace(redirectURI)},
+		"code_verifier": {strings.TrimSpace(codeVerifier)},
+	}
+	if secret := strings.TrimSpace(clientSecret); secret != "" {
+		form.Set("client_secret", secret)
+	}
+	return c.postToken(ctx, NormalizeBaseURL(baseURL)+"/oauth/token", form)
+}
+
+func (c *AuthClient) RefreshTokens(ctx context.Context, baseURL, clientID, clientSecret, refreshToken string) (*TokenResponse, error) {
+	form := url.Values{
+		"grant_type":    {"refresh_token"},
+		"refresh_token": {strings.TrimSpace(refreshToken)},
+	}
+	if clientID = strings.TrimSpace(clientID); clientID != "" {
+		form.Set("client_id", clientID)
+	}
+	if secret := strings.TrimSpace(clientSecret); secret != "" {
+		form.Set("client_secret", secret)
+	}
+	return c.postToken(ctx, NormalizeBaseURL(baseURL)+"/oauth/token", form)
+}
+
+func (c *AuthClient) postToken(ctx context.Context, tokenURL string, form url.Values) (*TokenResponse, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("gitlab token request failed: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab token request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab token response read failed: %w", err)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("gitlab token request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+	var token TokenResponse
+	if err := json.Unmarshal(body, &token); err != nil {
+		return nil, fmt.Errorf("gitlab token response decode failed: %w", err)
+	}
+	return &token, nil
+}
+
+func (c *AuthClient) GetCurrentUser(ctx context.Context, baseURL, token string) (*User, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, NormalizeBaseURL(baseURL)+"/api/v4/user", nil)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab user request failed: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(token))
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab user request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab user response read failed: %w", err)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("gitlab user request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var user User
+	if err := json.Unmarshal(body, &user); err != nil {
+		return nil, fmt.Errorf("gitlab user response decode failed: %w", err)
+	}
+	return &user, nil
+}
+
+func (c *AuthClient) GetPersonalAccessTokenSelf(ctx context.Context, baseURL, token string) (*PersonalAccessTokenSelf, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, NormalizeBaseURL(baseURL)+"/api/v4/personal_access_tokens/self", nil)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab PAT self request failed: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(token))
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab PAT self request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab PAT self response read failed: %w", err)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("gitlab PAT self request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var pat PersonalAccessTokenSelf
+	if err := json.Unmarshal(body, &pat); err != nil {
+		return nil, fmt.Errorf("gitlab PAT self response decode failed: %w", err)
+	}
+	return &pat, nil
+}
+
+func (c *AuthClient) FetchDirectAccess(ctx context.Context, baseURL, token string) (*DirectAccessResponse, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, NormalizeBaseURL(baseURL)+"/api/v4/code_suggestions/direct_access", nil)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab direct access request failed: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(token))
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab direct access request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("gitlab direct access response read failed: %w", err)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("gitlab direct access request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var direct DirectAccessResponse
+	if err := json.Unmarshal(body, &direct); err != nil {
+		return nil, fmt.Errorf("gitlab direct access response decode failed: %w", err)
+	}
+	if direct.Headers == nil {
+		direct.Headers = make(map[string]string)
+	}
+	return &direct, nil
+}
+
+func ExtractDiscoveredModels(metadata map[string]any) []DiscoveredModel {
+	if len(metadata) == 0 {
+		return nil
+	}
+
+	models := make([]DiscoveredModel, 0, 4)
+	seen := make(map[string]struct{})
+	appendModel := func(provider, name string) {
+		provider = strings.TrimSpace(provider)
+		name = strings.TrimSpace(name)
+		if name == "" {
+			return
+		}
+		key := strings.ToLower(name)
+		if _, ok := seen[key]; ok {
+			return
+		}
+		seen[key] = struct{}{}
+		models = append(models, DiscoveredModel{
+			ModelProvider: provider,
+			ModelName:     name,
+		})
+	}
+
+	if raw, ok := metadata["model_details"]; ok {
+		appendDiscoveredModels(raw, appendModel)
+	}
+	appendModel(stringValue(metadata["model_provider"]), stringValue(metadata["model_name"]))
+
+	for _, key := range []string{"models", "supported_models", "discovered_models"} {
+		if raw, ok := metadata[key]; ok {
+			appendDiscoveredModels(raw, appendModel)
+		}
+	}
+
+	return models
+}
+
+func appendDiscoveredModels(raw any, appendModel func(provider, name string)) {
+	switch typed := raw.(type) {
+	case map[string]any:
+		appendModel(stringValue(typed["model_provider"]), stringValue(typed["model_name"]))
+		appendModel(stringValue(typed["provider"]), stringValue(typed["name"]))
+		if nested, ok := typed["models"]; ok {
+			appendDiscoveredModels(nested, appendModel)
+		}
+	case []any:
+		for _, item := range typed {
+			appendDiscoveredModels(item, appendModel)
+		}
+	case []string:
+		for _, item := range typed {
+			appendModel("", item)
+		}
+	case string:
+		appendModel("", typed)
+	}
+}
+
+func stringValue(raw any) string {
+	switch typed := raw.(type) {
+	case string:
+		return strings.TrimSpace(typed)
+	case fmt.Stringer:
+		return strings.TrimSpace(typed.String())
+	case json.Number:
+		return typed.String()
+	case int:
+		return strconv.Itoa(typed)
+	case int64:
+		return strconv.FormatInt(typed, 10)
+	case float64:
+		return strconv.FormatInt(int64(typed), 10)
+	default:
+		return ""
+	}
+}
--- a/internal/auth/gitlab/gitlab_test.go
+++ b/internal/auth/gitlab/gitlab_test.go
@@ -0,0 +1,138 @@
+package gitlab
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+)
+
+func TestAuthClientGenerateAuthURLIncludesPKCE(t *testing.T) {
+	client := NewAuthClient(nil)
+	pkce, err := GeneratePKCECodes()
+	if err != nil {
+		t.Fatalf("GeneratePKCECodes() error = %v", err)
+	}
+
+	rawURL, err := client.GenerateAuthURL("https://gitlab.example.com", "client-id", RedirectURL(17171), "state-123", pkce)
+	if err != nil {
+		t.Fatalf("GenerateAuthURL() error = %v", err)
+	}
+
+	parsed, err := url.Parse(rawURL)
+	if err != nil {
+		t.Fatalf("Parse(authURL) error = %v", err)
+	}
+	if got := parsed.Path; got != "/oauth/authorize" {
+		t.Fatalf("expected /oauth/authorize path, got %q", got)
+	}
+	query := parsed.Query()
+	if got := query.Get("client_id"); got != "client-id" {
+		t.Fatalf("expected client_id, got %q", got)
+	}
+	if got := query.Get("scope"); got != defaultOAuthScope {
+		t.Fatalf("expected scope %q, got %q", defaultOAuthScope, got)
+	}
+	if got := query.Get("code_challenge_method"); got != "S256" {
+		t.Fatalf("expected PKCE method S256, got %q", got)
+	}
+	if got := query.Get("code_challenge"); got == "" {
+		t.Fatal("expected non-empty code_challenge")
+	}
+}
+
+func TestAuthClientExchangeCodeForTokens(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/oauth/token" {
+			t.Fatalf("unexpected path %q", r.URL.Path)
+		}
+		if err := r.ParseForm(); err != nil {
+			t.Fatalf("ParseForm() error = %v", err)
+		}
+		if got := r.Form.Get("grant_type"); got != "authorization_code" {
+			t.Fatalf("expected authorization_code grant, got %q", got)
+		}
+		if got := r.Form.Get("code_verifier"); got != "verifier-123" {
+			t.Fatalf("expected code_verifier, got %q", got)
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"access_token":  "oauth-access",
+			"refresh_token": "oauth-refresh",
+			"token_type":    "Bearer",
+			"scope":         "api read_user",
+			"created_at":    1710000000,
+			"expires_in":    3600,
+		})
+	}))
+	defer srv.Close()
+
+	client := NewAuthClient(nil)
+	token, err := client.ExchangeCodeForTokens(context.Background(), srv.URL, "client-id", "client-secret", RedirectURL(17171), "auth-code", "verifier-123")
+	if err != nil {
+		t.Fatalf("ExchangeCodeForTokens() error = %v", err)
+	}
+	if token.AccessToken != "oauth-access" {
+		t.Fatalf("expected access token, got %q", token.AccessToken)
+	}
+	if token.RefreshToken != "oauth-refresh" {
+		t.Fatalf("expected refresh token, got %q", token.RefreshToken)
+	}
+}
+
+func TestExtractDiscoveredModels(t *testing.T) {
+	models := ExtractDiscoveredModels(map[string]any{
+		"model_details": map[string]any{
+			"model_provider": "anthropic",
+			"model_name":     "claude-sonnet-4-5",
+		},
+		"supported_models": []any{
+			map[string]any{"model_provider": "openai", "model_name": "gpt-4.1"},
+			"claude-sonnet-4-5",
+		},
+	})
+	if len(models) != 2 {
+		t.Fatalf("expected 2 unique models, got %d", len(models))
+	}
+	if models[0].ModelName != "claude-sonnet-4-5" {
+		t.Fatalf("unexpected first model %q", models[0].ModelName)
+	}
+	if models[1].ModelName != "gpt-4.1" {
+		t.Fatalf("unexpected second model %q", models[1].ModelName)
+	}
+}
+
+func TestFetchDirectAccessDecodesModelDetails(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v4/code_suggestions/direct_access" {
+			t.Fatalf("unexpected path %q", r.URL.Path)
+		}
+		if got := r.Header.Get("Authorization"); !strings.Contains(got, "token-123") {
+			t.Fatalf("expected bearer token, got %q", got)
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"base_url":   "https://cloud.gitlab.example.com",
+			"token":      "gateway-token",
+			"expires_at": 1710003600,
+			"headers": map[string]string{
+				"X-Gitlab-Realm": "saas",
+			},
+			"model_details": map[string]any{
+				"model_provider": "anthropic",
+				"model_name":     "claude-sonnet-4-5",
+			},
+		})
+	}))
+	defer srv.Close()
+
+	client := NewAuthClient(nil)
+	direct, err := client.FetchDirectAccess(context.Background(), srv.URL, "token-123")
+	if err != nil {
+		t.Fatalf("FetchDirectAccess() error = %v", err)
+	}
+	if direct.ModelDetails == nil || direct.ModelDetails.ModelName != "claude-sonnet-4-5" {
+		t.Fatalf("expected model details, got %+v", direct.ModelDetails)
+	}
+}
--- a/internal/cmd/auth_manager.go
+++ b/internal/cmd/auth_manager.go
@@ -5,8 +5,7 @@ import (
 )

 // newAuthManager creates a new authentication manager instance with all supported
-// authenticators and a file-based token store. It initializes authenticators for
-// Gemini, Codex, Claude, Qwen, IFlow, Antigravity, and GitHub Copilot providers.
+// authenticators and a file-based token store.
 //
 // Returns:
 //   - *sdkAuth.Manager: A configured authentication manager instance
@@ -23,6 +22,9 @@ func newAuthManager() *sdkAuth.Manager {
 		sdkAuth.NewKiroAuthenticator(),
 		sdkAuth.NewGitHubCopilotAuthenticator(),
 		sdkAuth.NewKiloAuthenticator(),
+		sdkAuth.NewGitLabAuthenticator(),
+		sdkAuth.NewCodeBuddyAuthenticator(),
+		sdkAuth.NewCursorAuthenticator(),
 	)
 	return manager
 }
--- a/internal/cmd/codebuddy_login.go
+++ b/internal/cmd/codebuddy_login.go
@@ -0,0 +1,43 @@
+package cmd
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+	log "github.com/sirupsen/logrus"
+)
+
+// DoCodeBuddyLogin triggers the browser OAuth polling flow for CodeBuddy and saves tokens.
+// It initiates the OAuth authentication, displays the user code for the user to enter
+// at the CodeBuddy verification URL, and waits for authorization before saving the tokens.
+//
+// Parameters:
+//   - cfg: The application configuration containing proxy and auth directory settings
+//   - options: Login options including browser behavior settings
+func DoCodeBuddyLogin(cfg *config.Config, options *LoginOptions) {
+	if options == nil {
+		options = &LoginOptions{}
+	}
+
+	manager := newAuthManager()
+	authOpts := &sdkAuth.LoginOptions{
+		NoBrowser: options.NoBrowser,
+		Metadata:  map[string]string{},
+	}
+
+	record, savedPath, err := manager.Login(context.Background(), "codebuddy", cfg, authOpts)
+	if err != nil {
+		log.Errorf("CodeBuddy authentication failed: %v", err)
+		return
+	}
+
+	if savedPath != "" {
+		fmt.Printf("Authentication saved to %s\n", savedPath)
+	}
+	if record != nil && record.Label != "" {
+		fmt.Printf("Authenticated as %s\n", record.Label)
+	}
+	fmt.Println("CodeBuddy authentication successful!")
+}
--- a/internal/cmd/cursor_login.go
+++ b/internal/cmd/cursor_login.go
@@ -0,0 +1,37 @@
+package cmd
+
+import (
+	"context"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+	log "github.com/sirupsen/logrus"
+)
+
+// DoCursorLogin triggers the OAuth PKCE flow for Cursor and saves tokens.
+func DoCursorLogin(cfg *config.Config, options *LoginOptions) {
+	if options == nil {
+		options = &LoginOptions{}
+	}
+
+	manager := newAuthManager()
+	authOpts := &sdkAuth.LoginOptions{
+		NoBrowser: options.NoBrowser,
+		Metadata:  map[string]string{},
+		Prompt:    options.Prompt,
+	}
+
+	record, savedPath, err := manager.Login(context.Background(), "cursor", cfg, authOpts)
+	if err != nil {
+		log.Errorf("Cursor authentication failed: %v", err)
+		return
+	}
+
+	if savedPath != "" {
+		log.Infof("Authentication saved to %s", savedPath)
+	}
+	if record != nil && record.Label != "" {
+		log.Infof("Authenticated as %s", record.Label)
+	}
+	log.Info("Cursor authentication successful!")
+}
--- a/internal/cmd/gitlab_login.go
+++ b/internal/cmd/gitlab_login.go
@@ -0,0 +1,69 @@
+package cmd
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+)
+
+func DoGitLabLogin(cfg *config.Config, options *LoginOptions) {
+	if options == nil {
+		options = &LoginOptions{}
+	}
+
+	promptFn := options.Prompt
+	if promptFn == nil {
+		promptFn = defaultProjectPrompt()
+	}
+
+	manager := newAuthManager()
+	authOpts := &sdkAuth.LoginOptions{
+		NoBrowser:    options.NoBrowser,
+		CallbackPort: options.CallbackPort,
+		Metadata: map[string]string{
+			"login_mode": "oauth",
+		},
+		Prompt: promptFn,
+	}
+
+	_, savedPath, err := manager.Login(context.Background(), "gitlab", cfg, authOpts)
+	if err != nil {
+		fmt.Printf("GitLab Duo authentication failed: %v\n", err)
+		return
+	}
+	if savedPath != "" {
+		fmt.Printf("Authentication saved to %s\n", savedPath)
+	}
+	fmt.Println("GitLab Duo authentication successful!")
+}
+
+func DoGitLabTokenLogin(cfg *config.Config, options *LoginOptions) {
+	if options == nil {
+		options = &LoginOptions{}
+	}
+
+	promptFn := options.Prompt
+	if promptFn == nil {
+		promptFn = defaultProjectPrompt()
+	}
+
+	manager := newAuthManager()
+	authOpts := &sdkAuth.LoginOptions{
+		Metadata: map[string]string{
+			"login_mode": "pat",
+		},
+		Prompt: promptFn,
+	}
+
+	_, savedPath, err := manager.Login(context.Background(), "gitlab", cfg, authOpts)
+	if err != nil {
+		fmt.Printf("GitLab Duo PAT authentication failed: %v\n", err)
+		return
+	}
+	if savedPath != "" {
+		fmt.Printf("Authentication saved to %s\n", savedPath)
+	}
+	fmt.Println("GitLab Duo PAT authentication successful!")
+}
--- a/internal/config/claude_header_defaults_test.go
+++ b/internal/config/claude_header_defaults_test.go
@@ -0,0 +1,55 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestLoadConfigOptional_ClaudeHeaderDefaults(t *testing.T) {
+	dir := t.TempDir()
+	configPath := filepath.Join(dir, "config.yaml")
+	configYAML := []byte(`
+claude-header-defaults:
+  user-agent: "  claude-cli/2.1.70 (external, cli)  "
+  package-version: "  0.80.0  "
+  runtime-version: "  v24.5.0  "
+  os: "  MacOS  "
+  arch: "  arm64  "
+  timeout: "  900  "
+  stabilize-device-profile: false
+`)
+	if err := os.WriteFile(configPath, configYAML, 0o600); err != nil {
+		t.Fatalf("failed to write config: %v", err)
+	}
+
+	cfg, err := LoadConfigOptional(configPath, false)
+	if err != nil {
+		t.Fatalf("LoadConfigOptional() error = %v", err)
+	}
+
+	if got := cfg.ClaudeHeaderDefaults.UserAgent; got != "claude-cli/2.1.70 (external, cli)" {
+		t.Fatalf("UserAgent = %q, want %q", got, "claude-cli/2.1.70 (external, cli)")
+	}
+	if got := cfg.ClaudeHeaderDefaults.PackageVersion; got != "0.80.0" {
+		t.Fatalf("PackageVersion = %q, want %q", got, "0.80.0")
+	}
+	if got := cfg.ClaudeHeaderDefaults.RuntimeVersion; got != "v24.5.0" {
+		t.Fatalf("RuntimeVersion = %q, want %q", got, "v24.5.0")
+	}
+	if got := cfg.ClaudeHeaderDefaults.OS; got != "MacOS" {
+		t.Fatalf("OS = %q, want %q", got, "MacOS")
+	}
+	if got := cfg.ClaudeHeaderDefaults.Arch; got != "arm64" {
+		t.Fatalf("Arch = %q, want %q", got, "arm64")
+	}
+	if got := cfg.ClaudeHeaderDefaults.Timeout; got != "900" {
+		t.Fatalf("Timeout = %q, want %q", got, "900")
+	}
+	if cfg.ClaudeHeaderDefaults.StabilizeDeviceProfile == nil {
+		t.Fatal("StabilizeDeviceProfile = nil, want non-nil")
+	}
+	if got := *cfg.ClaudeHeaderDefaults.StabilizeDeviceProfile; got {
+		t.Fatalf("StabilizeDeviceProfile = %v, want false", got)
+	}
+}
--- a/internal/config/codex_websocket_header_defaults_test.go
+++ b/internal/config/codex_websocket_header_defaults_test.go
@@ -0,0 +1,32 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestLoadConfigOptional_CodexHeaderDefaults(t *testing.T) {
+	dir := t.TempDir()
+	configPath := filepath.Join(dir, "config.yaml")
+	configYAML := []byte(`
+codex-header-defaults:
+  user-agent: "  my-codex-client/1.0  "
+  beta-features: "  feature-a,feature-b  "
+`)
+	if err := os.WriteFile(configPath, configYAML, 0o600); err != nil {
+		t.Fatalf("failed to write config: %v", err)
+	}
+
+	cfg, err := LoadConfigOptional(configPath, false)
+	if err != nil {
+		t.Fatalf("LoadConfigOptional() error = %v", err)
+	}
+
+	if got := cfg.CodexHeaderDefaults.UserAgent; got != "my-codex-client/1.0" {
+		t.Fatalf("UserAgent = %q, want %q", got, "my-codex-client/1.0")
+	}
+	if got := cfg.CodexHeaderDefaults.BetaFeatures; got != "feature-a,feature-b" {
+		t.Fatalf("BetaFeatures = %q, want %q", got, "feature-a,feature-b")
+	}
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -13,6 +13,7 @@ import (
 	"strings"
 	"syscall"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/crypto/bcrypt"
 	"gopkg.in/yaml.v3"
@@ -101,6 +102,10 @@ type Config struct {
 	// Codex defines a list of Codex API key configurations as specified in the YAML configuration file.
 	CodexKey []CodexKey `yaml:"codex-api-key" json:"codex-api-key"`

+	// CodexHeaderDefaults configures fallback headers for Codex OAuth model requests.
+	// These are used only when the client does not send its own headers.
+	CodexHeaderDefaults CodexHeaderDefaults `yaml:"codex-header-defaults" json:"codex-header-defaults"`
+
 	// ClaudeKey defines a list of Claude API key configurations as specified in the YAML configuration file.
 	ClaudeKey []ClaudeKey `yaml:"claude-api-key" json:"claude-api-key"`

@@ -141,13 +146,27 @@ type Config struct {
 	legacyMigrationPending bool `yaml:"-" json:"-"`
 }

-// ClaudeHeaderDefaults configures default header values injected into Claude API requests
-// when the client does not send them. Update these when Claude Code releases a new version.
+// ClaudeHeaderDefaults configures default header values injected into Claude API requests.
+// In legacy mode, UserAgent/PackageVersion/RuntimeVersion/Timeout act as fallbacks when
+// the client omits them, while OS/Arch remain runtime-derived. When stabilized device
+// profiles are enabled, OS/Arch become the pinned platform baseline, while
+// UserAgent/PackageVersion/RuntimeVersion seed the upgradeable software fingerprint.
 type ClaudeHeaderDefaults struct {
-	UserAgent      string `yaml:"user-agent" json:"user-agent"`
-	PackageVersion string `yaml:"package-version" json:"package-version"`
-	RuntimeVersion string `yaml:"runtime-version" json:"runtime-version"`
-	Timeout        string `yaml:"timeout" json:"timeout"`
+	UserAgent              string `yaml:"user-agent" json:"user-agent"`
+	PackageVersion         string `yaml:"package-version" json:"package-version"`
+	RuntimeVersion         string `yaml:"runtime-version" json:"runtime-version"`
+	OS                     string `yaml:"os" json:"os"`
+	Arch                   string `yaml:"arch" json:"arch"`
+	Timeout                string `yaml:"timeout" json:"timeout"`
+	StabilizeDeviceProfile *bool  `yaml:"stabilize-device-profile,omitempty" json:"stabilize-device-profile,omitempty"`
+}
+
+// CodexHeaderDefaults configures fallback header values injected into Codex
+// model requests for OAuth/file-backed auth when the client omits them.
+// UserAgent applies to HTTP and websocket requests; BetaFeatures only applies to websockets.
+type CodexHeaderDefaults struct {
+	UserAgent    string `yaml:"user-agent" json:"user-agent"`
+	BetaFeatures string `yaml:"beta-features" json:"beta-features"`
 }

 // TLSConfig holds HTTPS server settings.
@@ -176,6 +195,9 @@ type RemoteManagement struct {
 	SecretKey string `yaml:"secret-key"`
 	// DisableControlPanel skips serving and syncing the bundled management UI when true.
 	DisableControlPanel bool `yaml:"disable-control-panel"`
+	// DisableAutoUpdatePanel disables automatic periodic background updates of the management panel asset from GitHub.
+	// When false (the default), the background updater remains enabled; when true, the panel is only downloaded on first access if missing.
+	DisableAutoUpdatePanel bool `yaml:"disable-auto-update-panel"`
 	// PanelGitHubRepository overrides the GitHub repository used to fetch the management panel asset.
 	// Accepts either a repository URL (https://github.com/org/repo) or an API releases endpoint.
 	PanelGitHubRepository string `yaml:"panel-github-repository"`
@@ -189,6 +211,10 @@ type QuotaExceeded struct {

 	// SwitchPreviewModel indicates whether to automatically switch to a preview model when a quota is exceeded.
 	SwitchPreviewModel bool `yaml:"switch-preview-model" json:"switch-preview-model"`
+
+	// AntigravityCredits indicates whether to retry Antigravity quota_exhausted 429s once
+	// on the same credential with enabledCreditTypes=["GOOGLE_ONE_AI"].
+	AntigravityCredits bool `yaml:"antigravity-credits" json:"antigravity-credits"`
 }

 // RoutingConfig configures how credentials are selected for requests.
@@ -235,8 +261,8 @@ type AmpCode struct {
 	UpstreamAPIKey string `yaml:"upstream-api-key" json:"upstream-api-key"`

 	// UpstreamAPIKeys maps client API keys (from top-level api-keys) to upstream API keys.
-	// When a client authenticates with a key that matches an entry, that upstream key is used.
-	// If no match is found, falls back to UpstreamAPIKey (default behavior).
+	// When a request is authenticated with one of the APIKeys, the corresponding UpstreamAPIKey
+	// is used for the upstream Amp request.
 	UpstreamAPIKeys []AmpUpstreamAPIKeyEntry `yaml:"upstream-api-keys,omitempty" json:"upstream-api-keys,omitempty"`

 	// RestrictManagementToLocalhost restricts Amp management routes (/api/user, /api/threads, etc.)
@@ -358,6 +384,11 @@ type ClaudeKey struct {

 	// Cloak configures request cloaking for non-Claude-Code clients.
 	Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
+
+	// ExperimentalCCHSigning enables opt-in final-body cch signing for cloaked
+	// Claude /v1/messages requests. It is disabled by default so upstream seed
+	// changes do not alter the proxy's legacy behavior.
+	ExperimentalCCHSigning bool `yaml:"experimental-cch-signing,omitempty" json:"experimental-cch-signing,omitempty"`
 }

 func (k ClaudeKey) GetAPIKey() string  { return k.APIKey }
@@ -556,6 +587,10 @@ type OpenAICompatibilityModel struct {

 	// Alias is the model name alias that clients will use to reference this model.
 	Alias string `yaml:"alias" json:"alias"`
+
+	// Thinking configures the thinking/reasoning capability for this model.
+	// If nil, the model defaults to level-based reasoning with levels ["low", "medium", "high"].
+	Thinking *registry.ThinkingSupport `yaml:"thinking,omitempty" json:"thinking,omitempty"`
 }

 func (m OpenAICompatibilityModel) GetName() string  { return m.Name }
@@ -673,12 +708,18 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Sanitize Gemini API key configuration and migrate legacy entries.
 	cfg.SanitizeGeminiKeys()

-	// Sanitize Vertex-compatible API keys: drop entries without base-url
+	// Sanitize Vertex-compatible API keys.
 	cfg.SanitizeVertexCompatKeys()

 	// Sanitize Codex keys: drop entries without base-url
 	cfg.SanitizeCodexKeys()

+	// Sanitize Codex header defaults.
+	cfg.SanitizeCodexHeaderDefaults()
+
+	// Sanitize Claude header defaults.
+	cfg.SanitizeClaudeHeaderDefaults()
+
 	// Sanitize Claude key headers
 	cfg.SanitizeClaudeKeys()

@@ -771,6 +812,30 @@ func payloadRawString(value any) ([]byte, bool) {
 	}
 }

+// SanitizeCodexHeaderDefaults trims surrounding whitespace from the
+// configured Codex header fallback values.
+func (cfg *Config) SanitizeCodexHeaderDefaults() {
+	if cfg == nil {
+		return
+	}
+	cfg.CodexHeaderDefaults.UserAgent = strings.TrimSpace(cfg.CodexHeaderDefaults.UserAgent)
+	cfg.CodexHeaderDefaults.BetaFeatures = strings.TrimSpace(cfg.CodexHeaderDefaults.BetaFeatures)
+}
+
+// SanitizeClaudeHeaderDefaults trims surrounding whitespace from the
+// configured Claude fingerprint baseline values.
+func (cfg *Config) SanitizeClaudeHeaderDefaults() {
+	if cfg == nil {
+		return
+	}
+	cfg.ClaudeHeaderDefaults.UserAgent = strings.TrimSpace(cfg.ClaudeHeaderDefaults.UserAgent)
+	cfg.ClaudeHeaderDefaults.PackageVersion = strings.TrimSpace(cfg.ClaudeHeaderDefaults.PackageVersion)
+	cfg.ClaudeHeaderDefaults.RuntimeVersion = strings.TrimSpace(cfg.ClaudeHeaderDefaults.RuntimeVersion)
+	cfg.ClaudeHeaderDefaults.OS = strings.TrimSpace(cfg.ClaudeHeaderDefaults.OS)
+	cfg.ClaudeHeaderDefaults.Arch = strings.TrimSpace(cfg.ClaudeHeaderDefaults.Arch)
+	cfg.ClaudeHeaderDefaults.Timeout = strings.TrimSpace(cfg.ClaudeHeaderDefaults.Timeout)
+}
+
 // SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases.
 // It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
 // allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
--- a/internal/config/oauth_model_alias_defaults.go
+++ b/internal/config/oauth_model_alias_defaults.go
@@ -0,0 +1,61 @@
+package config
+
+import "strings"
+
+// defaultKiroAliases returns default oauth-model-alias entries for Kiro.
+// These aliases expose standard Claude IDs for Kiro-prefixed upstream models.
+func defaultKiroAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		// Sonnet 4.6
+		{Name: "kiro-claude-sonnet-4-6", Alias: "claude-sonnet-4-6", Fork: true},
+		// Sonnet 4.5
+		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5-20250929", Fork: true},
+		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5", Fork: true},
+		// Sonnet 4
+		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4-20250514", Fork: true},
+		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4", Fork: true},
+		// Opus 4.6
+		{Name: "kiro-claude-opus-4-6", Alias: "claude-opus-4-6", Fork: true},
+		// Opus 4.5
+		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5-20251101", Fork: true},
+		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5", Fork: true},
+		// Haiku 4.5
+		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5-20251001", Fork: true},
+		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5", Fork: true},
+	}
+}
+
+// defaultGitHubCopilotAliases returns default oauth-model-alias entries for
+// GitHub Copilot Claude models. It exposes hyphen-style IDs used by clients.
+func defaultGitHubCopilotAliases() []OAuthModelAlias {
+	return []OAuthModelAlias{
+		{Name: "claude-haiku-4.5", Alias: "claude-haiku-4-5", Fork: true},
+		{Name: "claude-opus-4.1", Alias: "claude-opus-4-1", Fork: true},
+		{Name: "claude-opus-4.5", Alias: "claude-opus-4-5", Fork: true},
+		{Name: "claude-opus-4.6", Alias: "claude-opus-4-6", Fork: true},
+		{Name: "claude-sonnet-4.5", Alias: "claude-sonnet-4-5", Fork: true},
+		{Name: "claude-sonnet-4.6", Alias: "claude-sonnet-4-6", Fork: true},
+	}
+}
+
+// GitHubCopilotAliasesFromModels generates oauth-model-alias entries from a dynamic
+// list of model IDs fetched from the Copilot API. It auto-creates aliases for
+// models whose ID contains a dot (e.g. "claude-opus-4.6" → "claude-opus-4-6"),
+// which is the pattern used by Claude models on Copilot.
+func GitHubCopilotAliasesFromModels(modelIDs []string) []OAuthModelAlias {
+	var aliases []OAuthModelAlias
+	seen := make(map[string]struct{})
+	for _, id := range modelIDs {
+		if !strings.Contains(id, ".") {
+			continue
+		}
+		hyphenID := strings.ReplaceAll(id, ".", "-")
+		key := id + "→" + hyphenID
+		if _, ok := seen[key]; ok {
+			continue
+		}
+		seen[key] = struct{}{}
+		aliases = append(aliases, OAuthModelAlias{Name: id, Alias: hyphenID, Fork: true})
+	}
+	return aliases
+}
--- a/internal/config/vertex_compat.go
+++ b/internal/config/vertex_compat.go
@@ -20,9 +20,9 @@ type VertexCompatKey struct {
 	// Prefix optionally namespaces model aliases for this credential (e.g., "teamA/vertex-pro").
 	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`

-	// BaseURL is the base URL for the Vertex-compatible API endpoint.
+	// BaseURL optionally overrides the Vertex-compatible API endpoint.
 	// The executor will append "/v1/publishers/google/models/{model}:action" to this.
-	// Example: "https://zenmux.ai/api" becomes "https://zenmux.ai/api/v1/publishers/google/models/..."
+	// When empty, requests fall back to the default Vertex API base URL.
 	BaseURL string `yaml:"base-url,omitempty" json:"base-url,omitempty"`

 	// ProxyURL optionally overrides the global proxy for this API key.
@@ -34,6 +34,9 @@ type VertexCompatKey struct {

 	// Models defines the model configurations including aliases for routing.
 	Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
+
+	// ExcludedModels lists model IDs that should be excluded for this provider.
+	ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
 }

 func (k VertexCompatKey) GetAPIKey() string  { return k.APIKey }
@@ -68,12 +71,9 @@ func (cfg *Config) SanitizeVertexCompatKeys() {
 		}
 		entry.Prefix = normalizeModelPrefix(entry.Prefix)
 		entry.BaseURL = strings.TrimSpace(entry.BaseURL)
-		if entry.BaseURL == "" {
-			// BaseURL is required for Vertex API key entries
-			continue
-		}
 		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 		entry.Headers = NormalizeHeaders(entry.Headers)
+		entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels)

 		// Sanitize models: remove entries without valid alias
 		sanitizedModels := make([]VertexCompatModel, 0, len(entry.Models))
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -4,6 +4,7 @@
 package logging

 import (
+	"bufio"
 	"bytes"
 	"compress/flate"
 	"compress/gzip"
@@ -41,15 +42,17 @@ type RequestLogger interface {
 	//   - statusCode: The response status code
 	//   - responseHeaders: The response headers
 	//   - response: The raw response data
+	//   - websocketTimeline: Optional downstream websocket event timeline
 	//   - apiRequest: The API request data
 	//   - apiResponse: The API response data
+	//   - apiWebsocketTimeline: Optional upstream websocket event timeline
 	//   - requestID: Optional request ID for log file naming
 	//   - requestTimestamp: When the request was received
 	//   - apiResponseTimestamp: When the API response was received
 	//
 	// Returns:
 	//   - error: An error if logging fails, nil otherwise
-	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
+	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error

 	// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
 	//
@@ -111,6 +114,16 @@ type StreamingLogWriter interface {
 	//   - error: An error if writing fails, nil otherwise
 	WriteAPIResponse(apiResponse []byte) error

+	// WriteAPIWebsocketTimeline writes the upstream websocket timeline to the log.
+	// This should be called when upstream communication happened over websocket.
+	//
+	// Parameters:
+	//   - apiWebsocketTimeline: The upstream websocket event timeline
+	//
+	// Returns:
+	//   - error: An error if writing fails, nil otherwise
+	WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error
+
 	// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
 	//
 	// Parameters:
@@ -203,17 +216,17 @@ func (l *FileRequestLogger) SetErrorLogsMaxFiles(maxFiles int) {
 //
 // Returns:
 //   - error: An error if logging fails, nil otherwise
-func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
+func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
 }

 // LogRequestWithOptions logs a request with optional forced logging behavior.
 // The force flag allows writing error logs even when regular request logging is disabled.
-func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
 }

-func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
 	if !l.enabled && !force {
 		return nil
 	}
@@ -260,8 +273,10 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
 		requestHeaders,
 		body,
 		requestBodyPath,
+		websocketTimeline,
 		apiRequest,
 		apiResponse,
+		apiWebsocketTimeline,
 		apiResponseErrors,
 		statusCode,
 		responseHeaders,
@@ -518,8 +533,10 @@ func (l *FileRequestLogger) writeNonStreamingLog(
 	requestHeaders map[string][]string,
 	requestBody []byte,
 	requestBodyPath string,
+	websocketTimeline []byte,
 	apiRequest []byte,
 	apiResponse []byte,
+	apiWebsocketTimeline []byte,
 	apiResponseErrors []*interfaces.ErrorMessage,
 	statusCode int,
 	responseHeaders map[string][]string,
@@ -531,7 +548,16 @@ func (l *FileRequestLogger) writeNonStreamingLog(
 	if requestTimestamp.IsZero() {
 		requestTimestamp = time.Now()
 	}
-	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
+	isWebsocketTranscript := hasSectionPayload(websocketTimeline)
+	downstreamTransport := inferDownstreamTransport(requestHeaders, websocketTimeline)
+	upstreamTransport := inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors)
+	if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp, downstreamTransport, upstreamTransport, !isWebsocketTranscript); errWrite != nil {
+		return errWrite
+	}
+	if errWrite := writeAPISection(w, "=== WEBSOCKET TIMELINE ===\n", "=== WEBSOCKET TIMELINE", websocketTimeline, time.Time{}); errWrite != nil {
+		return errWrite
+	}
+	if errWrite := writeAPISection(w, "=== API WEBSOCKET TIMELINE ===\n", "=== API WEBSOCKET TIMELINE", apiWebsocketTimeline, time.Time{}); errWrite != nil {
 		return errWrite
 	}
 	if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
@@ -543,6 +569,12 @@ func (l *FileRequestLogger) writeNonStreamingLog(
 	if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
 		return errWrite
 	}
+	if isWebsocketTranscript {
+		// Intentionally omit the generic downstream HTTP response section for websocket
+		// transcripts. The durable session exchange is captured in WEBSOCKET TIMELINE,
+		// and appending a one-off upgrade response snapshot would dilute that transcript.
+		return nil
+	}
 	return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
 }

@@ -553,6 +585,9 @@ func writeRequestInfoWithBody(
 	body []byte,
 	bodyPath string,
 	timestamp time.Time,
+	downstreamTransport string,
+	upstreamTransport string,
+	includeBody bool,
 ) error {
 	if _, errWrite := io.WriteString(w, "=== REQUEST INFO ===\n"); errWrite != nil {
 		return errWrite
@@ -566,10 +601,20 @@ func writeRequestInfoWithBody(
 	if _, errWrite := io.WriteString(w, fmt.Sprintf("Method: %s\n", method)); errWrite != nil {
 		return errWrite
 	}
+	if strings.TrimSpace(downstreamTransport) != "" {
+		if _, errWrite := io.WriteString(w, fmt.Sprintf("Downstream Transport: %s\n", downstreamTransport)); errWrite != nil {
+			return errWrite
+		}
+	}
+	if strings.TrimSpace(upstreamTransport) != "" {
+		if _, errWrite := io.WriteString(w, fmt.Sprintf("Upstream Transport: %s\n", upstreamTransport)); errWrite != nil {
+			return errWrite
+		}
+	}
 	if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
 		return errWrite
 	}
-	if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
+	if errWrite := writeSectionSpacing(w, 1); errWrite != nil {
 		return errWrite
 	}

@@ -584,36 +629,121 @@ func writeRequestInfoWithBody(
 			}
 		}
 	}
-	if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
+	if errWrite := writeSectionSpacing(w, 1); errWrite != nil {
 		return errWrite
 	}

+	if !includeBody {
+		return nil
+	}
+
 	if _, errWrite := io.WriteString(w, "=== REQUEST BODY ===\n"); errWrite != nil {
 		return errWrite
 	}

+	bodyTrailingNewlines := 1
 	if bodyPath != "" {
 		bodyFile, errOpen := os.Open(bodyPath)
 		if errOpen != nil {
 			return errOpen
 		}
-		if _, errCopy := io.Copy(w, bodyFile); errCopy != nil {
+		tracker := &trailingNewlineTrackingWriter{writer: w}
+		written, errCopy := io.Copy(tracker, bodyFile)
+		if errCopy != nil {
 			_ = bodyFile.Close()
 			return errCopy
 		}
+		if written > 0 {
+			bodyTrailingNewlines = tracker.trailingNewlines
+		}
 		if errClose := bodyFile.Close(); errClose != nil {
 			log.WithError(errClose).Warn("failed to close request body temp file")
 		}
 	} else if _, errWrite := w.Write(body); errWrite != nil {
 		return errWrite
+	} else if len(body) > 0 {
+		bodyTrailingNewlines = countTrailingNewlinesBytes(body)
 	}
-
-	if _, errWrite := io.WriteString(w, "\n\n"); errWrite != nil {
+	if errWrite := writeSectionSpacing(w, bodyTrailingNewlines); errWrite != nil {
 		return errWrite
 	}
 	return nil
 }

+func countTrailingNewlinesBytes(payload []byte) int {
+	count := 0
+	for i := len(payload) - 1; i >= 0; i-- {
+		if payload[i] != '\n' {
+			break
+		}
+		count++
+	}
+	return count
+}
+
+func writeSectionSpacing(w io.Writer, trailingNewlines int) error {
+	missingNewlines := 3 - trailingNewlines
+	if missingNewlines <= 0 {
+		return nil
+	}
+	_, errWrite := io.WriteString(w, strings.Repeat("\n", missingNewlines))
+	return errWrite
+}
+
+type trailingNewlineTrackingWriter struct {
+	writer           io.Writer
+	trailingNewlines int
+}
+
+func (t *trailingNewlineTrackingWriter) Write(payload []byte) (int, error) {
+	written, errWrite := t.writer.Write(payload)
+	if written > 0 {
+		writtenPayload := payload[:written]
+		trailingNewlines := countTrailingNewlinesBytes(writtenPayload)
+		if trailingNewlines == len(writtenPayload) {
+			t.trailingNewlines += trailingNewlines
+		} else {
+			t.trailingNewlines = trailingNewlines
+		}
+	}
+	return written, errWrite
+}
+
+func hasSectionPayload(payload []byte) bool {
+	return len(bytes.TrimSpace(payload)) > 0
+}
+
+func inferDownstreamTransport(headers map[string][]string, websocketTimeline []byte) string {
+	if hasSectionPayload(websocketTimeline) {
+		return "websocket"
+	}
+	for key, values := range headers {
+		if strings.EqualFold(strings.TrimSpace(key), "Upgrade") {
+			for _, value := range values {
+				if strings.EqualFold(strings.TrimSpace(value), "websocket") {
+					return "websocket"
+				}
+			}
+		}
+	}
+	return "http"
+}
+
+func inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline []byte, _ []*interfaces.ErrorMessage) string {
+	hasHTTP := hasSectionPayload(apiRequest) || hasSectionPayload(apiResponse)
+	hasWS := hasSectionPayload(apiWebsocketTimeline)
+	switch {
+	case hasHTTP && hasWS:
+		return "websocket+http"
+	case hasWS:
+		return "websocket"
+	case hasHTTP:
+		return "http"
+	default:
+		return ""
+	}
+}
+
 func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
 	if len(payload) == 0 {
 		return nil
@@ -623,11 +753,6 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
 		if _, errWrite := w.Write(payload); errWrite != nil {
 			return errWrite
 		}
-		if !bytes.HasSuffix(payload, []byte("\n")) {
-			if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
-				return errWrite
-			}
-		}
 	} else {
 		if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
 			return errWrite
@@ -640,12 +765,9 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
 		if _, errWrite := w.Write(payload); errWrite != nil {
 			return errWrite
 		}
-		if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
-			return errWrite
-		}
 	}

-	if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
+	if errWrite := writeSectionSpacing(w, countTrailingNewlinesBytes(payload)); errWrite != nil {
 		return errWrite
 	}
 	return nil
@@ -662,12 +784,17 @@ func writeAPIErrorResponses(w io.Writer, apiResponseErrors []*interfaces.ErrorMe
 		if _, errWrite := io.WriteString(w, fmt.Sprintf("HTTP Status: %d\n", apiResponseErrors[i].StatusCode)); errWrite != nil {
 			return errWrite
 		}
+		trailingNewlines := 1
 		if apiResponseErrors[i].Error != nil {
-			if _, errWrite := io.WriteString(w, apiResponseErrors[i].Error.Error()); errWrite != nil {
+			errText := apiResponseErrors[i].Error.Error()
+			if _, errWrite := io.WriteString(w, errText); errWrite != nil {
 				return errWrite
 			}
+			if errText != "" {
+				trailingNewlines = countTrailingNewlinesBytes([]byte(errText))
+			}
 		}
-		if _, errWrite := io.WriteString(w, "\n\n"); errWrite != nil {
+		if errWrite := writeSectionSpacing(w, trailingNewlines); errWrite != nil {
 			return errWrite
 		}
 	}
@@ -694,12 +821,18 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
 		}
 	}

-	if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
-		return errWrite
+	var bufferedReader *bufio.Reader
+	if responseReader != nil {
+		bufferedReader = bufio.NewReader(responseReader)
+	}
+	if !responseBodyStartsWithLeadingNewline(bufferedReader) {
+		if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
+			return errWrite
+		}
 	}

-	if responseReader != nil {
-		if _, errCopy := io.Copy(w, responseReader); errCopy != nil {
+	if bufferedReader != nil {
+		if _, errCopy := io.Copy(w, bufferedReader); errCopy != nil {
 			return errCopy
 		}
 	}
@@ -717,6 +850,19 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
 	return nil
 }

+func responseBodyStartsWithLeadingNewline(reader *bufio.Reader) bool {
+	if reader == nil {
+		return false
+	}
+	if peeked, _ := reader.Peek(2); len(peeked) >= 2 && peeked[0] == '\r' && peeked[1] == '\n' {
+		return true
+	}
+	if peeked, _ := reader.Peek(1); len(peeked) >= 1 && peeked[0] == '\n' {
+		return true
+	}
+	return false
+}
+
 // formatLogContent creates the complete log content for non-streaming requests.
 //
 // Parameters:
@@ -724,6 +870,7 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
 //   - method: The HTTP method
 //   - headers: The request headers
 //   - body: The request body
+//   - websocketTimeline: The downstream websocket event timeline
 //   - apiRequest: The API request data
 //   - apiResponse: The API response data
 //   - response: The raw response data
@@ -732,11 +879,42 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
 //
 // Returns:
 //   - string: The formatted log content
-func (l *FileRequestLogger) formatLogContent(url, method string, headers map[string][]string, body, apiRequest, apiResponse, response []byte, status int, responseHeaders map[string][]string, apiResponseErrors []*interfaces.ErrorMessage) string {
+func (l *FileRequestLogger) formatLogContent(url, method string, headers map[string][]string, body, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, response []byte, status int, responseHeaders map[string][]string, apiResponseErrors []*interfaces.ErrorMessage) string {
 	var content strings.Builder
+	isWebsocketTranscript := hasSectionPayload(websocketTimeline)
+	downstreamTransport := inferDownstreamTransport(headers, websocketTimeline)
+	upstreamTransport := inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors)

 	// Request info
-	content.WriteString(l.formatRequestInfo(url, method, headers, body))
+	content.WriteString(l.formatRequestInfo(url, method, headers, body, downstreamTransport, upstreamTransport, !isWebsocketTranscript))
+
+	if len(websocketTimeline) > 0 {
+		if bytes.HasPrefix(websocketTimeline, []byte("=== WEBSOCKET TIMELINE")) {
+			content.Write(websocketTimeline)
+			if !bytes.HasSuffix(websocketTimeline, []byte("\n")) {
+				content.WriteString("\n")
+			}
+		} else {
+			content.WriteString("=== WEBSOCKET TIMELINE ===\n")
+			content.Write(websocketTimeline)
+			content.WriteString("\n")
+		}
+		content.WriteString("\n")
+	}
+
+	if len(apiWebsocketTimeline) > 0 {
+		if bytes.HasPrefix(apiWebsocketTimeline, []byte("=== API WEBSOCKET TIMELINE")) {
+			content.Write(apiWebsocketTimeline)
+			if !bytes.HasSuffix(apiWebsocketTimeline, []byte("\n")) {
+				content.WriteString("\n")
+			}
+		} else {
+			content.WriteString("=== API WEBSOCKET TIMELINE ===\n")
+			content.Write(apiWebsocketTimeline)
+			content.WriteString("\n")
+		}
+		content.WriteString("\n")
+	}

 	if len(apiRequest) > 0 {
 		if bytes.HasPrefix(apiRequest, []byte("=== API REQUEST")) {
@@ -773,6 +951,12 @@ func (l *FileRequestLogger) formatLogContent(url, method string, headers map[str
 		content.WriteString("\n")
 	}

+	if isWebsocketTranscript {
+		// Mirror writeNonStreamingLog: websocket transcripts end with the dedicated
+		// timeline sections instead of a generic downstream HTTP response block.
+		return content.String()
+	}
+
 	// Response section
 	content.WriteString("=== RESPONSE ===\n")
 	content.WriteString(fmt.Sprintf("Status: %d\n", status))
@@ -933,13 +1117,19 @@ func (l *FileRequestLogger) decompressZstd(data []byte) ([]byte, error) {
 //
 // Returns:
 //   - string: The formatted request information
-func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[string][]string, body []byte) string {
+func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[string][]string, body []byte, downstreamTransport string, upstreamTransport string, includeBody bool) string {
 	var content strings.Builder

 	content.WriteString("=== REQUEST INFO ===\n")
 	content.WriteString(fmt.Sprintf("Version: %s\n", buildinfo.Version))
 	content.WriteString(fmt.Sprintf("URL: %s\n", url))
 	content.WriteString(fmt.Sprintf("Method: %s\n", method))
+	if strings.TrimSpace(downstreamTransport) != "" {
+		content.WriteString(fmt.Sprintf("Downstream Transport: %s\n", downstreamTransport))
+	}
+	if strings.TrimSpace(upstreamTransport) != "" {
+		content.WriteString(fmt.Sprintf("Upstream Transport: %s\n", upstreamTransport))
+	}
 	content.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
 	content.WriteString("\n")

@@ -952,6 +1142,10 @@ func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[st
 	}
 	content.WriteString("\n")

+	if !includeBody {
+		return content.String()
+	}
+
 	content.WriteString("=== REQUEST BODY ===\n")
 	content.Write(body)
 	content.WriteString("\n\n")
@@ -1011,6 +1205,9 @@ type FileStreamingLogWriter struct {
 	// apiResponse stores the upstream API response data.
 	apiResponse []byte

+	// apiWebsocketTimeline stores the upstream websocket event timeline.
+	apiWebsocketTimeline []byte
+
 	// apiResponseTimestamp captures when the API response was received.
 	apiResponseTimestamp time.Time
 }
@@ -1092,6 +1289,21 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
 	return nil
 }

+// WriteAPIWebsocketTimeline buffers the upstream websocket timeline for later writing.
+//
+// Parameters:
+//   - apiWebsocketTimeline: The upstream websocket event timeline
+//
+// Returns:
+//   - error: Always returns nil (buffering cannot fail)
+func (w *FileStreamingLogWriter) WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error {
+	if len(apiWebsocketTimeline) == 0 {
+		return nil
+	}
+	w.apiWebsocketTimeline = bytes.Clone(apiWebsocketTimeline)
+	return nil
+}
+
 func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
 	if !timestamp.IsZero() {
 		w.apiResponseTimestamp = timestamp
@@ -1100,7 +1312,7 @@ func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {

 // Close finalizes the log file and cleans up resources.
 // It writes all buffered data to the file in the correct order:
-// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
+// API WEBSOCKET TIMELINE -> API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
 //
 // Returns:
 //   - error: An error if closing fails, nil otherwise
@@ -1182,7 +1394,10 @@ func (w *FileStreamingLogWriter) asyncWriter() {
 }

 func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
-	if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
+	if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp, "http", inferUpstreamTransport(w.apiRequest, w.apiResponse, w.apiWebsocketTimeline, nil), true); errWrite != nil {
+		return errWrite
+	}
+	if errWrite := writeAPISection(logFile, "=== API WEBSOCKET TIMELINE ===\n", "=== API WEBSOCKET TIMELINE", w.apiWebsocketTimeline, time.Time{}); errWrite != nil {
 		return errWrite
 	}
 	if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
@@ -1265,6 +1480,17 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
 	return nil
 }

+// WriteAPIWebsocketTimeline is a no-op implementation that does nothing and always returns nil.
+//
+// Parameters:
+//   - apiWebsocketTimeline: The upstream websocket event timeline (ignored)
+//
+// Returns:
+//   - error: Always returns nil
+func (w *NoOpStreamingLogWriter) WriteAPIWebsocketTimeline(_ []byte) error {
+	return nil
+}
+
 func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}

 // Close is a no-op implementation that does nothing and always returns nil.
--- a/internal/managementasset/updater.go
+++ b/internal/managementasset/updater.go
@@ -31,6 +31,7 @@ const (
 	httpUserAgent                = "CLIProxyAPI-management-updater"
 	managementSyncMinInterval    = 30 * time.Second
 	updateCheckInterval          = 3 * time.Hour
+	maxAssetDownloadSize         = 50 << 20 // 10 MB safety limit for management asset downloads
 )

 // ManagementFileName exposes the control panel asset filename.
@@ -88,6 +89,10 @@ func runAutoUpdater(ctx context.Context) {
 			log.Debug("management asset auto-updater skipped: control panel disabled")
 			return
 		}
+		if cfg.RemoteManagement.DisableAutoUpdatePanel {
+			log.Debug("management asset auto-updater skipped: disable-auto-update-panel is enabled")
+			return
+		}

 		configPath, _ := schedulerConfigPath.Load().(string)
 		staticDir := StaticDir(configPath)
@@ -259,7 +264,8 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 		}

 		if remoteHash != "" && !strings.EqualFold(remoteHash, downloadedHash) {
-			log.Warnf("remote digest mismatch for management asset: expected %s got %s", remoteHash, downloadedHash)
+			log.Errorf("management asset digest mismatch: expected %s got %s — aborting update for safety", remoteHash, downloadedHash)
+			return nil, nil
 		}

 		if err = atomicWriteFile(localPath, data); err != nil {
@@ -282,6 +288,9 @@ func ensureFallbackManagementHTML(ctx context.Context, client *http.Client, loca
 		return false
 	}

+	log.Warnf("management asset downloaded from fallback URL without digest verification (hash=%s) — "+
+		"enable verified GitHub updates by keeping disable-auto-update-panel set to false", downloadedHash)
+
 	if err = atomicWriteFile(localPath, data); err != nil {
 		log.WithError(err).Warn("failed to persist fallback management control panel page")
 		return false
@@ -392,10 +401,13 @@ func downloadAsset(ctx context.Context, client *http.Client, downloadURL string)
 		return nil, "", fmt.Errorf("unexpected download status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
 	}

-	data, err := io.ReadAll(resp.Body)
+	data, err := io.ReadAll(io.LimitReader(resp.Body, maxAssetDownloadSize+1))
 	if err != nil {
 		return nil, "", fmt.Errorf("read download body: %w", err)
 	}
+	if int64(len(data)) > maxAssetDownloadSize {
+		return nil, "", fmt.Errorf("download exceeds maximum allowed size of %d bytes", maxAssetDownloadSize)
+	}

 	sum := sha256.Sum256(data)
 	return data, hex.EncodeToString(sum[:]), nil
--- a/internal/misc/oauth.go
+++ b/internal/misc/oauth.go
@@ -30,6 +30,23 @@ type OAuthCallback struct {
 	ErrorDescription string
 }

+// AsyncPrompt runs a prompt function in a goroutine and returns channels for
+// the result. The returned channels are buffered (size 1) so the goroutine can
+// complete even if the caller abandons the channels.
+func AsyncPrompt(promptFn func(string) (string, error), message string) (<-chan string, <-chan error) {
+	inputCh := make(chan string, 1)
+	errCh := make(chan error, 1)
+	go func() {
+		input, err := promptFn(message)
+		if err != nil {
+			errCh <- err
+			return
+		}
+		inputCh <- input
+	}()
+	return inputCh, errCh
+}
+
 // ParseOAuthCallback extracts OAuth parameters from a callback URL.
 // It returns nil when the input is empty.
 func ParseOAuthCallback(input string) (*OAuthCallback, error) {
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -1,12 +1,186 @@
 // Package registry provides model definitions and lookup helpers for various AI providers.
-// Static model metadata is stored in model_definitions_static_data.go.
+// Static model metadata is loaded from the embedded models.json file and can be refreshed from network.
 package registry

 import (
-	"sort"
 	"strings"
 )

+// staticModelsJSON mirrors the top-level structure of models.json.
+type staticModelsJSON struct {
+	Claude      []*ModelInfo `json:"claude"`
+	Gemini      []*ModelInfo `json:"gemini"`
+	Vertex      []*ModelInfo `json:"vertex"`
+	GeminiCLI   []*ModelInfo `json:"gemini-cli"`
+	AIStudio    []*ModelInfo `json:"aistudio"`
+	CodexFree   []*ModelInfo `json:"codex-free"`
+	CodexTeam   []*ModelInfo `json:"codex-team"`
+	CodexPlus   []*ModelInfo `json:"codex-plus"`
+	CodexPro    []*ModelInfo `json:"codex-pro"`
+	Qwen        []*ModelInfo `json:"qwen"`
+	IFlow       []*ModelInfo `json:"iflow"`
+	Kimi        []*ModelInfo `json:"kimi"`
+	Antigravity []*ModelInfo `json:"antigravity"`
+}
+
+// GetClaudeModels returns the standard Claude model definitions.
+func GetClaudeModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Claude)
+}
+
+// GetGeminiModels returns the standard Gemini model definitions.
+func GetGeminiModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Gemini)
+}
+
+// GetGeminiVertexModels returns Gemini model definitions for Vertex AI.
+func GetGeminiVertexModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Vertex)
+}
+
+// GetGeminiCLIModels returns Gemini model definitions for the Gemini CLI.
+func GetGeminiCLIModels() []*ModelInfo {
+	return cloneModelInfos(getModels().GeminiCLI)
+}
+
+// GetAIStudioModels returns model definitions for AI Studio.
+func GetAIStudioModels() []*ModelInfo {
+	return cloneModelInfos(getModels().AIStudio)
+}
+
+// GetCodexFreeModels returns model definitions for the Codex free plan tier.
+func GetCodexFreeModels() []*ModelInfo {
+	return cloneModelInfos(getModels().CodexFree)
+}
+
+// GetCodexTeamModels returns model definitions for the Codex team plan tier.
+func GetCodexTeamModels() []*ModelInfo {
+	return cloneModelInfos(getModels().CodexTeam)
+}
+
+// GetCodexPlusModels returns model definitions for the Codex plus plan tier.
+func GetCodexPlusModels() []*ModelInfo {
+	return cloneModelInfos(getModels().CodexPlus)
+}
+
+// GetCodexProModels returns model definitions for the Codex pro plan tier.
+func GetCodexProModels() []*ModelInfo {
+	return cloneModelInfos(getModels().CodexPro)
+}
+
+// GetQwenModels returns the standard Qwen model definitions.
+func GetQwenModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Qwen)
+}
+
+// GetIFlowModels returns the standard iFlow model definitions.
+func GetIFlowModels() []*ModelInfo {
+	return cloneModelInfos(getModels().IFlow)
+}
+
+// GetKimiModels returns the standard Kimi (Moonshot AI) model definitions.
+func GetKimiModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Kimi)
+}
+
+// GetAntigravityModels returns the standard Antigravity model definitions.
+func GetAntigravityModels() []*ModelInfo {
+	return cloneModelInfos(getModels().Antigravity)
+}
+
+// GetCodeBuddyModels returns the available models for CodeBuddy (Tencent).
+// These models are served through the copilot.tencent.com API.
+func GetCodeBuddyModels() []*ModelInfo {
+	now := int64(1748044800) // 2025-05-24
+	return []*ModelInfo{
+		{
+			ID:                  "glm-5.0",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "GLM-5.0",
+			Description:         "GLM-5.0 via CodeBuddy",
+			ContextLength:       128000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+		{
+			ID:                  "glm-4.7",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "GLM-4.7",
+			Description:         "GLM-4.7 via CodeBuddy",
+			ContextLength:       128000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+		{
+			ID:                  "minimax-m2.5",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "MiniMax M2.5",
+			Description:         "MiniMax M2.5 via CodeBuddy",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+		{
+			ID:                  "kimi-k2.5",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "Kimi K2.5",
+			Description:         "Kimi K2.5 via CodeBuddy",
+			ContextLength:       128000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+		{
+			ID:                  "deepseek-v3-2-volc",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "DeepSeek V3.2 (Volc)",
+			Description:         "DeepSeek V3.2 via CodeBuddy (Volcano Engine)",
+			ContextLength:       128000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+		{
+			ID:                  "hunyuan-2.0-thinking",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "tencent",
+			Type:                "codebuddy",
+			DisplayName:         "Hunyuan 2.0 Thinking",
+			Description:         "Tencent Hunyuan 2.0 Thinking via CodeBuddy",
+			ContextLength:       128000,
+			MaxCompletionTokens: 32768,
+			Thinking:            &ThinkingSupport{ZeroAllowed: true},
+			SupportedEndpoints:  []string{"/chat/completions"},
+		},
+	}
+}
+
+// cloneModelInfos returns a shallow copy of the slice with each element deep-cloned.
+func cloneModelInfos(models []*ModelInfo) []*ModelInfo {
+	if len(models) == 0 {
+		return nil
+	}
+	out := make([]*ModelInfo, len(models))
+	for i, m := range models {
+		out[i] = cloneModelInfo(m)
+	}
+	return out
+}
+
 // GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
 // It returns nil when the channel is unknown.
 //
@@ -20,10 +194,8 @@ import (
 //   - qwen
 //   - iflow
 //   - kimi
-//   - kiro
 //   - kilo
 //   - github-copilot
-//   - kiro
 //   - amazonq
 //   - antigravity (returns static overrides only)
 func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
@@ -40,7 +212,7 @@ func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 	case "aistudio":
 		return GetAIStudioModels()
 	case "codex":
-		return GetOpenAIModels()
+		return GetCodexProModels()
 	case "qwen":
 		return GetQwenModels()
 	case "iflow":
@@ -56,33 +228,28 @@ func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 	case "amazonq":
 		return GetAmazonQModels()
 	case "antigravity":
-		cfg := GetAntigravityModelConfig()
-		if len(cfg) == 0 {
-			return nil
-		}
-		models := make([]*ModelInfo, 0, len(cfg))
-		for modelID, entry := range cfg {
-			if modelID == "" || entry == nil {
-				continue
-			}
-			models = append(models, &ModelInfo{
-				ID:                  modelID,
-				Object:              "model",
-				OwnedBy:             "antigravity",
-				Type:                "antigravity",
-				Thinking:            entry.Thinking,
-				MaxCompletionTokens: entry.MaxCompletionTokens,
-			})
-		}
-		sort.Slice(models, func(i, j int) bool {
-			return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
-		})
-		return models
+		return GetAntigravityModels()
+	case "codebuddy":
+		return GetCodeBuddyModels()
+	case "cursor":
+		return GetCursorModels()
 	default:
 		return nil
 	}
 }

+// GetCursorModels returns the fallback Cursor model definitions.
+func GetCursorModels() []*ModelInfo {
+	return []*ModelInfo{
+		{ID: "composer-2", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "Composer 2", ContextLength: 200000, MaxCompletionTokens: 64000, Thinking: &ThinkingSupport{Max: 50000, DynamicAllowed: true}},
+		{ID: "claude-4-sonnet", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, Thinking: &ThinkingSupport{Max: 50000, DynamicAllowed: true}},
+		{ID: "claude-3.5-sonnet", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "Claude 3.5 Sonnet", ContextLength: 200000, MaxCompletionTokens: 8192},
+		{ID: "gpt-4o", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "GPT-4o", ContextLength: 128000, MaxCompletionTokens: 16384},
+		{ID: "cursor-small", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "Cursor Small", ContextLength: 200000, MaxCompletionTokens: 64000},
+		{ID: "gemini-2.5-pro", Object: "model", OwnedBy: "cursor", Type: "cursor", DisplayName: "Gemini 2.5 Pro", ContextLength: 1000000, MaxCompletionTokens: 65536, Thinking: &ThinkingSupport{Max: 50000, DynamicAllowed: true}},
+	}
+}
+
 // LookupStaticModelInfo searches all static model definitions for a model by ID.
 // Returns nil if no matching model is found.
 func LookupStaticModelInfo(modelID string) *ModelInfo {
@@ -90,38 +257,33 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 		return nil
 	}

+	data := getModels()
 	allModels := [][]*ModelInfo{
-		GetClaudeModels(),
-		GetGeminiModels(),
-		GetGeminiVertexModels(),
-		GetGeminiCLIModels(),
-		GetAIStudioModels(),
-		GetOpenAIModels(),
-		GetQwenModels(),
-		GetIFlowModels(),
-		GetKimiModels(),
+		data.Claude,
+		data.Gemini,
+		data.Vertex,
+		data.GeminiCLI,
+		data.AIStudio,
+		data.CodexPro,
+		data.Qwen,
+		data.IFlow,
+		data.Kimi,
+		data.Antigravity,
 		GetGitHubCopilotModels(),
 		GetKiroModels(),
 		GetKiloModels(),
 		GetAmazonQModels(),
+		GetCodeBuddyModels(),
+		GetCursorModels(),
 	}
 	for _, models := range allModels {
 		for _, m := range models {
 			if m != nil && m.ID == modelID {
-				return m
+				return cloneModelInfo(m)
 			}
 		}
 	}

-	// Check Antigravity static config
-	if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
-		return &ModelInfo{
-			ID:                  modelID,
-			Thinking:            cfg.Thinking,
-			MaxCompletionTokens: cfg.MaxCompletionTokens,
-		}
-	}
-
 	return nil
 }

@@ -152,6 +314,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-4.1 via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 	}

@@ -166,6 +329,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         entry.Description,
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		})
 	}

@@ -300,6 +464,32 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			SupportedEndpoints:  []string{"/responses"},
 			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
 		},
+		{
+			ID:                  "gpt-5.4",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.4",
+			Description:         "OpenAI GPT-5.4 via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
+		},
+		{
+			ID:                  "gpt-5.4-mini",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.4 mini",
+			Description:         "OpenAI GPT-5.4 mini via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
+		},
 		{
 			ID:                  "claude-haiku-4.5",
 			Object:              "model",
@@ -394,6 +584,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Google Gemini 2.5 Pro via GitHub Copilot",
 			ContextLength:       1048576,
 			MaxCompletionTokens: 65536,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-3-pro-preview",
@@ -405,6 +596,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Google Gemini 3 Pro Preview via GitHub Copilot",
 			ContextLength:       1048576,
 			MaxCompletionTokens: 65536,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-3.1-pro-preview",
@@ -414,8 +606,9 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Gemini 3.1 Pro (Preview)",
 			Description:         "Google Gemini 3.1 Pro Preview via GitHub Copilot",
-			ContextLength:       1048576,
+			ContextLength:       173000,
 			MaxCompletionTokens: 65536,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-3-flash-preview",
@@ -425,8 +618,9 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Gemini 3 Flash (Preview)",
 			Description:         "Google Gemini 3 Flash Preview via GitHub Copilot",
-			ContextLength:       1048576,
+			ContextLength:       173000,
 			MaxCompletionTokens: 65536,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "grok-code-fast-1",
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
--- a/internal/registry/model_definitions_test.go
+++ b/internal/registry/model_definitions_test.go
@@ -0,0 +1,29 @@
+package registry
+
+import "testing"
+
+func TestGitHubCopilotGeminiModelsAreChatOnly(t *testing.T) {
+	models := GetGitHubCopilotModels()
+	required := map[string]bool{
+		"gemini-2.5-pro":         false,
+		"gemini-3-pro-preview":   false,
+		"gemini-3.1-pro-preview": false,
+		"gemini-3-flash-preview": false,
+	}
+
+	for _, model := range models {
+		if _, ok := required[model.ID]; !ok {
+			continue
+		}
+		required[model.ID] = true
+		if len(model.SupportedEndpoints) != 1 || model.SupportedEndpoints[0] != "/chat/completions" {
+			t.Fatalf("model %q supported endpoints = %v, want [/chat/completions]", model.ID, model.SupportedEndpoints)
+		}
+	}
+
+	for modelID, found := range required {
+		if !found {
+			t.Fatalf("expected GitHub Copilot model %q in definitions", modelID)
+		}
+	}
+}
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,6 +47,8 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
+	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
 	// SupportedInputModalities lists supported input modalities (e.g., TEXT, IMAGE, VIDEO, AUDIO)
 	SupportedInputModalities []string `json:"supportedInputModalities,omitempty"`
 	// SupportedOutputModalities lists supported output modalities (e.g., TEXT, IMAGE)
@@ -62,20 +64,25 @@ type ModelInfo struct {
 	UserDefined bool `json:"-"`
 }

+type availableModelsCacheEntry struct {
+	models    []map[string]any
+	expiresAt time.Time
+}
+
 // ThinkingSupport describes a model family's supported internal reasoning budget range.
 // Values are interpreted in provider-native token units.
 type ThinkingSupport struct {
 	// Min is the minimum allowed thinking budget (inclusive).
-	Min int `json:"min,omitempty"`
+	Min int `json:"min,omitempty" yaml:"min,omitempty"`
 	// Max is the maximum allowed thinking budget (inclusive).
-	Max int `json:"max,omitempty"`
+	Max int `json:"max,omitempty" yaml:"max,omitempty"`
 	// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
-	ZeroAllowed bool `json:"zero_allowed,omitempty"`
+	ZeroAllowed bool `json:"zero_allowed,omitempty" yaml:"zero-allowed,omitempty"`
 	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
-	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
+	DynamicAllowed bool `json:"dynamic_allowed,omitempty" yaml:"dynamic-allowed,omitempty"`
 	// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
 	// When set, the model uses level-based reasoning instead of token budgets.
-	Levels []string `json:"levels,omitempty"`
+	Levels []string `json:"levels,omitempty" yaml:"levels,omitempty"`
 }

 // ModelRegistration tracks a model's availability
@@ -116,6 +123,8 @@ type ModelRegistry struct {
 	clientProviders map[string]string
 	// mutex ensures thread-safe access to the registry
 	mutex *sync.RWMutex
+	// availableModelsCache stores per-handler snapshots for GetAvailableModels.
+	availableModelsCache map[string]availableModelsCacheEntry
 	// hook is an optional callback sink for model registration changes
 	hook ModelRegistryHook
 }
@@ -128,15 +137,28 @@ var registryOnce sync.Once
 func GetGlobalRegistry() *ModelRegistry {
 	registryOnce.Do(func() {
 		globalRegistry = &ModelRegistry{
-			models:           make(map[string]*ModelRegistration),
-			clientModels:     make(map[string][]string),
-			clientModelInfos: make(map[string]map[string]*ModelInfo),
-			clientProviders:  make(map[string]string),
-			mutex:            &sync.RWMutex{},
+			models:               make(map[string]*ModelRegistration),
+			clientModels:         make(map[string][]string),
+			clientModelInfos:     make(map[string]map[string]*ModelInfo),
+			clientProviders:      make(map[string]string),
+			availableModelsCache: make(map[string]availableModelsCacheEntry),
+			mutex:                &sync.RWMutex{},
 		}
 	})
 	return globalRegistry
 }
+func (r *ModelRegistry) ensureAvailableModelsCacheLocked() {
+	if r.availableModelsCache == nil {
+		r.availableModelsCache = make(map[string]availableModelsCacheEntry)
+	}
+}
+
+func (r *ModelRegistry) invalidateAvailableModelsCacheLocked() {
+	if len(r.availableModelsCache) == 0 {
+		return
+	}
+	clear(r.availableModelsCache)
+}

 // LookupModelInfo searches dynamic registry (provider-specific > global) then static definitions.
 func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
@@ -151,9 +173,9 @@ func LookupModelInfo(modelID string, provider ...string) *ModelInfo {
 	}

 	if info := GetGlobalRegistry().GetModelInfo(modelID, p); info != nil {
-		return info
+		return cloneModelInfo(info)
 	}
-	return LookupStaticModelInfo(modelID)
+	return cloneModelInfo(LookupStaticModelInfo(modelID))
 }

 // SetHook sets an optional hook for observing model registration changes.
@@ -167,6 +189,7 @@ func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
 }

 const defaultModelRegistryHookTimeout = 5 * time.Second
+const modelQuotaExceededWindow = 5 * time.Minute

 func (r *ModelRegistry) triggerModelsRegistered(provider, clientID string, models []*ModelInfo) {
 	hook := r.hook
@@ -211,6 +234,7 @@ func (r *ModelRegistry) triggerModelsUnregistered(provider, clientID string) {
 func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models []*ModelInfo) {
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()

 	provider := strings.ToLower(clientProvider)
 	uniqueModelIDs := make([]string, 0, len(models))
@@ -236,6 +260,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		delete(r.clientModels, clientID)
 		delete(r.clientModelInfos, clientID)
 		delete(r.clientProviders, clientID)
+		r.invalidateAvailableModelsCacheLocked()
 		misc.LogCredentialSeparator()
 		return
 	}
@@ -263,6 +288,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		} else {
 			delete(r.clientProviders, clientID)
 		}
+		r.invalidateAvailableModelsCacheLocked()
 		r.triggerModelsRegistered(provider, clientID, models)
 		log.Debugf("Registered client %s from provider %s with %d models", clientID, clientProvider, len(rawModelIDs))
 		misc.LogCredentialSeparator()
@@ -365,6 +391,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 				reg.InfoByProvider[provider] = cloneModelInfo(model)
 			}
 			reg.LastUpdated = now
+			// Re-registering an existing client/model binding starts a fresh registry
+			// snapshot for that binding. Cooldown and suspension are transient
+			// scheduling state and must not survive this reconciliation step.
 			if reg.QuotaExceededClients != nil {
 				delete(reg.QuotaExceededClients, clientID)
 			}
@@ -406,6 +435,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		delete(r.clientProviders, clientID)
 	}

+	r.invalidateAvailableModelsCacheLocked()
 	r.triggerModelsRegistered(provider, clientID, models)
 	if len(added) == 0 && len(removed) == 0 && !providerChanged {
 		// Only metadata (e.g., display name) changed; skip separator when no log output.
@@ -509,6 +539,13 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedOutputModalities) > 0 {
 		copyModel.SupportedOutputModalities = append([]string(nil), model.SupportedOutputModalities...)
 	}
+	if model.Thinking != nil {
+		copyThinking := *model.Thinking
+		if len(model.Thinking.Levels) > 0 {
+			copyThinking.Levels = append([]string(nil), model.Thinking.Levels...)
+		}
+		copyModel.Thinking = &copyThinking
+	}
 	return &copyModel
 }

@@ -538,6 +575,7 @@ func (r *ModelRegistry) UnregisterClient(clientID string) {
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
 	r.unregisterClientInternal(clientID)
+	r.invalidateAvailableModelsCacheLocked()
 }

 // unregisterClientInternal performs the actual client unregistration (internal, no locking)
@@ -604,9 +642,12 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 func (r *ModelRegistry) SetModelQuotaExceeded(clientID, modelID string) {
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()

 	if registration, exists := r.models[modelID]; exists {
-		registration.QuotaExceededClients[clientID] = new(time.Now())
+		now := time.Now()
+		registration.QuotaExceededClients[clientID] = &now
+		r.invalidateAvailableModelsCacheLocked()
 		log.Debugf("Marked model %s as quota exceeded for client %s", modelID, clientID)
 	}
 }
@@ -618,9 +659,11 @@ func (r *ModelRegistry) SetModelQuotaExceeded(clientID, modelID string) {
 func (r *ModelRegistry) ClearModelQuotaExceeded(clientID, modelID string) {
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()

 	if registration, exists := r.models[modelID]; exists {
 		delete(registration.QuotaExceededClients, clientID)
+		r.invalidateAvailableModelsCacheLocked()
 		// log.Debugf("Cleared quota exceeded status for model %s and client %s", modelID, clientID)
 	}
 }
@@ -636,6 +679,7 @@ func (r *ModelRegistry) SuspendClientModel(clientID, modelID, reason string) {
 	}
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()

 	registration, exists := r.models[modelID]
 	if !exists || registration == nil {
@@ -649,6 +693,7 @@ func (r *ModelRegistry) SuspendClientModel(clientID, modelID, reason string) {
 	}
 	registration.SuspendedClients[clientID] = reason
 	registration.LastUpdated = time.Now()
+	r.invalidateAvailableModelsCacheLocked()
 	if reason != "" {
 		log.Debugf("Suspended client %s for model %s: %s", clientID, modelID, reason)
 	} else {
@@ -666,6 +711,7 @@ func (r *ModelRegistry) ResumeClientModel(clientID, modelID string) {
 	}
 	r.mutex.Lock()
 	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()

 	registration, exists := r.models[modelID]
 	if !exists || registration == nil || registration.SuspendedClients == nil {
@@ -676,6 +722,7 @@ func (r *ModelRegistry) ResumeClientModel(clientID, modelID string) {
 	}
 	delete(registration.SuspendedClients, clientID)
 	registration.LastUpdated = time.Now()
+	r.invalidateAvailableModelsCacheLocked()
 	log.Debugf("Resumed client %s for model %s", clientID, modelID)
 }

@@ -711,22 +758,51 @@ func (r *ModelRegistry) ClientSupportsModel(clientID, modelID string) bool {
 // Returns:
 //   - []map[string]any: List of available models in the requested format
 func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any {
-	r.mutex.RLock()
-	defer r.mutex.RUnlock()
+	now := time.Now()

-	models := make([]map[string]any, 0)
-	quotaExpiredDuration := 5 * time.Minute
+	r.mutex.RLock()
+	if cache, ok := r.availableModelsCache[handlerType]; ok && (cache.expiresAt.IsZero() || now.Before(cache.expiresAt)) {
+		models := cloneModelMaps(cache.models)
+		r.mutex.RUnlock()
+		return models
+	}
+	r.mutex.RUnlock()
+
+	r.mutex.Lock()
+	defer r.mutex.Unlock()
+	r.ensureAvailableModelsCacheLocked()
+
+	if cache, ok := r.availableModelsCache[handlerType]; ok && (cache.expiresAt.IsZero() || now.Before(cache.expiresAt)) {
+		return cloneModelMaps(cache.models)
+	}
+
+	models, expiresAt := r.buildAvailableModelsLocked(handlerType, now)
+	r.availableModelsCache[handlerType] = availableModelsCacheEntry{
+		models:    cloneModelMaps(models),
+		expiresAt: expiresAt,
+	}
+
+	return models
+}
+
+func (r *ModelRegistry) buildAvailableModelsLocked(handlerType string, now time.Time) ([]map[string]any, time.Time) {
+	models := make([]map[string]any, 0, len(r.models))
+	var expiresAt time.Time

 	for _, registration := range r.models {
-		// Check if model has any non-quota-exceeded clients
 		availableClients := registration.Count
-		now := time.Now()

-		// Count clients that have exceeded quota but haven't recovered yet
 		expiredClients := 0
 		for _, quotaTime := range registration.QuotaExceededClients {
-			if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
+			if quotaTime == nil {
+				continue
+			}
+			recoveryAt := quotaTime.Add(modelQuotaExceededWindow)
+			if now.Before(recoveryAt) {
 				expiredClients++
+				if expiresAt.IsZero() || recoveryAt.Before(expiresAt) {
+					expiresAt = recoveryAt
+				}
 			}
 		}

@@ -747,7 +823,6 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 			effectiveClients = 0
 		}

-		// Include models that have available clients, or those solely cooling down.
 		if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
 			model := r.convertModelToMap(registration.Info, handlerType)
 			if model != nil {
@@ -756,7 +831,44 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 		}
 	}

-	return models
+	return models, expiresAt
+}
+
+func cloneModelMaps(models []map[string]any) []map[string]any {
+	cloned := make([]map[string]any, 0, len(models))
+	for _, model := range models {
+		if model == nil {
+			cloned = append(cloned, nil)
+			continue
+		}
+		copyModel := make(map[string]any, len(model))
+		for key, value := range model {
+			copyModel[key] = cloneModelMapValue(value)
+		}
+		cloned = append(cloned, copyModel)
+	}
+	return cloned
+}
+
+func cloneModelMapValue(value any) any {
+	switch typed := value.(type) {
+	case map[string]any:
+		copyMap := make(map[string]any, len(typed))
+		for key, entry := range typed {
+			copyMap[key] = cloneModelMapValue(entry)
+		}
+		return copyMap
+	case []any:
+		copySlice := make([]any, len(typed))
+		for i, entry := range typed {
+			copySlice[i] = cloneModelMapValue(entry)
+		}
+		return copySlice
+	case []string:
+		return append([]string(nil), typed...)
+	default:
+		return value
+	}
 }

 // GetAvailableModelsByProvider returns models available for the given provider identifier.
@@ -820,7 +932,6 @@ func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelIn
 		return nil
 	}

-	quotaExpiredDuration := 5 * time.Minute
 	now := time.Now()
 	result := make([]*ModelInfo, 0, len(providerModels))

@@ -842,7 +953,7 @@ func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelIn
 					if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
 						continue
 					}
-					if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
+					if quotaTime != nil && now.Sub(*quotaTime) < modelQuotaExceededWindow {
 						expiredClients++
 					}
 				}
@@ -872,11 +983,11 @@ func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelIn

 		if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
 			if entry.info != nil {
-				result = append(result, entry.info)
+				result = append(result, cloneModelInfo(entry.info))
 				continue
 			}
 			if ok && registration != nil && registration.Info != nil {
-				result = append(result, registration.Info)
+				result = append(result, cloneModelInfo(registration.Info))
 			}
 		}
 	}
@@ -896,12 +1007,11 @@ func (r *ModelRegistry) GetModelCount(modelID string) int {

 	if registration, exists := r.models[modelID]; exists {
 		now := time.Now()
-		quotaExpiredDuration := 5 * time.Minute

 		// Count clients that have exceeded quota but haven't recovered yet
 		expiredClients := 0
 		for _, quotaTime := range registration.QuotaExceededClients {
-			if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
+			if quotaTime != nil && now.Sub(*quotaTime) < modelQuotaExceededWindow {
 				expiredClients++
 			}
 		}
@@ -985,13 +1095,13 @@ func (r *ModelRegistry) GetModelInfo(modelID, provider string) *ModelInfo {
 			if reg.Providers != nil {
 				if count, ok := reg.Providers[provider]; ok && count > 0 {
 					if info, ok := reg.InfoByProvider[provider]; ok && info != nil {
-						return info
+						return cloneModelInfo(info)
 					}
 				}
 			}
 		}
 		// Fallback to global info (last registered)
-		return reg.Info
+		return cloneModelInfo(reg.Info)
 	}
 	return nil
 }
@@ -1031,7 +1141,7 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 			result["max_completion_tokens"] = model.MaxCompletionTokens
 		}
 		if len(model.SupportedParameters) > 0 {
-			result["supported_parameters"] = model.SupportedParameters
+			result["supported_parameters"] = append([]string(nil), model.SupportedParameters...)
 		}
 		if len(model.SupportedEndpoints) > 0 {
 			result["supported_endpoints"] = model.SupportedEndpoints
@@ -1092,13 +1202,13 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 			result["outputTokenLimit"] = model.OutputTokenLimit
 		}
 		if len(model.SupportedGenerationMethods) > 0 {
-			result["supportedGenerationMethods"] = model.SupportedGenerationMethods
+			result["supportedGenerationMethods"] = append([]string(nil), model.SupportedGenerationMethods...)
 		}
 		if len(model.SupportedInputModalities) > 0 {
-			result["supportedInputModalities"] = model.SupportedInputModalities
+			result["supportedInputModalities"] = append([]string(nil), model.SupportedInputModalities...)
 		}
 		if len(model.SupportedOutputModalities) > 0 {
-			result["supportedOutputModalities"] = model.SupportedOutputModalities
+			result["supportedOutputModalities"] = append([]string(nil), model.SupportedOutputModalities...)
 		}
 		return result

@@ -1127,16 +1237,20 @@ func (r *ModelRegistry) CleanupExpiredQuotas() {
 	defer r.mutex.Unlock()

 	now := time.Now()
-	quotaExpiredDuration := 5 * time.Minute
+	invalidated := false

 	for modelID, registration := range r.models {
 		for clientID, quotaTime := range registration.QuotaExceededClients {
-			if quotaTime != nil && now.Sub(*quotaTime) >= quotaExpiredDuration {
+			if quotaTime != nil && now.Sub(*quotaTime) >= modelQuotaExceededWindow {
 				delete(registration.QuotaExceededClients, clientID)
+				invalidated = true
 				log.Debugf("Cleaned up expired quota tracking for model %s, client %s", modelID, clientID)
 			}
 		}
 	}
+	if invalidated {
+		r.invalidateAvailableModelsCacheLocked()
+	}
 }

 // GetFirstAvailableModel returns the first available model for the given handler type.
@@ -1150,8 +1264,6 @@ func (r *ModelRegistry) CleanupExpiredQuotas() {
 //   - string: The model ID of the first available model, or empty string if none available
 //   - error: An error if no models are available
 func (r *ModelRegistry) GetFirstAvailableModel(handlerType string) (string, error) {
-	r.mutex.RLock()
-	defer r.mutex.RUnlock()

 	// Get all available models for this handler type
 	models := r.GetAvailableModels(handlerType)
@@ -1211,13 +1323,13 @@ func (r *ModelRegistry) GetModelsForClient(clientID string) []*ModelInfo {
 		// Prefer client's own model info to preserve original type/owned_by
 		if clientInfos != nil {
 			if info, ok := clientInfos[modelID]; ok && info != nil {
-				result = append(result, info)
+				result = append(result, cloneModelInfo(info))
 				continue
 			}
 		}
 		// Fallback to global registry (for backwards compatibility)
 		if reg, ok := r.models[modelID]; ok && reg.Info != nil {
-			result = append(result, reg.Info)
+			result = append(result, cloneModelInfo(reg.Info))
 		}
 	}
 	return result
--- a/internal/registry/model_registry_cache_test.go
+++ b/internal/registry/model_registry_cache_test.go
@@ -0,0 +1,54 @@
+package registry
+
+import "testing"
+
+func TestGetAvailableModelsReturnsClonedSnapshots(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1", OwnedBy: "team-a", DisplayName: "Model One"}})
+
+	first := r.GetAvailableModels("openai")
+	if len(first) != 1 {
+		t.Fatalf("expected 1 model, got %d", len(first))
+	}
+	first[0]["id"] = "mutated"
+	first[0]["display_name"] = "Mutated"
+
+	second := r.GetAvailableModels("openai")
+	if got := second[0]["id"]; got != "m1" {
+		t.Fatalf("expected cached snapshot to stay isolated, got id %v", got)
+	}
+	if got := second[0]["display_name"]; got != "Model One" {
+		t.Fatalf("expected cached snapshot to stay isolated, got display_name %v", got)
+	}
+}
+
+func TestGetAvailableModelsInvalidatesCacheOnRegistryChanges(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1", OwnedBy: "team-a", DisplayName: "Model One"}})
+
+	models := r.GetAvailableModels("openai")
+	if len(models) != 1 {
+		t.Fatalf("expected 1 model, got %d", len(models))
+	}
+	if got := models[0]["display_name"]; got != "Model One" {
+		t.Fatalf("expected initial display_name Model One, got %v", got)
+	}
+
+	r.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1", OwnedBy: "team-a", DisplayName: "Model One Updated"}})
+	models = r.GetAvailableModels("openai")
+	if got := models[0]["display_name"]; got != "Model One Updated" {
+		t.Fatalf("expected updated display_name after cache invalidation, got %v", got)
+	}
+
+	r.SuspendClientModel("client-1", "m1", "manual")
+	models = r.GetAvailableModels("openai")
+	if len(models) != 0 {
+		t.Fatalf("expected no available models after suspension, got %d", len(models))
+	}
+
+	r.ResumeClientModel("client-1", "m1")
+	models = r.GetAvailableModels("openai")
+	if len(models) != 1 {
+		t.Fatalf("expected model to reappear after resume, got %d", len(models))
+	}
+}
--- a/internal/registry/model_registry_safety_test.go
+++ b/internal/registry/model_registry_safety_test.go
@@ -0,0 +1,149 @@
+package registry
+
+import (
+	"testing"
+	"time"
+)
+
+func TestGetModelInfoReturnsClone(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "gemini", []*ModelInfo{{
+		ID:          "m1",
+		DisplayName: "Model One",
+		Thinking:    &ThinkingSupport{Min: 1, Max: 2, Levels: []string{"low", "high"}},
+	}})
+
+	first := r.GetModelInfo("m1", "gemini")
+	if first == nil {
+		t.Fatal("expected model info")
+	}
+	first.DisplayName = "mutated"
+	first.Thinking.Levels[0] = "mutated"
+
+	second := r.GetModelInfo("m1", "gemini")
+	if second.DisplayName != "Model One" {
+		t.Fatalf("expected cloned display name, got %q", second.DisplayName)
+	}
+	if second.Thinking == nil || len(second.Thinking.Levels) == 0 || second.Thinking.Levels[0] != "low" {
+		t.Fatalf("expected cloned thinking levels, got %+v", second.Thinking)
+	}
+}
+
+func TestGetModelsForClientReturnsClones(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "gemini", []*ModelInfo{{
+		ID:          "m1",
+		DisplayName: "Model One",
+		Thinking:    &ThinkingSupport{Levels: []string{"low", "high"}},
+	}})
+
+	first := r.GetModelsForClient("client-1")
+	if len(first) != 1 || first[0] == nil {
+		t.Fatalf("expected one model, got %+v", first)
+	}
+	first[0].DisplayName = "mutated"
+	first[0].Thinking.Levels[0] = "mutated"
+
+	second := r.GetModelsForClient("client-1")
+	if len(second) != 1 || second[0] == nil {
+		t.Fatalf("expected one model on second fetch, got %+v", second)
+	}
+	if second[0].DisplayName != "Model One" {
+		t.Fatalf("expected cloned display name, got %q", second[0].DisplayName)
+	}
+	if second[0].Thinking == nil || len(second[0].Thinking.Levels) == 0 || second[0].Thinking.Levels[0] != "low" {
+		t.Fatalf("expected cloned thinking levels, got %+v", second[0].Thinking)
+	}
+}
+
+func TestGetAvailableModelsByProviderReturnsClones(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "gemini", []*ModelInfo{{
+		ID:          "m1",
+		DisplayName: "Model One",
+		Thinking:    &ThinkingSupport{Levels: []string{"low", "high"}},
+	}})
+
+	first := r.GetAvailableModelsByProvider("gemini")
+	if len(first) != 1 || first[0] == nil {
+		t.Fatalf("expected one model, got %+v", first)
+	}
+	first[0].DisplayName = "mutated"
+	first[0].Thinking.Levels[0] = "mutated"
+
+	second := r.GetAvailableModelsByProvider("gemini")
+	if len(second) != 1 || second[0] == nil {
+		t.Fatalf("expected one model on second fetch, got %+v", second)
+	}
+	if second[0].DisplayName != "Model One" {
+		t.Fatalf("expected cloned display name, got %q", second[0].DisplayName)
+	}
+	if second[0].Thinking == nil || len(second[0].Thinking.Levels) == 0 || second[0].Thinking.Levels[0] != "low" {
+		t.Fatalf("expected cloned thinking levels, got %+v", second[0].Thinking)
+	}
+}
+
+func TestCleanupExpiredQuotasInvalidatesAvailableModelsCache(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "openai", []*ModelInfo{{ID: "m1", Created: 1}})
+	r.SetModelQuotaExceeded("client-1", "m1")
+	if models := r.GetAvailableModels("openai"); len(models) != 1 {
+		t.Fatalf("expected cooldown model to remain listed before cleanup, got %d", len(models))
+	}
+
+	r.mutex.Lock()
+	quotaTime := time.Now().Add(-6 * time.Minute)
+	r.models["m1"].QuotaExceededClients["client-1"] = &quotaTime
+	r.mutex.Unlock()
+
+	r.CleanupExpiredQuotas()
+
+	if count := r.GetModelCount("m1"); count != 1 {
+		t.Fatalf("expected model count 1 after cleanup, got %d", count)
+	}
+	models := r.GetAvailableModels("openai")
+	if len(models) != 1 {
+		t.Fatalf("expected model to stay available after cleanup, got %d", len(models))
+	}
+	if got := models[0]["id"]; got != "m1" {
+		t.Fatalf("expected model id m1, got %v", got)
+	}
+}
+
+func TestGetAvailableModelsReturnsClonedSupportedParameters(t *testing.T) {
+	r := newTestModelRegistry()
+	r.RegisterClient("client-1", "openai", []*ModelInfo{{
+		ID:                  "m1",
+		DisplayName:         "Model One",
+		SupportedParameters: []string{"temperature", "top_p"},
+	}})
+
+	first := r.GetAvailableModels("openai")
+	if len(first) != 1 {
+		t.Fatalf("expected one model, got %d", len(first))
+	}
+	params, ok := first[0]["supported_parameters"].([]string)
+	if !ok || len(params) != 2 {
+		t.Fatalf("expected supported_parameters slice, got %#v", first[0]["supported_parameters"])
+	}
+	params[0] = "mutated"
+
+	second := r.GetAvailableModels("openai")
+	params, ok = second[0]["supported_parameters"].([]string)
+	if !ok || len(params) != 2 || params[0] != "temperature" {
+		t.Fatalf("expected cloned supported_parameters, got %#v", second[0]["supported_parameters"])
+	}
+}
+
+func TestLookupModelInfoReturnsCloneForStaticDefinitions(t *testing.T) {
+	first := LookupModelInfo("glm-4.6")
+	if first == nil || first.Thinking == nil || len(first.Thinking.Levels) == 0 {
+		t.Fatalf("expected static model with thinking levels, got %+v", first)
+	}
+	first.Thinking.Levels[0] = "mutated"
+
+	second := LookupModelInfo("glm-4.6")
+	if second == nil || second.Thinking == nil || len(second.Thinking.Levels) == 0 || second.Thinking.Levels[0] == "mutated" {
+		t.Fatalf("expected static lookup clone, got %+v", second)
+	}
+}
--- a/internal/registry/model_updater.go
+++ b/internal/registry/model_updater.go
@@ -0,0 +1,372 @@
+package registry
+
+import (
+	"context"
+	_ "embed"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	modelsFetchTimeout    = 30 * time.Second
+	modelsRefreshInterval = 3 * time.Hour
+)
+
+var modelsURLs = []string{
+	"https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json",
+	"https://models.router-for.me/models.json",
+}
+
+//go:embed models/models.json
+var embeddedModelsJSON []byte
+
+type modelStore struct {
+	mu   sync.RWMutex
+	data *staticModelsJSON
+}
+
+var modelsCatalogStore = &modelStore{}
+
+var updaterOnce sync.Once
+
+// ModelRefreshCallback is invoked when startup or periodic model refresh detects changes.
+// changedProviders contains the provider names whose model definitions changed.
+type ModelRefreshCallback func(changedProviders []string)
+
+var (
+	refreshCallbackMu     sync.Mutex
+	refreshCallback       ModelRefreshCallback
+	pendingRefreshChanges []string
+)
+
+// SetModelRefreshCallback registers a callback that is invoked when startup or
+// periodic model refresh detects changes. Only one callback is supported;
+// subsequent calls replace the previous callback.
+func SetModelRefreshCallback(cb ModelRefreshCallback) {
+	refreshCallbackMu.Lock()
+	refreshCallback = cb
+	var pending []string
+	if cb != nil && len(pendingRefreshChanges) > 0 {
+		pending = append([]string(nil), pendingRefreshChanges...)
+		pendingRefreshChanges = nil
+	}
+	refreshCallbackMu.Unlock()
+
+	if cb != nil && len(pending) > 0 {
+		cb(pending)
+	}
+}
+
+func init() {
+	// Load embedded data as fallback on startup.
+	if err := loadModelsFromBytes(embeddedModelsJSON, "embed"); err != nil {
+		panic(fmt.Sprintf("registry: failed to parse embedded models.json: %v", err))
+	}
+}
+
+// StartModelsUpdater starts a background updater that fetches models
+// immediately on startup and then refreshes the model catalog every 3 hours.
+// Safe to call multiple times; only one updater will run.
+func StartModelsUpdater(ctx context.Context) {
+	updaterOnce.Do(func() {
+		go runModelsUpdater(ctx)
+	})
+}
+
+func runModelsUpdater(ctx context.Context) {
+	tryStartupRefresh(ctx)
+	periodicRefresh(ctx)
+}
+
+func periodicRefresh(ctx context.Context) {
+	ticker := time.NewTicker(modelsRefreshInterval)
+	defer ticker.Stop()
+	log.Infof("periodic model refresh started (interval=%s)", modelsRefreshInterval)
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-ticker.C:
+			tryPeriodicRefresh(ctx)
+		}
+	}
+}
+
+// tryPeriodicRefresh fetches models from remote, compares with the current
+// catalog, and notifies the registered callback if any provider changed.
+func tryPeriodicRefresh(ctx context.Context) {
+	tryRefreshModels(ctx, "periodic model refresh")
+}
+
+// tryStartupRefresh fetches models from remote in the background during
+// process startup. It uses the same change detection as periodic refresh so
+// existing auth registrations can be updated after the callback is registered.
+func tryStartupRefresh(ctx context.Context) {
+	tryRefreshModels(ctx, "startup model refresh")
+}
+
+func tryRefreshModels(ctx context.Context, label string) {
+	oldData := getModels()
+
+	parsed, url := fetchModelsFromRemote(ctx)
+	if parsed == nil {
+		log.Warnf("%s: fetch failed from all URLs, keeping current data", label)
+		return
+	}
+
+	// Detect changes before updating store.
+	changed := detectChangedProviders(oldData, parsed)
+
+	// Update store with new data regardless.
+	modelsCatalogStore.mu.Lock()
+	modelsCatalogStore.data = parsed
+	modelsCatalogStore.mu.Unlock()
+
+	if len(changed) == 0 {
+		log.Infof("%s completed from %s, no changes detected", label, url)
+		return
+	}
+
+	log.Infof("%s completed from %s, changes detected for providers: %v", label, url, changed)
+	notifyModelRefresh(changed)
+}
+
+// fetchModelsFromRemote tries all remote URLs and returns the parsed model catalog
+// along with the URL it was fetched from. Returns (nil, "") if all fetches fail.
+func fetchModelsFromRemote(ctx context.Context) (*staticModelsJSON, string) {
+	client := &http.Client{Timeout: modelsFetchTimeout}
+	for _, url := range modelsURLs {
+		reqCtx, cancel := context.WithTimeout(ctx, modelsFetchTimeout)
+		req, err := http.NewRequestWithContext(reqCtx, "GET", url, nil)
+		if err != nil {
+			cancel()
+			log.Debugf("models fetch request creation failed for %s: %v", url, err)
+			continue
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			cancel()
+			log.Debugf("models fetch failed from %s: %v", url, err)
+			continue
+		}
+
+		if resp.StatusCode != 200 {
+			resp.Body.Close()
+			cancel()
+			log.Debugf("models fetch returned %d from %s", resp.StatusCode, url)
+			continue
+		}
+
+		data, err := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		cancel()
+
+		if err != nil {
+			log.Debugf("models fetch read error from %s: %v", url, err)
+			continue
+		}
+
+		var parsed staticModelsJSON
+		if err := json.Unmarshal(data, &parsed); err != nil {
+			log.Warnf("models parse failed from %s: %v", url, err)
+			continue
+		}
+		if err := validateModelsCatalog(&parsed); err != nil {
+			log.Warnf("models validate failed from %s: %v", url, err)
+			continue
+		}
+
+		return &parsed, url
+	}
+	return nil, ""
+}
+
+// detectChangedProviders compares two model catalogs and returns provider names
+// whose model definitions differ. Codex tiers (free/team/plus/pro) are grouped
+// under a single "codex" provider.
+func detectChangedProviders(oldData, newData *staticModelsJSON) []string {
+	if oldData == nil || newData == nil {
+		return nil
+	}
+
+	type section struct {
+		provider string
+		oldList  []*ModelInfo
+		newList  []*ModelInfo
+	}
+
+	sections := []section{
+		{"claude", oldData.Claude, newData.Claude},
+		{"gemini", oldData.Gemini, newData.Gemini},
+		{"vertex", oldData.Vertex, newData.Vertex},
+		{"gemini-cli", oldData.GeminiCLI, newData.GeminiCLI},
+		{"aistudio", oldData.AIStudio, newData.AIStudio},
+		{"codex", oldData.CodexFree, newData.CodexFree},
+		{"codex", oldData.CodexTeam, newData.CodexTeam},
+		{"codex", oldData.CodexPlus, newData.CodexPlus},
+		{"codex", oldData.CodexPro, newData.CodexPro},
+		{"qwen", oldData.Qwen, newData.Qwen},
+		{"iflow", oldData.IFlow, newData.IFlow},
+		{"kimi", oldData.Kimi, newData.Kimi},
+		{"antigravity", oldData.Antigravity, newData.Antigravity},
+	}
+
+	seen := make(map[string]bool, len(sections))
+	var changed []string
+	for _, s := range sections {
+		if seen[s.provider] {
+			continue
+		}
+		if modelSectionChanged(s.oldList, s.newList) {
+			changed = append(changed, s.provider)
+			seen[s.provider] = true
+		}
+	}
+	return changed
+}
+
+// modelSectionChanged reports whether two model slices differ.
+func modelSectionChanged(a, b []*ModelInfo) bool {
+	if len(a) != len(b) {
+		return true
+	}
+	if len(a) == 0 {
+		return false
+	}
+	aj, err1 := json.Marshal(a)
+	bj, err2 := json.Marshal(b)
+	if err1 != nil || err2 != nil {
+		return true
+	}
+	return string(aj) != string(bj)
+}
+
+func notifyModelRefresh(changedProviders []string) {
+	if len(changedProviders) == 0 {
+		return
+	}
+
+	refreshCallbackMu.Lock()
+	cb := refreshCallback
+	if cb == nil {
+		pendingRefreshChanges = mergeProviderNames(pendingRefreshChanges, changedProviders)
+		refreshCallbackMu.Unlock()
+		return
+	}
+	refreshCallbackMu.Unlock()
+	cb(changedProviders)
+}
+
+func mergeProviderNames(existing, incoming []string) []string {
+	if len(incoming) == 0 {
+		return existing
+	}
+	seen := make(map[string]struct{}, len(existing)+len(incoming))
+	merged := make([]string, 0, len(existing)+len(incoming))
+	for _, provider := range existing {
+		name := strings.ToLower(strings.TrimSpace(provider))
+		if name == "" {
+			continue
+		}
+		if _, ok := seen[name]; ok {
+			continue
+		}
+		seen[name] = struct{}{}
+		merged = append(merged, name)
+	}
+	for _, provider := range incoming {
+		name := strings.ToLower(strings.TrimSpace(provider))
+		if name == "" {
+			continue
+		}
+		if _, ok := seen[name]; ok {
+			continue
+		}
+		seen[name] = struct{}{}
+		merged = append(merged, name)
+	}
+	return merged
+}
+
+func loadModelsFromBytes(data []byte, source string) error {
+	var parsed staticModelsJSON
+	if err := json.Unmarshal(data, &parsed); err != nil {
+		return fmt.Errorf("%s: decode models catalog: %w", source, err)
+	}
+	if err := validateModelsCatalog(&parsed); err != nil {
+		return fmt.Errorf("%s: validate models catalog: %w", source, err)
+	}
+
+	modelsCatalogStore.mu.Lock()
+	modelsCatalogStore.data = &parsed
+	modelsCatalogStore.mu.Unlock()
+	return nil
+}
+
+func getModels() *staticModelsJSON {
+	modelsCatalogStore.mu.RLock()
+	defer modelsCatalogStore.mu.RUnlock()
+	return modelsCatalogStore.data
+}
+
+func validateModelsCatalog(data *staticModelsJSON) error {
+	if data == nil {
+		return fmt.Errorf("catalog is nil")
+	}
+
+	requiredSections := []struct {
+		name   string
+		models []*ModelInfo
+	}{
+		{name: "claude", models: data.Claude},
+		{name: "gemini", models: data.Gemini},
+		{name: "vertex", models: data.Vertex},
+		{name: "gemini-cli", models: data.GeminiCLI},
+		{name: "aistudio", models: data.AIStudio},
+		{name: "codex-free", models: data.CodexFree},
+		{name: "codex-team", models: data.CodexTeam},
+		{name: "codex-plus", models: data.CodexPlus},
+		{name: "codex-pro", models: data.CodexPro},
+		{name: "qwen", models: data.Qwen},
+		{name: "iflow", models: data.IFlow},
+		{name: "kimi", models: data.Kimi},
+		{name: "antigravity", models: data.Antigravity},
+	}
+
+	for _, section := range requiredSections {
+		if err := validateModelSection(section.name, section.models); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func validateModelSection(section string, models []*ModelInfo) error {
+	if len(models) == 0 {
+		return fmt.Errorf("%s section is empty", section)
+	}
+
+	seen := make(map[string]struct{}, len(models))
+	for i, model := range models {
+		if model == nil {
+			return fmt.Errorf("%s[%d] is null", section, i)
+		}
+		modelID := strings.TrimSpace(model.ID)
+		if modelID == "" {
+			return fmt.Errorf("%s[%d] has empty id", section, i)
+		}
+		if _, exists := seen[modelID]; exists {
+			return fmt.Errorf("%s contains duplicate model id %q", section, modelID)
+		}
+		seen[modelID] = struct{}{}
+	}
+	return nil
+}
--- a/internal/registry/models/models.json
+++ b/internal/registry/models/models.json
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -14,7 +14,9 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -46,8 +48,16 @@ func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Man
 // Identifier returns the executor identifier.
 func (e *AIStudioExecutor) Identifier() string { return "aistudio" }

-// PrepareRequest prepares the HTTP request for execution (no-op for AI Studio).
-func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
+// PrepareRequest prepares the HTTP request for execution.
+func (e *AIStudioExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
+	if req == nil {
+		return nil
+	}
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(req, attrs)
 	return nil
 }

@@ -66,6 +76,9 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
 		return nil, fmt.Errorf("aistudio executor: missing auth")
 	}
 	httpReq := req.WithContext(ctx)
+	if err := e.PrepareRequest(httpReq, auth); err != nil {
+		return nil, err
+	}
 	if httpReq.URL == nil || strings.TrimSpace(httpReq.URL.String()) == "" {
 		return nil, fmt.Errorf("aistudio executor: request URL is empty")
 	}
@@ -115,8 +128,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, false)
 	if err != nil {
@@ -130,6 +143,11 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		Headers: http.Header{"Content-Type": []string{"application/json"}},
 		Body:    body.payload,
 	}
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(&http.Request{Header: wsReq.Headers}, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -137,7 +155,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
@@ -151,20 +169,20 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,

 	wsResp, err := e.relay.NonStream(ctx, authID, wsReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
 	if len(wsResp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
 	}
 	if wsResp.Status < 200 || wsResp.Status >= 300 {
 		return resp, statusErr{code: wsResp.Status, msg: string(wsResp.Body)}
 	}
-	reporter.publish(ctx, parseGeminiUsage(wsResp.Body))
+	reporter.Publish(ctx, helps.ParseGeminiUsage(wsResp.Body))
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, wsResp.Body, &param)
-	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out)), Headers: wsResp.Headers.Clone()}
+	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON(out), Headers: wsResp.Headers.Clone()}
 	return resp, nil
 }

@@ -174,8 +192,8 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	translatedReq, body, err := e.translateRequest(req, opts, true)
 	if err != nil {
@@ -189,13 +207,18 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		Headers: http.Header{"Content-Type": []string{"application/json"}},
 		Body:    body.payload,
 	}
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(&http.Request{Header: wsReq.Headers}, attrs)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
@@ -208,24 +231,24 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 	})
 	wsStream, err := e.relay.Stream(ctx, authID, wsReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return nil, err
 	}
 	firstEvent, ok := <-wsStream
 	if !ok {
 		err = fmt.Errorf("wsrelay: stream closed before start")
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return nil, err
 	}
 	if firstEvent.Status > 0 && firstEvent.Status != http.StatusOK {
 		metadataLogged := false
 		if firstEvent.Status > 0 {
-			recordAPIResponseMetadata(ctx, e.cfg, firstEvent.Status, firstEvent.Headers.Clone())
+			helps.RecordAPIResponseMetadata(ctx, e.cfg, firstEvent.Status, firstEvent.Headers.Clone())
 			metadataLogged = true
 		}
 		var body bytes.Buffer
 		if len(firstEvent.Payload) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
 			body.Write(firstEvent.Payload)
 		}
 		if firstEvent.Type == wsrelay.MessageTypeStreamEnd {
@@ -233,18 +256,18 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		}
 		for event := range wsStream {
 			if event.Err != nil {
-				recordAPIResponseError(ctx, e.cfg, event.Err)
+				helps.RecordAPIResponseError(ctx, e.cfg, event.Err)
 				if body.Len() == 0 {
 					body.WriteString(event.Err.Error())
 				}
 				break
 			}
 			if !metadataLogged && event.Status > 0 {
-				recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
+				helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
 				metadataLogged = true
 			}
 			if len(event.Payload) > 0 {
-				appendAPIResponseChunk(ctx, e.cfg, event.Payload)
+				helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				body.Write(event.Payload)
 			}
 			if event.Type == wsrelay.MessageTypeStreamEnd {
@@ -260,27 +283,27 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		metadataLogged := false
 		processEvent := func(event wsrelay.StreamEvent) bool {
 			if event.Err != nil {
-				recordAPIResponseError(ctx, e.cfg, event.Err)
-				reporter.publishFailure(ctx)
+				helps.RecordAPIResponseError(ctx, e.cfg, event.Err)
+				reporter.PublishFailure(ctx)
 				out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("wsrelay: %v", event.Err)}
 				return false
 			}
 			switch event.Type {
 			case wsrelay.MessageTypeStreamStart:
 				if !metadataLogged && event.Status > 0 {
-					recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
+					helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
 					metadataLogged = true
 				}
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
-					filtered := FilterSSEUsageMetadata(event.Payload)
-					if detail, ok := parseGeminiStreamUsage(filtered); ok {
-						reporter.publish(ctx, detail)
+					helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
+					filtered := helps.FilterSSEUsageMetadata(event.Payload)
+					if detail, ok := helps.ParseGeminiStreamUsage(filtered); ok {
+						reporter.Publish(ctx, detail)
 					}
 					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, filtered, &param)
 					for i := range lines {
-						out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
+						out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON(lines[i])}
 					}
 					break
 				}
@@ -288,21 +311,21 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				return false
 			case wsrelay.MessageTypeHTTPResp:
 				if !metadataLogged && event.Status > 0 {
-					recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
+					helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
 					metadataLogged = true
 				}
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
+					helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				}
 				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, &param)
 				for i := range lines {
-					out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
+					out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON(lines[i])}
 				}
-				reporter.publish(ctx, parseGeminiUsage(event.Payload))
+				reporter.Publish(ctx, helps.ParseGeminiUsage(event.Payload))
 				return false
 			case wsrelay.MessageTypeError:
-				recordAPIResponseError(ctx, e.cfg, event.Err)
-				reporter.publishFailure(ctx)
+				helps.RecordAPIResponseError(ctx, e.cfg, event.Err)
+				reporter.PublishFailure(ctx)
 				out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("wsrelay: %v", event.Err)}
 				return false
 			}
@@ -345,7 +368,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
@@ -358,12 +381,12 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	})
 	resp, err := e.relay.NonStream(ctx, authID, wsReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone())
 	if len(resp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, resp.Body)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, resp.Body)
 	}
 	if resp.Status < 200 || resp.Status >= 300 {
 		return cliproxyexecutor.Response{}, statusErr{code: resp.Status, msg: string(resp.Body)}
@@ -373,7 +396,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		return cliproxyexecutor.Response{}, fmt.Errorf("wsrelay: totalTokens missing in response")
 	}
 	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, resp.Body)
-	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
+	return cliproxyexecutor.Response{Payload: translated}, nil
 }

 // Refresh refreshes the authentication credentials (no-op for AI Studio).
@@ -404,8 +427,8 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 		return nil, translatedPayload{}, err
 	}
 	payload = fixGeminiImageAspectRatio(baseModel, payload)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	payload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
--- a/internal/runtime/executor/antigravity_executor_buildrequest_test.go
+++ b/internal/runtime/executor/antigravity_executor_buildrequest_test.go
@@ -59,6 +59,7 @@ func buildRequestBodyFromPayload(t *testing.T, modelName string) map[string]any
 										"properties": {
 											"mode": {
 												"type": "string",
+												"deprecated": true,
 												"enum": ["a", "b"],
 												"enumTitles": ["A", "B"]
 											}
@@ -156,4 +157,7 @@ func assertSchemaSanitizedAndPropertyPreserved(t *testing.T, params map[string]a
 	if _, ok := mode["enumTitles"]; ok {
 		t.Fatalf("enumTitles should be removed from nested schema")
 	}
+	if _, ok := mode["deprecated"]; ok {
+		t.Fatalf("deprecated should be removed from nested schema")
+	}
 }
--- a/internal/runtime/executor/antigravity_executor_credits_test.go
+++ b/internal/runtime/executor/antigravity_executor_credits_test.go
@@ -0,0 +1,423 @@
+package executor
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+)
+
+func resetAntigravityCreditsRetryState() {
+	antigravityCreditsExhaustedByAuth = sync.Map{}
+	antigravityPreferCreditsByModel = sync.Map{}
+}
+
+func TestClassifyAntigravity429(t *testing.T) {
+	t.Run("quota exhausted", func(t *testing.T) {
+		body := []byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)
+		if got := classifyAntigravity429(body); got != antigravity429QuotaExhausted {
+			t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429QuotaExhausted)
+		}
+	})
+
+	t.Run("structured rate limit", func(t *testing.T) {
+		body := []byte(`{
+			"error": {
+				"status": "RESOURCE_EXHAUSTED",
+				"details": [
+					{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "RATE_LIMIT_EXCEEDED"},
+					{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
+				]
+			}
+		}`)
+		if got := classifyAntigravity429(body); got != antigravity429RateLimited {
+			t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429RateLimited)
+		}
+	})
+
+	t.Run("structured quota exhausted", func(t *testing.T) {
+		body := []byte(`{
+			"error": {
+				"status": "RESOURCE_EXHAUSTED",
+				"details": [
+					{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "QUOTA_EXHAUSTED"}
+				]
+			}
+		}`)
+		if got := classifyAntigravity429(body); got != antigravity429QuotaExhausted {
+			t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429QuotaExhausted)
+		}
+	})
+
+	t.Run("unknown", func(t *testing.T) {
+		body := []byte(`{"error":{"message":"too many requests"}}`)
+		if got := classifyAntigravity429(body); got != antigravity429Unknown {
+			t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429Unknown)
+		}
+	})
+}
+
+func TestInjectEnabledCreditTypes(t *testing.T) {
+	body := []byte(`{"model":"gemini-2.5-flash","request":{}}`)
+	got := injectEnabledCreditTypes(body)
+	if got == nil {
+		t.Fatal("injectEnabledCreditTypes() returned nil")
+	}
+	if !strings.Contains(string(got), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+		t.Fatalf("injectEnabledCreditTypes() = %s, want enabledCreditTypes", string(got))
+	}
+
+	if got := injectEnabledCreditTypes([]byte(`not json`)); got != nil {
+		t.Fatalf("injectEnabledCreditTypes() for invalid json = %s, want nil", string(got))
+	}
+}
+
+func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) {
+	for _, body := range [][]byte{
+		[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
+		[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
+		[]byte(`{"error":{"message":"Resource has been exhausted"}}`),
+	} {
+		if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
+			t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
+		}
+	}
+	if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) {
+		t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false")
+	}
+}
+
+func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var (
+		mu            sync.Mutex
+		requestBodies []string
+	)
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+
+		mu.Lock()
+		requestBodies = append(requestBodies, string(body))
+		reqNum := len(requestBodies)
+		mu.Unlock()
+
+		if reqNum == 1 {
+			w.WriteHeader(http.StatusTooManyRequests)
+			_, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`))
+			return
+		}
+
+		if !strings.Contains(string(body), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+			t.Fatalf("second request body missing enabledCreditTypes: %s", string(body))
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
+	}))
+	defer server.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{
+		QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true},
+	})
+	auth := &cliproxyauth.Auth{
+		ID: "auth-credits-ok",
+		Attributes: map[string]string{
+			"base_url": server.URL,
+		},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+
+	resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FormatAntigravity,
+	})
+	if err != nil {
+		t.Fatalf("Execute() error = %v", err)
+	}
+	if len(resp.Payload) == 0 {
+		t.Fatal("Execute() returned empty payload")
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+	if len(requestBodies) != 2 {
+		t.Fatalf("request count = %d, want 2", len(requestBodies))
+	}
+}
+
+func TestAntigravityExecute_SkipsCreditsRetryWhenAlreadyExhausted(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var requestCount int
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`))
+	}))
+	defer server.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{
+		QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true},
+	})
+	auth := &cliproxyauth.Auth{
+		ID: "auth-credits-exhausted",
+		Attributes: map[string]string{
+			"base_url": server.URL,
+		},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+	markAntigravityCreditsExhausted(auth, time.Now())
+
+	_, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FormatAntigravity,
+	})
+	if err == nil {
+		t.Fatal("Execute() error = nil, want 429")
+	}
+	sErr, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("Execute() error type = %T, want statusErr", err)
+	}
+	if got := sErr.StatusCode(); got != http.StatusTooManyRequests {
+		t.Fatalf("Execute() status code = %d, want %d", got, http.StatusTooManyRequests)
+	}
+	if requestCount != 1 {
+		t.Fatalf("request count = %d, want 1", requestCount)
+	}
+}
+
+func TestAntigravityExecute_PrefersCreditsAfterSuccessfulFallback(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var (
+		mu            sync.Mutex
+		requestBodies []string
+	)
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+
+		mu.Lock()
+		requestBodies = append(requestBodies, string(body))
+		reqNum := len(requestBodies)
+		mu.Unlock()
+
+		switch reqNum {
+		case 1:
+			w.WriteHeader(http.StatusTooManyRequests)
+			_, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"QUOTA_EXHAUSTED"},{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"10s"}]}}`))
+		case 2, 3:
+			if !strings.Contains(string(body), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+				t.Fatalf("request %d body missing enabledCreditTypes: %s", reqNum, string(body))
+			}
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"OK"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
+		default:
+			t.Fatalf("unexpected request count %d", reqNum)
+		}
+	}))
+	defer server.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{
+		QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true},
+	})
+	auth := &cliproxyauth.Auth{
+		ID: "auth-prefer-credits",
+		Attributes: map[string]string{
+			"base_url": server.URL,
+		},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+
+	request := cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}
+	opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FormatAntigravity}
+
+	if _, err := exec.Execute(context.Background(), auth, request, opts); err != nil {
+		t.Fatalf("first Execute() error = %v", err)
+	}
+	if _, err := exec.Execute(context.Background(), auth, request, opts); err != nil {
+		t.Fatalf("second Execute() error = %v", err)
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+	if len(requestBodies) != 3 {
+		t.Fatalf("request count = %d, want 3", len(requestBodies))
+	}
+	if strings.Contains(requestBodies[0], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+		t.Fatalf("first request unexpectedly used credits: %s", requestBodies[0])
+	}
+	if !strings.Contains(requestBodies[1], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+		t.Fatalf("fallback request missing credits: %s", requestBodies[1])
+	}
+	if !strings.Contains(requestBodies[2], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+		t.Fatalf("preferred request missing credits: %s", requestBodies[2])
+	}
+}
+
+func TestAntigravityExecute_PreservesBaseURLFallbackAfterCreditsRetryFailure(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var (
+		mu          sync.Mutex
+		firstCount  int
+		secondCount int
+	)
+
+	firstServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+
+		mu.Lock()
+		firstCount++
+		reqNum := firstCount
+		mu.Unlock()
+
+		switch reqNum {
+		case 1:
+			w.WriteHeader(http.StatusTooManyRequests)
+			_, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"QUOTA_EXHAUSTED"}]}}`))
+		case 2:
+			if !strings.Contains(string(body), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+				t.Fatalf("credits retry missing enabledCreditTypes: %s", string(body))
+			}
+			w.WriteHeader(http.StatusForbidden)
+			_, _ = w.Write([]byte(`{"error":{"message":"permission denied"}}`))
+		default:
+			t.Fatalf("unexpected first server request count %d", reqNum)
+		}
+	}))
+	defer firstServer.Close()
+
+	secondServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		mu.Lock()
+		secondCount++
+		mu.Unlock()
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
+	}))
+	defer secondServer.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{
+		QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true},
+	})
+	auth := &cliproxyauth.Auth{
+		ID: "auth-baseurl-fallback",
+		Attributes: map[string]string{
+			"base_url": firstServer.URL,
+		},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+
+	originalOrder := antigravityBaseURLFallbackOrder
+	defer func() { antigravityBaseURLFallbackOrder = originalOrder }()
+	antigravityBaseURLFallbackOrder = func(auth *cliproxyauth.Auth) []string {
+		return []string{firstServer.URL, secondServer.URL}
+	}
+
+	resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FormatAntigravity,
+	})
+	if err != nil {
+		t.Fatalf("Execute() error = %v", err)
+	}
+	if len(resp.Payload) == 0 {
+		t.Fatal("Execute() returned empty payload")
+	}
+	if firstCount != 2 {
+		t.Fatalf("first server request count = %d, want 2", firstCount)
+	}
+	if secondCount != 1 {
+		t.Fatalf("second server request count = %d, want 1", secondCount)
+	}
+}
+
+func TestAntigravityExecute_DoesNotDirectInjectCreditsWhenFlagDisabled(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var requestBodies []string
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+		requestBodies = append(requestBodies, string(body))
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`))
+	}))
+	defer server.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{
+		QuotaExceeded: config.QuotaExceeded{AntigravityCredits: false},
+	})
+	auth := &cliproxyauth.Auth{
+		ID: "auth-flag-disabled",
+		Attributes: map[string]string{
+			"base_url": server.URL,
+		},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+	markAntigravityPreferCredits(auth, "gemini-2.5-flash", time.Now(), nil)
+
+	_, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FormatAntigravity,
+	})
+	if err == nil {
+		t.Fatal("Execute() error = nil, want 429")
+	}
+	if len(requestBodies) != 1 {
+		t.Fatalf("request count = %d, want 1", len(requestBodies))
+	}
+	if strings.Contains(requestBodies[0], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) {
+		t.Fatalf("request unexpectedly used enabledCreditTypes with flag disabled: %s", requestBodies[0])
+	}
+}
--- a/internal/runtime/executor/antigravity_executor_models_cache_test.go
+++ b/internal/runtime/executor/antigravity_executor_models_cache_test.go
@@ -1,90 +0,0 @@
-package executor
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-)
-
-func resetAntigravityPrimaryModelsCacheForTest() {
-	antigravityPrimaryModelsCache.mu.Lock()
-	antigravityPrimaryModelsCache.models = nil
-	antigravityPrimaryModelsCache.mu.Unlock()
-}
-
-func TestStoreAntigravityPrimaryModels_EmptyDoesNotOverwrite(t *testing.T) {
-	resetAntigravityPrimaryModelsCacheForTest()
-	t.Cleanup(resetAntigravityPrimaryModelsCacheForTest)
-
-	seed := []*registry.ModelInfo{
-		{ID: "claude-sonnet-4-5"},
-		{ID: "gemini-2.5-pro"},
-	}
-	if updated := storeAntigravityPrimaryModels(seed); !updated {
-		t.Fatal("expected non-empty model list to update primary cache")
-	}
-
-	if updated := storeAntigravityPrimaryModels(nil); updated {
-		t.Fatal("expected nil model list not to overwrite primary cache")
-	}
-	if updated := storeAntigravityPrimaryModels([]*registry.ModelInfo{}); updated {
-		t.Fatal("expected empty model list not to overwrite primary cache")
-	}
-
-	got := loadAntigravityPrimaryModels()
-	if len(got) != 2 {
-		t.Fatalf("expected cached model count 2, got %d", len(got))
-	}
-	if got[0].ID != "claude-sonnet-4-5" || got[1].ID != "gemini-2.5-pro" {
-		t.Fatalf("unexpected cached model ids: %q, %q", got[0].ID, got[1].ID)
-	}
-}
-
-func TestLoadAntigravityPrimaryModels_ReturnsClone(t *testing.T) {
-	resetAntigravityPrimaryModelsCacheForTest()
-	t.Cleanup(resetAntigravityPrimaryModelsCacheForTest)
-
-	if updated := storeAntigravityPrimaryModels([]*registry.ModelInfo{{
-		ID:                         "gpt-5",
-		DisplayName:                "GPT-5",
-		SupportedGenerationMethods: []string{"generateContent"},
-		SupportedParameters:        []string{"temperature"},
-		Thinking: &registry.ThinkingSupport{
-			Levels: []string{"high"},
-		},
-	}}); !updated {
-		t.Fatal("expected model cache update")
-	}
-
-	got := loadAntigravityPrimaryModels()
-	if len(got) != 1 {
-		t.Fatalf("expected one cached model, got %d", len(got))
-	}
-	got[0].ID = "mutated-id"
-	if len(got[0].SupportedGenerationMethods) > 0 {
-		got[0].SupportedGenerationMethods[0] = "mutated-method"
-	}
-	if len(got[0].SupportedParameters) > 0 {
-		got[0].SupportedParameters[0] = "mutated-parameter"
-	}
-	if got[0].Thinking != nil && len(got[0].Thinking.Levels) > 0 {
-		got[0].Thinking.Levels[0] = "mutated-level"
-	}
-
-	again := loadAntigravityPrimaryModels()
-	if len(again) != 1 {
-		t.Fatalf("expected one cached model after mutation, got %d", len(again))
-	}
-	if again[0].ID != "gpt-5" {
-		t.Fatalf("expected cached model id to remain %q, got %q", "gpt-5", again[0].ID)
-	}
-	if len(again[0].SupportedGenerationMethods) == 0 || again[0].SupportedGenerationMethods[0] != "generateContent" {
-		t.Fatalf("expected cached generation methods to be unmutated, got %v", again[0].SupportedGenerationMethods)
-	}
-	if len(again[0].SupportedParameters) == 0 || again[0].SupportedParameters[0] != "temperature" {
-		t.Fatalf("expected cached supported parameters to be unmutated, got %v", again[0].SupportedParameters)
-	}
-	if again[0].Thinking == nil || len(again[0].Thinking.Levels) == 0 || again[0].Thinking.Levels[0] != "high" {
-		t.Fatalf("expected cached model thinking levels to be unmutated, got %v", again[0].Thinking)
-	}
-}
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
--- a/internal/runtime/executor/claude_signing.go
+++ b/internal/runtime/executor/claude_signing.go
@@ -0,0 +1,81 @@
+package executor
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	xxHash64 "github.com/pierrec/xxHash/xxHash64"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+const claudeCCHSeed uint64 = 0x6E52736AC806831E
+
+var claudeBillingHeaderCCHPattern = regexp.MustCompile(`\bcch=([0-9a-f]{5});`)
+
+func signAnthropicMessagesBody(body []byte) []byte {
+	billingHeader := gjson.GetBytes(body, "system.0.text").String()
+	if !strings.HasPrefix(billingHeader, "x-anthropic-billing-header:") {
+		return body
+	}
+	if !claudeBillingHeaderCCHPattern.MatchString(billingHeader) {
+		return body
+	}
+
+	unsignedBillingHeader := claudeBillingHeaderCCHPattern.ReplaceAllString(billingHeader, "cch=00000;")
+	unsignedBody, err := sjson.SetBytes(body, "system.0.text", unsignedBillingHeader)
+	if err != nil {
+		return body
+	}
+
+	cch := fmt.Sprintf("%05x", xxHash64.Checksum(unsignedBody, claudeCCHSeed)&0xFFFFF)
+	signedBillingHeader := claudeBillingHeaderCCHPattern.ReplaceAllString(unsignedBillingHeader, "cch="+cch+";")
+	signedBody, err := sjson.SetBytes(unsignedBody, "system.0.text", signedBillingHeader)
+	if err != nil {
+		return unsignedBody
+	}
+	return signedBody
+}
+
+func resolveClaudeKeyConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.ClaudeKey {
+	if cfg == nil || auth == nil {
+		return nil
+	}
+
+	apiKey, baseURL := claudeCreds(auth)
+	if apiKey == "" {
+		return nil
+	}
+
+	for i := range cfg.ClaudeKey {
+		entry := &cfg.ClaudeKey[i]
+		cfgKey := strings.TrimSpace(entry.APIKey)
+		cfgBase := strings.TrimSpace(entry.BaseURL)
+		if !strings.EqualFold(cfgKey, apiKey) {
+			continue
+		}
+		if baseURL != "" && cfgBase != "" && !strings.EqualFold(cfgBase, baseURL) {
+			continue
+		}
+		return entry
+	}
+
+	return nil
+}
+
+// resolveClaudeKeyCloakConfig finds the matching ClaudeKey config and returns its CloakConfig.
+func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig {
+	entry := resolveClaudeKeyConfig(cfg, auth)
+	if entry == nil {
+		return nil
+	}
+	return entry.Cloak
+}
+
+func experimentalCCHSigningEnabled(cfg *config.Config, auth *cliproxyauth.Auth) bool {
+	entry := resolveClaudeKeyConfig(cfg, auth)
+	return entry != nil && entry.ExperimentalCCHSigning
+}
--- a/internal/runtime/executor/codebuddy_executor.go
+++ b/internal/runtime/executor/codebuddy_executor.go
@@ -0,0 +1,343 @@
+package executor
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codebuddy"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	codeBuddyChatPath = "/v2/chat/completions"
+	codeBuddyAuthType = "codebuddy"
+)
+
+// CodeBuddyExecutor handles requests to the CodeBuddy API.
+type CodeBuddyExecutor struct {
+	cfg *config.Config
+}
+
+// NewCodeBuddyExecutor creates a new CodeBuddy executor instance.
+func NewCodeBuddyExecutor(cfg *config.Config) *CodeBuddyExecutor {
+	return &CodeBuddyExecutor{cfg: cfg}
+}
+
+// Identifier returns the unique identifier for this executor.
+func (e *CodeBuddyExecutor) Identifier() string { return codeBuddyAuthType }
+
+// codeBuddyCredentials extracts the access token and domain from auth metadata.
+func codeBuddyCredentials(auth *cliproxyauth.Auth) (accessToken, userID, domain string) {
+	if auth == nil {
+		return "", "", ""
+	}
+	accessToken = metaStringValue(auth.Metadata, "access_token")
+	userID = metaStringValue(auth.Metadata, "user_id")
+	domain = metaStringValue(auth.Metadata, "domain")
+	if domain == "" {
+		domain = codebuddy.DefaultDomain
+	}
+	return
+}
+
+// PrepareRequest prepares the HTTP request before execution.
+func (e *CodeBuddyExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
+	if req == nil {
+		return nil
+	}
+	accessToken, userID, domain := codeBuddyCredentials(auth)
+	if accessToken == "" {
+		return fmt.Errorf("codebuddy: missing access token")
+	}
+	e.applyHeaders(req, accessToken, userID, domain)
+	return nil
+}
+
+// HttpRequest executes a raw HTTP request.
+func (e *CodeBuddyExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, req *http.Request) (*http.Response, error) {
+	if req == nil {
+		return nil, fmt.Errorf("codebuddy executor: request is nil")
+	}
+	if ctx == nil {
+		ctx = req.Context()
+	}
+	httpReq := req.WithContext(ctx)
+	if err := e.PrepareRequest(httpReq, auth); err != nil {
+		return nil, err
+	}
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	return httpClient.Do(httpReq)
+}
+
+// Execute performs a non-streaming request.
+func (e *CodeBuddyExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	accessToken, userID, domain := codeBuddyCredentials(auth)
+	if accessToken == "" {
+		return resp, fmt.Errorf("codebuddy: missing access token")
+	}
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("openai")
+
+	originalPayloadSource := req.Payload
+	if len(opts.OriginalRequest) > 0 {
+		originalPayloadSource = opts.OriginalRequest
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayloadSource, false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
+
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return resp, err
+	}
+
+	url := codebuddy.BaseURL + codeBuddyChatPath
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
+	if err != nil {
+		return resp, err
+	}
+	e.applyHeaders(httpReq, accessToken, userID, domain)
+
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      translated,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, err := httpClient.Do(httpReq)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return resp, err
+	}
+	defer func() {
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("codebuddy executor: close response body error: %v", errClose)
+		}
+	}()
+
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if !isHTTPSuccess(httpResp.StatusCode) {
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		log.Debugf("codebuddy executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		return resp, err
+	}
+
+	body, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return resp, err
+	}
+	appendAPIResponseChunk(ctx, e.cfg, body)
+	reporter.publish(ctx, parseOpenAIUsage(body))
+	reporter.ensurePublished(ctx)
+
+	var param any
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, body, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	return resp, nil
+}
+
+// ExecuteStream performs a streaming request.
+func (e *CodeBuddyExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	accessToken, userID, domain := codeBuddyCredentials(auth)
+	if accessToken == "" {
+		return nil, fmt.Errorf("codebuddy: missing access token")
+	}
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("openai")
+
+	originalPayloadSource := req.Payload
+	if len(opts.OriginalRequest) > 0 {
+		originalPayloadSource = opts.OriginalRequest
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayloadSource, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
+
+	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	url := codebuddy.BaseURL + codeBuddyChatPath
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
+	if err != nil {
+		return nil, err
+	}
+	e.applyHeaders(httpReq, accessToken, userID, domain)
+	httpReq.Header.Set("Accept", "text/event-stream")
+	httpReq.Header.Set("Cache-Control", "no-cache")
+
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      translated,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, err := httpClient.Do(httpReq)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return nil, err
+	}
+
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if !isHTTPSuccess(httpResp.StatusCode) {
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		httpResp.Body.Close()
+		log.Debugf("codebuddy executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		return nil, err
+	}
+
+	out := make(chan cliproxyexecutor.StreamChunk)
+	go func() {
+		defer close(out)
+		defer func() {
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("codebuddy executor: close stream body error: %v", errClose)
+			}
+		}()
+
+		scanner := bufio.NewScanner(httpResp.Body)
+		scanner.Buffer(nil, maxScannerBufferSize)
+		var param any
+		for scanner.Scan() {
+			line := scanner.Bytes()
+			appendAPIResponseChunk(ctx, e.cfg, line)
+			if detail, ok := parseOpenAIStreamUsage(line); ok {
+				reporter.publish(ctx, detail)
+			}
+			if len(line) == 0 {
+				continue
+			}
+			if !bytes.HasPrefix(line, []byte("data:")) {
+				continue
+			}
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(line), &param)
+			for i := range chunks {
+				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+			}
+		}
+		if errScan := scanner.Err(); errScan != nil {
+			recordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.publishFailure(ctx)
+			out <- cliproxyexecutor.StreamChunk{Err: errScan}
+		}
+		reporter.ensurePublished(ctx)
+	}()
+
+	return &cliproxyexecutor.StreamResult{
+		Headers: httpResp.Header.Clone(),
+		Chunks:  out,
+	}, nil
+}
+
+// Refresh exchanges the CodeBuddy refresh token for a new access token.
+func (e *CodeBuddyExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
+	if auth == nil {
+		return nil, fmt.Errorf("codebuddy: missing auth")
+	}
+
+	refreshToken := metaStringValue(auth.Metadata, "refresh_token")
+	if refreshToken == "" {
+		log.Debugf("codebuddy executor: no refresh token available, skipping refresh")
+		return auth, nil
+	}
+
+	accessToken, userID, domain := codeBuddyCredentials(auth)
+
+	authSvc := codebuddy.NewCodeBuddyAuth(e.cfg)
+	storage, err := authSvc.RefreshToken(ctx, accessToken, refreshToken, userID, domain)
+	if err != nil {
+		return nil, fmt.Errorf("codebuddy: token refresh failed: %w", err)
+	}
+
+	updated := auth.Clone()
+	updated.Metadata["access_token"] = storage.AccessToken
+	if storage.RefreshToken != "" {
+		updated.Metadata["refresh_token"] = storage.RefreshToken
+	}
+	updated.Metadata["expires_in"] = storage.ExpiresIn
+	updated.Metadata["domain"] = storage.Domain
+	if storage.UserID != "" {
+		updated.Metadata["user_id"] = storage.UserID
+	}
+	now := time.Now()
+	updated.UpdatedAt = now
+	updated.LastRefreshedAt = now
+
+	return updated, nil
+}
+
+// CountTokens is not supported for CodeBuddy.
+func (e *CodeBuddyExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	return cliproxyexecutor.Response{}, fmt.Errorf("codebuddy: count tokens not supported")
+}
+
+// applyHeaders sets required headers for CodeBuddy API requests.
+func (e *CodeBuddyExecutor) applyHeaders(req *http.Request, accessToken, userID, domain string) {
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", codebuddy.UserAgent)
+	req.Header.Set("X-User-Id", userID)
+	req.Header.Set("X-Domain", domain)
+	req.Header.Set("X-Product", "SaaS")
+	req.Header.Set("X-IDE-Type", "CLI")
+	req.Header.Set("X-IDE-Name", "CLI")
+	req.Header.Set("X-IDE-Version", "2.63.2")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+}
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -13,6 +13,7 @@ import (
 	codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -28,8 +29,8 @@ import (
 )

 const (
-	codexClientVersion = "0.101.0"
-	codexUserAgent     = "codex_cli_rs/0.101.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464"
+	codexUserAgent  = "codex-tui/0.118.0 (Mac OS 26.3.1; arm64) iTerm.app/3.6.9 (codex-tui; 0.118.0)"
+	codexOriginator = "codex-tui"
 )

 var dataTag = []byte("data:")
@@ -73,7 +74,7 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth
 	if err := e.PrepareRequest(httpReq, auth); err != nil {
 		return nil, err
 	}
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	return httpClient.Do(httpReq)
 }

@@ -88,8 +89,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -106,30 +107,29 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		return resp, err
 	}

-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
-	if !gjson.GetBytes(body, "instructions").Exists() {
-		body, _ = sjson.SetBytes(body, "instructions", "")
-	}
+	body, _ = sjson.DeleteBytes(body, "stream_options")
+	body = normalizeCodexInstructions(body)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
 	if err != nil {
 		return resp, err
 	}
-	applyCodexHeaders(httpReq, auth, apiKey, true)
+	applyCodexHeaders(httpReq, auth, apiKey, true, e.cfg)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -140,10 +140,10 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		AuthType:  authType,
 		AuthValue: authValue,
 	})
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
 	defer func() {
@@ -151,20 +151,20 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 			log.Errorf("codex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = newCodexStatusErr(httpResp.StatusCode, b)
 		return resp, err
 	}
 	data, err := io.ReadAll(httpResp.Body)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)

 	lines := bytes.Split(data, []byte("\n"))
 	for _, line := range lines {
@@ -177,13 +177,13 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 			continue
 		}

-		if detail, ok := parseCodexUsage(line); ok {
-			reporter.publish(ctx, detail)
+		if detail, ok := helps.ParseCodexUsage(line); ok {
+			reporter.Publish(ctx, detail)
 		}

 		var param any
 		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, line, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+		resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 		return resp, nil
 	}
 	err = statusErr{code: 408, msg: "stream error: stream disconnected before completion: stream closed before response.completed"}
@@ -198,8 +198,8 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai-response")
@@ -216,24 +216,25 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 		return resp, err
 	}

-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "stream")
+	body = normalizeCodexInstructions(body)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses/compact"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
 	if err != nil {
 		return resp, err
 	}
-	applyCodexHeaders(httpReq, auth, apiKey, false)
+	applyCodexHeaders(httpReq, auth, apiKey, false, e.cfg)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -244,10 +245,10 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 		AuthType:  authType,
 		AuthValue: authValue,
 	})
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
 	defer func() {
@@ -255,25 +256,25 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 			log.Errorf("codex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = newCodexStatusErr(httpResp.StatusCode, b)
 		return resp, err
 	}
 	data, err := io.ReadAll(httpResp.Body)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
-	reporter.publish(ctx, parseOpenAIUsage(data))
-	reporter.ensurePublished(ctx)
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.Publish(ctx, helps.ParseOpenAIUsage(data))
+	reporter.EnsurePublished(ctx)
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 	return resp, nil
 }

@@ -288,8 +289,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -306,29 +307,28 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		return nil, err
 	}

-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
+	body, _ = sjson.DeleteBytes(body, "stream_options")
 	body, _ = sjson.SetBytes(body, "model", baseModel)
-	if !gjson.GetBytes(body, "instructions").Exists() {
-		body, _ = sjson.SetBytes(body, "instructions", "")
-	}
+	body = normalizeCodexInstructions(body)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
 	if err != nil {
 		return nil, err
 	}
-	applyCodexHeaders(httpReq, auth, apiKey, true)
+	applyCodexHeaders(httpReq, auth, apiKey, true, e.cfg)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -340,24 +340,24 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return nil, err
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		data, readErr := io.ReadAll(httpResp.Body)
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("codex executor: close response body error: %v", errClose)
 		}
 		if readErr != nil {
-			recordAPIResponseError(ctx, e.cfg, readErr)
+			helps.RecordAPIResponseError(ctx, e.cfg, readErr)
 			return nil, readErr
 		}
-		appendAPIResponseChunk(ctx, e.cfg, data)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		err = newCodexStatusErr(httpResp.StatusCode, data)
 		return nil, err
 	}
@@ -374,25 +374,25 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
-			appendAPIResponseChunk(ctx, e.cfg, line)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, line)

 			if bytes.HasPrefix(line, dataTag) {
 				data := bytes.TrimSpace(line[5:])
 				if gjson.GetBytes(data, "type").String() == "response.completed" {
-					if detail, ok := parseCodexUsage(data); ok {
-						reporter.publish(ctx, detail)
+					if detail, ok := helps.ParseCodexUsage(data); ok {
+						reporter.Publish(ctx, detail)
 					}
 				}
 			}

 			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, originalPayload, body, bytes.Clone(line), &param)
 			for i := range chunks {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
 			}
 		}
 		if errScan := scanner.Err(); errScan != nil {
-			recordAPIResponseError(ctx, e.cfg, errScan)
-			reporter.publishFailure(ctx)
+			helps.RecordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.PublishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
@@ -415,10 +415,9 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.DeleteBytes(body, "safety_identifier")
+	body, _ = sjson.DeleteBytes(body, "stream_options")
 	body, _ = sjson.SetBytes(body, "stream", false)
-	if !gjson.GetBytes(body, "instructions").Exists() {
-		body, _ = sjson.SetBytes(body, "instructions", "")
-	}
+	body = normalizeCodexInstructions(body)

 	enc, err := tokenizerForCodexModel(baseModel)
 	if err != nil {
@@ -432,7 +431,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	usageJSON := fmt.Sprintf(`{"response":{"usage":{"input_tokens":%d,"output_tokens":0,"total_tokens":%d}}}`, count, count)
 	translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, []byte(usageJSON))
-	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
+	return cliproxyexecutor.Response{Payload: translated}, nil
 }

 func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
@@ -597,18 +596,18 @@ func (e *CodexExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*
 }

 func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Format, url string, req cliproxyexecutor.Request, rawJSON []byte) (*http.Request, error) {
-	var cache codexCache
+	var cache helps.CodexCache
 	if from == "claude" {
 		userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id")
 		if userIDResult.Exists() {
 			key := fmt.Sprintf("%s-%s", req.Model, userIDResult.String())
 			var ok bool
-			if cache, ok = getCodexCache(key); !ok {
-				cache = codexCache{
+			if cache, ok = helps.GetCodexCache(key); !ok {
+				cache = helps.CodexCache{
 					ID:     uuid.New().String(),
 					Expire: time.Now().Add(1 * time.Hour),
 				}
-				setCodexCache(key, cache)
+				helps.SetCodexCache(key, cache)
 			}
 		}
 	} else if from == "openai-response" {
@@ -616,6 +615,10 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form
 		if promptCacheKey.Exists() {
 			cache.ID = promptCacheKey.String()
 		}
+	} else if from == "openai" {
+		if apiKey := strings.TrimSpace(helps.APIKeyFromContext(ctx)); apiKey != "" {
+			cache.ID = uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:"+apiKey)).String()
+		}
 	}

 	if cache.ID != "" {
@@ -626,13 +629,12 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form
 		return nil, err
 	}
 	if cache.ID != "" {
-		httpReq.Header.Set("Conversation_id", cache.ID)
 		httpReq.Header.Set("Session_id", cache.ID)
 	}
 	return httpReq, nil
 }

-func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, stream bool) {
+func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, stream bool, cfg *config.Config) {
 	r.Header.Set("Content-Type", "application/json")
 	r.Header.Set("Authorization", "Bearer "+token)

@@ -641,9 +643,18 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 		ginHeaders = ginCtx.Request.Header
 	}

-	misc.EnsureHeader(r.Header, ginHeaders, "Version", codexClientVersion)
-	misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", codexUserAgent)
+	if ginHeaders.Get("X-Codex-Beta-Features") != "" {
+		r.Header.Set("X-Codex-Beta-Features", ginHeaders.Get("X-Codex-Beta-Features"))
+	}
+	misc.EnsureHeader(r.Header, ginHeaders, "Version", "")
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Codex-Turn-Metadata", "")
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Client-Request-Id", "")
+	cfgUserAgent, _ := codexHeaderDefaults(cfg, auth)
+	ensureHeaderWithConfigPrecedence(r.Header, ginHeaders, "User-Agent", cfgUserAgent, codexUserAgent)
+
+	if strings.Contains(r.Header.Get("User-Agent"), "Mac OS") {
+		misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
+	}

 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
@@ -658,8 +669,12 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 			isAPIKey = true
 		}
 	}
+	if originator := strings.TrimSpace(ginHeaders.Get("Originator")); originator != "" {
+		r.Header.Set("Originator", originator)
+	} else if !isAPIKey {
+		r.Header.Set("Originator", codexOriginator)
+	}
 	if !isAPIKey {
-		r.Header.Set("Originator", "codex_cli_rs")
 		if auth != nil && auth.Metadata != nil {
 			if accountID, ok := auth.Metadata["account_id"].(string); ok {
 				r.Header.Set("Chatgpt-Account-Id", accountID)
@@ -674,13 +689,47 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 }

 func newCodexStatusErr(statusCode int, body []byte) statusErr {
-	err := statusErr{code: statusCode, msg: string(body)}
-	if retryAfter := parseCodexRetryAfter(statusCode, body, time.Now()); retryAfter != nil {
+	errCode := statusCode
+	if isCodexModelCapacityError(body) {
+		errCode = http.StatusTooManyRequests
+	}
+	err := statusErr{code: errCode, msg: string(body)}
+	if retryAfter := parseCodexRetryAfter(errCode, body, time.Now()); retryAfter != nil {
 		err.retryAfter = retryAfter
 	}
 	return err
 }

+func normalizeCodexInstructions(body []byte) []byte {
+	instructions := gjson.GetBytes(body, "instructions")
+	if !instructions.Exists() || instructions.Type == gjson.Null {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}
+	return body
+}
+
+func isCodexModelCapacityError(errorBody []byte) bool {
+	if len(errorBody) == 0 {
+		return false
+	}
+	candidates := []string{
+		gjson.GetBytes(errorBody, "error.message").String(),
+		gjson.GetBytes(errorBody, "message").String(),
+		string(errorBody),
+	}
+	for _, candidate := range candidates {
+		lower := strings.ToLower(strings.TrimSpace(candidate))
+		if lower == "" {
+			continue
+		}
+		if strings.Contains(lower, "selected model is at capacity") ||
+			strings.Contains(lower, "model is at capacity. please try a different model") {
+			return true
+		}
+	}
+	return false
+}
+
 func parseCodexRetryAfter(statusCode int, errorBody []byte, now time.Time) *time.Duration {
 	if statusCode != http.StatusTooManyRequests || len(errorBody) == 0 {
 		return nil
--- a/internal/runtime/executor/codex_executor_cache_test.go
+++ b/internal/runtime/executor/codex_executor_cache_test.go
@@ -0,0 +1,64 @@
+package executor
+
+import (
+	"context"
+	"io"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	"github.com/tidwall/gjson"
+)
+
+func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFromAPIKey(t *testing.T) {
+	recorder := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(recorder)
+	ginCtx.Set("apiKey", "test-api-key")
+
+	ctx := context.WithValue(context.Background(), "gin", ginCtx)
+	executor := &CodexExecutor{}
+	rawJSON := []byte(`{"model":"gpt-5.3-codex","stream":true}`)
+	req := cliproxyexecutor.Request{
+		Model:   "gpt-5.3-codex",
+		Payload: []byte(`{"model":"gpt-5.3-codex"}`),
+	}
+	url := "https://example.com/responses"
+
+	httpReq, err := executor.cacheHelper(ctx, sdktranslator.FromString("openai"), url, req, rawJSON)
+	if err != nil {
+		t.Fatalf("cacheHelper error: %v", err)
+	}
+
+	body, errRead := io.ReadAll(httpReq.Body)
+	if errRead != nil {
+		t.Fatalf("read request body: %v", errRead)
+	}
+
+	expectedKey := uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:test-api-key")).String()
+	gotKey := gjson.GetBytes(body, "prompt_cache_key").String()
+	if gotKey != expectedKey {
+		t.Fatalf("prompt_cache_key = %q, want %q", gotKey, expectedKey)
+	}
+	if gotConversation := httpReq.Header.Get("Conversation_id"); gotConversation != "" {
+		t.Fatalf("Conversation_id = %q, want empty", gotConversation)
+	}
+	if gotSession := httpReq.Header.Get("Session_id"); gotSession != expectedKey {
+		t.Fatalf("Session_id = %q, want %q", gotSession, expectedKey)
+	}
+
+	httpReq2, err := executor.cacheHelper(ctx, sdktranslator.FromString("openai"), url, req, rawJSON)
+	if err != nil {
+		t.Fatalf("cacheHelper error (second call): %v", err)
+	}
+	body2, errRead2 := io.ReadAll(httpReq2.Body)
+	if errRead2 != nil {
+		t.Fatalf("read request body (second call): %v", errRead2)
+	}
+	gotKey2 := gjson.GetBytes(body2, "prompt_cache_key").String()
+	if gotKey2 != expectedKey {
+		t.Fatalf("prompt_cache_key (second call) = %q, want %q", gotKey2, expectedKey)
+	}
+}
--- a/internal/runtime/executor/codex_executor_compact_test.go
+++ b/internal/runtime/executor/codex_executor_compact_test.go
@@ -0,0 +1,79 @@
+package executor
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	"github.com/tidwall/gjson"
+)
+
+func TestCodexExecutorCompactAddsDefaultInstructions(t *testing.T) {
+	cases := []struct {
+		name    string
+		payload string
+	}{
+		{
+			name:    "missing instructions",
+			payload: `{"model":"gpt-5.4","input":"hello"}`,
+		},
+		{
+			name:    "null instructions",
+			payload: `{"model":"gpt-5.4","instructions":null,"input":"hello"}`,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var gotPath string
+			var gotBody []byte
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				gotPath = r.URL.Path
+				body, _ := io.ReadAll(r.Body)
+				gotBody = body
+				w.Header().Set("Content-Type", "application/json")
+				_, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`))
+			}))
+			defer server.Close()
+
+			executor := NewCodexExecutor(&config.Config{})
+			auth := &cliproxyauth.Auth{Attributes: map[string]string{
+				"base_url": server.URL,
+				"api_key":  "test",
+			}}
+
+			resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
+				Model:   "gpt-5.4",
+				Payload: []byte(tc.payload),
+			}, cliproxyexecutor.Options{
+				SourceFormat: sdktranslator.FromString("openai-response"),
+				Alt:          "responses/compact",
+				Stream:       false,
+			})
+			if err != nil {
+				t.Fatalf("Execute error: %v", err)
+			}
+			if gotPath != "/responses/compact" {
+				t.Fatalf("path = %q, want %q", gotPath, "/responses/compact")
+			}
+			if !gjson.GetBytes(gotBody, "instructions").Exists() {
+				t.Fatalf("expected instructions in compact request body, got %s", string(gotBody))
+			}
+			if gjson.GetBytes(gotBody, "instructions").Type != gjson.String {
+				t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type)
+			}
+			if gjson.GetBytes(gotBody, "instructions").String() != "" {
+				t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String())
+			}
+			if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` {
+				t.Fatalf("payload = %s", string(resp.Payload))
+			}
+		})
+	}
+}
--- a/internal/runtime/executor/codex_executor_instructions_test.go
+++ b/internal/runtime/executor/codex_executor_instructions_test.go
@@ -0,0 +1,123 @@
+package executor
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	"github.com/tidwall/gjson"
+)
+
+func TestCodexExecutorExecuteNormalizesNullInstructions(t *testing.T) {
+	var gotPath string
+	var gotBody []byte
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotPath = r.URL.Path
+		body, _ := io.ReadAll(r.Body)
+		gotBody = body
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"background\":false,\"error\":null}}\n\n"))
+	}))
+	defer server.Close()
+
+	executor := NewCodexExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"base_url": server.URL,
+		"api_key":  "test",
+	}}
+
+	_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gpt-5.4",
+		Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai-response"),
+		Stream:       false,
+	})
+	if err != nil {
+		t.Fatalf("Execute error: %v", err)
+	}
+	if gotPath != "/responses" {
+		t.Fatalf("path = %q, want %q", gotPath, "/responses")
+	}
+	if gjson.GetBytes(gotBody, "instructions").Type != gjson.String {
+		t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type)
+	}
+	if gjson.GetBytes(gotBody, "instructions").String() != "" {
+		t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String())
+	}
+}
+
+func TestCodexExecutorExecuteStreamNormalizesNullInstructions(t *testing.T) {
+	var gotPath string
+	var gotBody []byte
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotPath = r.URL.Path
+		body, _ := io.ReadAll(r.Body)
+		gotBody = body
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"background\":false,\"error\":null}}\n\n"))
+	}))
+	defer server.Close()
+
+	executor := NewCodexExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"base_url": server.URL,
+		"api_key":  "test",
+	}}
+
+	result, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gpt-5.4",
+		Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai-response"),
+		Stream:       true,
+	})
+	if err != nil {
+		t.Fatalf("ExecuteStream error: %v", err)
+	}
+	for range result.Chunks {
+	}
+	if gotPath != "/responses" {
+		t.Fatalf("path = %q, want %q", gotPath, "/responses")
+	}
+	if gjson.GetBytes(gotBody, "instructions").Type != gjson.String {
+		t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type)
+	}
+	if gjson.GetBytes(gotBody, "instructions").String() != "" {
+		t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String())
+	}
+}
+
+func TestCodexExecutorCountTokensTreatsNullInstructionsAsEmpty(t *testing.T) {
+	executor := NewCodexExecutor(&config.Config{})
+
+	nullResp, err := executor.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
+		Model:   "gpt-5.4",
+		Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai-response"),
+	})
+	if err != nil {
+		t.Fatalf("CountTokens(null) error: %v", err)
+	}
+
+	emptyResp, err := executor.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
+		Model:   "gpt-5.4",
+		Payload: []byte(`{"model":"gpt-5.4","instructions":"","input":"hello"}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai-response"),
+	})
+	if err != nil {
+		t.Fatalf("CountTokens(empty) error: %v", err)
+	}
+
+	if string(nullResp.Payload) != string(emptyResp.Payload) {
+		t.Fatalf("token count payload mismatch:\nnull=%s\nempty=%s", string(nullResp.Payload), string(emptyResp.Payload))
+	}
+}
--- a/internal/runtime/executor/codex_executor_retry_test.go
+++ b/internal/runtime/executor/codex_executor_retry_test.go
@@ -60,6 +60,19 @@ func TestParseCodexRetryAfter(t *testing.T) {
 	})
 }

+func TestNewCodexStatusErrTreatsCapacityAsRetryableRateLimit(t *testing.T) {
+	body := []byte(`{"error":{"message":"Selected model is at capacity. Please try a different model."}}`)
+
+	err := newCodexStatusErr(http.StatusBadRequest, body)
+
+	if got := err.StatusCode(); got != http.StatusTooManyRequests {
+		t.Fatalf("status code = %d, want %d", got, http.StatusTooManyRequests)
+	}
+	if err.RetryAfter() != nil {
+		t.Fatalf("expected nil explicit retryAfter for capacity fallback, got %v", *err.RetryAfter())
+	}
+}
+
 func itoa(v int64) string {
 	return strconv.FormatInt(v, 10)
 }
--- a/internal/runtime/executor/codex_websockets_executor.go
+++ b/internal/runtime/executor/codex_websockets_executor.go
@@ -15,14 +15,17 @@ import (
 	"sync"
 	"time"

+	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/gorilla/websocket"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
@@ -31,7 +34,7 @@ import (
 )

 const (
-	codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-04"
+	codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-06"
 	codexResponsesWebsocketIdleTimeout     = 5 * time.Minute
 	codexResponsesWebsocketHandshakeTO     = 30 * time.Second
 )
@@ -43,10 +46,18 @@ const (
 type CodexWebsocketsExecutor struct {
 	*CodexExecutor

-	sessMu   sync.Mutex
+	store *codexWebsocketSessionStore
+}
+
+type codexWebsocketSessionStore struct {
+	mu       sync.Mutex
 	sessions map[string]*codexWebsocketSession
 }

+var globalCodexWebsocketSessionStore = &codexWebsocketSessionStore{
+	sessions: make(map[string]*codexWebsocketSession),
+}
+
 type codexWebsocketSession struct {
 	sessionID string

@@ -57,11 +68,6 @@ type codexWebsocketSession struct {
 	wsURL  string
 	authID string

-	// connCreateSent tracks whether a `response.create` message has been successfully sent
-	// on the current websocket connection. The upstream expects the first message on each
-	// connection to be `response.create`.
-	connCreateSent bool
-
 	writeMu sync.Mutex

 	activeMu     sync.Mutex
@@ -75,7 +81,7 @@ type codexWebsocketSession struct {
 func NewCodexWebsocketsExecutor(cfg *config.Config) *CodexWebsocketsExecutor {
 	return &CodexWebsocketsExecutor{
 		CodexExecutor: NewCodexExecutor(cfg),
-		sessions:      make(map[string]*codexWebsocketSession),
+		store:         globalCodexWebsocketSessionStore,
 	}
 }

@@ -159,8 +165,8 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -177,8 +183,8 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		return resp, err
 	}

-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -195,7 +201,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 	}

 	body, wsHeaders := applyCodexPromptCacheHeaders(from, req, body)
-	wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey)
+	wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey, e.cfg)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -212,14 +218,8 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		defer sess.reqMu.Unlock()
 	}

-	allowAppend := true
-	if sess != nil {
-		sess.connMu.Lock()
-		allowAppend = sess.connCreateSent
-		sess.connMu.Unlock()
-	}
-	wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	wsReqBody := buildCodexWebsocketRequestBody(body)
+	wsReqLog := helps.UpstreamRequestLog{
 		URL:       wsURL,
 		Method:    "WEBSOCKET",
 		Headers:   wsHeaders.Clone(),
@@ -229,16 +229,14 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		AuthLabel: authLabel,
 		AuthType:  authType,
 		AuthValue: authValue,
-	})
+	}
+	helps.RecordAPIWebsocketRequest(ctx, e.cfg, wsReqLog)

 	conn, respHS, errDial := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
-	if respHS != nil {
-		recordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone())
-	}
 	if errDial != nil {
 		bodyErr := websocketHandshakeBody(respHS)
-		if len(bodyErr) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, bodyErr)
+		if respHS != nil {
+			helps.RecordAPIWebsocketUpgradeRejection(ctx, e.cfg, websocketUpgradeRequestLog(wsReqLog), respHS.StatusCode, respHS.Header.Clone(), bodyErr)
 		}
 		if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired {
 			return e.CodexExecutor.Execute(ctx, auth, req, opts)
@@ -246,10 +244,10 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		if respHS != nil && respHS.StatusCode > 0 {
 			return resp, statusErr{code: respHS.StatusCode, msg: string(bodyErr)}
 		}
-		recordAPIResponseError(ctx, e.cfg, errDial)
+		helps.RecordAPIWebsocketError(ctx, e.cfg, "dial", errDial)
 		return resp, errDial
 	}
-	closeHTTPResponseBody(respHS, "codex websockets executor: close handshake response body error")
+	recordAPIWebsocketHandshake(ctx, e.cfg, respHS)
 	if sess == nil {
 		logCodexWebsocketConnected(executionSessionID, authID, wsURL)
 		defer func() {
@@ -278,13 +276,10 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 			// Retry once with a fresh websocket connection. This is mainly to handle
 			// upstream closing the socket between sequential requests within the same
 			// execution session.
-			connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
+			connRetry, respHSRetry, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
 			if errDialRetry == nil && connRetry != nil {
-				sess.connMu.Lock()
-				allowAppend = sess.connCreateSent
-				sess.connMu.Unlock()
-				wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
-				recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+				wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
+				helps.RecordAPIWebsocketRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 					URL:       wsURL,
 					Method:    "WEBSOCKET",
 					Headers:   wsHeaders.Clone(),
@@ -295,24 +290,25 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 					AuthType:  authType,
 					AuthValue: authValue,
 				})
+				recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
 				if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry == nil {
 					conn = connRetry
 					wsReqBody = wsReqBodyRetry
 				} else {
 					e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry)
-					recordAPIResponseError(ctx, e.cfg, errSendRetry)
+					helps.RecordAPIWebsocketError(ctx, e.cfg, "send_retry", errSendRetry)
 					return resp, errSendRetry
 				}
 			} else {
-				recordAPIResponseError(ctx, e.cfg, errDialRetry)
+				closeHTTPResponseBody(respHSRetry, "codex websockets executor: close handshake response body error")
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "dial_retry", errDialRetry)
 				return resp, errDialRetry
 			}
 		} else {
-			recordAPIResponseError(ctx, e.cfg, errSend)
+			helps.RecordAPIWebsocketError(ctx, e.cfg, "send", errSend)
 			return resp, errSend
 		}
 	}
-	markCodexWebsocketCreateSent(sess, conn, wsReqBody)

 	for {
 		if ctx != nil && ctx.Err() != nil {
@@ -320,7 +316,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		}
 		msgType, payload, errRead := readCodexWebsocketMessage(ctx, sess, conn, readCh)
 		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
+			helps.RecordAPIWebsocketError(ctx, e.cfg, "read", errRead)
 			return resp, errRead
 		}
 		if msgType != websocket.TextMessage {
@@ -329,7 +325,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 				if sess != nil {
 					e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err)
 				}
-				recordAPIResponseError(ctx, e.cfg, err)
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "unexpected_binary", err)
 				return resp, err
 			}
 			continue
@@ -339,25 +335,25 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
 		if len(payload) == 0 {
 			continue
 		}
-		appendAPIResponseChunk(ctx, e.cfg, payload)
+		helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)

 		if wsErr, ok := parseCodexWebsocketError(payload); ok {
 			if sess != nil {
 				e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr)
 			}
-			recordAPIResponseError(ctx, e.cfg, wsErr)
+			helps.RecordAPIWebsocketError(ctx, e.cfg, "upstream_error", wsErr)
 			return resp, wsErr
 		}

 		payload = normalizeCodexWebsocketCompletion(payload)
 		eventType := gjson.GetBytes(payload, "type").String()
 		if eventType == "response.completed" {
-			if detail, ok := parseCodexUsage(payload); ok {
-				reporter.publish(ctx, detail)
+			if detail, ok := helps.ParseCodexUsage(payload); ok {
+				reporter.Publish(ctx, detail)
 			}
 			var param any
 			out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, payload, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(out)}
+			resp = cliproxyexecutor.Response{Payload: out}
 			return resp, nil
 		}
 	}
@@ -378,8 +374,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 		baseURL = "https://chatgpt.com/backend-api/codex"
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
@@ -390,8 +386,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 		return nil, err
 	}

-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, body, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, body, requestedModel)

 	httpURL := strings.TrimSuffix(baseURL, "/") + "/responses"
 	wsURL, err := buildCodexResponsesWebsocketURL(httpURL)
@@ -400,30 +396,24 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 	}

 	body, wsHeaders := applyCodexPromptCacheHeaders(from, req, body)
-	wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey)
+	wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey, e.cfg)

 	var authID, authLabel, authType, authValue string
-	if auth != nil {
-		authID = auth.ID
-		authLabel = auth.Label
-		authType, authValue = auth.AccountInfo()
-	}
+	authID = auth.ID
+	authLabel = auth.Label
+	authType, authValue = auth.AccountInfo()

 	executionSessionID := executionSessionIDFromOptions(opts)
 	var sess *codexWebsocketSession
 	if executionSessionID != "" {
 		sess = e.getOrCreateSession(executionSessionID)
-		sess.reqMu.Lock()
+		if sess != nil {
+			sess.reqMu.Lock()
+		}
 	}

-	allowAppend := true
-	if sess != nil {
-		sess.connMu.Lock()
-		allowAppend = sess.connCreateSent
-		sess.connMu.Unlock()
-	}
-	wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	wsReqBody := buildCodexWebsocketRequestBody(body)
+	wsReqLog := helps.UpstreamRequestLog{
 		URL:       wsURL,
 		Method:    "WEBSOCKET",
 		Headers:   wsHeaders.Clone(),
@@ -433,18 +423,18 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 		AuthLabel: authLabel,
 		AuthType:  authType,
 		AuthValue: authValue,
-	})
+	}
+	helps.RecordAPIWebsocketRequest(ctx, e.cfg, wsReqLog)

 	conn, respHS, errDial := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
 	var upstreamHeaders http.Header
 	if respHS != nil {
 		upstreamHeaders = respHS.Header.Clone()
-		recordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone())
 	}
 	if errDial != nil {
 		bodyErr := websocketHandshakeBody(respHS)
-		if len(bodyErr) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, bodyErr)
+		if respHS != nil {
+			helps.RecordAPIWebsocketUpgradeRejection(ctx, e.cfg, websocketUpgradeRequestLog(wsReqLog), respHS.StatusCode, respHS.Header.Clone(), bodyErr)
 		}
 		if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired {
 			return e.CodexExecutor.ExecuteStream(ctx, auth, req, opts)
@@ -452,13 +442,13 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 		if respHS != nil && respHS.StatusCode > 0 {
 			return nil, statusErr{code: respHS.StatusCode, msg: string(bodyErr)}
 		}
-		recordAPIResponseError(ctx, e.cfg, errDial)
+		helps.RecordAPIWebsocketError(ctx, e.cfg, "dial", errDial)
 		if sess != nil {
 			sess.reqMu.Unlock()
 		}
 		return nil, errDial
 	}
-	closeHTTPResponseBody(respHS, "codex websockets executor: close handshake response body error")
+	recordAPIWebsocketHandshake(ctx, e.cfg, respHS)

 	if sess == nil {
 		logCodexWebsocketConnected(executionSessionID, authID, wsURL)
@@ -471,23 +461,21 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 	}

 	if errSend := writeCodexWebsocketMessage(sess, conn, wsReqBody); errSend != nil {
-		recordAPIResponseError(ctx, e.cfg, errSend)
+		helps.RecordAPIWebsocketError(ctx, e.cfg, "send", errSend)
 		if sess != nil {
 			e.invalidateUpstreamConn(sess, conn, "send_error", errSend)

 			// Retry once with a new websocket connection for the same execution session.
-			connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
+			connRetry, respHSRetry, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
 			if errDialRetry != nil || connRetry == nil {
-				recordAPIResponseError(ctx, e.cfg, errDialRetry)
+				closeHTTPResponseBody(respHSRetry, "codex websockets executor: close handshake response body error")
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "dial_retry", errDialRetry)
 				sess.clearActive(readCh)
 				sess.reqMu.Unlock()
 				return nil, errDialRetry
 			}
-			sess.connMu.Lock()
-			allowAppend = sess.connCreateSent
-			sess.connMu.Unlock()
-			wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
-			recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+			wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
+			helps.RecordAPIWebsocketRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 				URL:       wsURL,
 				Method:    "WEBSOCKET",
 				Headers:   wsHeaders.Clone(),
@@ -498,8 +486,9 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 				AuthType:  authType,
 				AuthValue: authValue,
 			})
+			recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
 			if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry != nil {
-				recordAPIResponseError(ctx, e.cfg, errSendRetry)
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "send_retry", errSendRetry)
 				e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry)
 				sess.clearActive(readCh)
 				sess.reqMu.Unlock()
@@ -515,7 +504,6 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 			return nil, errSend
 		}
 	}
-	markCodexWebsocketCreateSent(sess, conn, wsReqBody)

 	out := make(chan cliproxyexecutor.StreamChunk)
 	go func() {
@@ -566,8 +554,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 				}
 				terminateReason = "read_error"
 				terminateErr = errRead
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				reporter.publishFailure(ctx)
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "read", errRead)
+				reporter.PublishFailure(ctx)
 				_ = send(cliproxyexecutor.StreamChunk{Err: errRead})
 				return
 			}
@@ -576,8 +564,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 					err = fmt.Errorf("codex websockets executor: unexpected binary message")
 					terminateReason = "unexpected_binary"
 					terminateErr = err
-					recordAPIResponseError(ctx, e.cfg, err)
-					reporter.publishFailure(ctx)
+					helps.RecordAPIWebsocketError(ctx, e.cfg, "unexpected_binary", err)
+					reporter.PublishFailure(ctx)
 					if sess != nil {
 						e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err)
 					}
@@ -591,13 +579,13 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 			if len(payload) == 0 {
 				continue
 			}
-			appendAPIResponseChunk(ctx, e.cfg, payload)
+			helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)

 			if wsErr, ok := parseCodexWebsocketError(payload); ok {
 				terminateReason = "upstream_error"
 				terminateErr = wsErr
-				recordAPIResponseError(ctx, e.cfg, wsErr)
-				reporter.publishFailure(ctx)
+				helps.RecordAPIWebsocketError(ctx, e.cfg, "upstream_error", wsErr)
+				reporter.PublishFailure(ctx)
 				if sess != nil {
 					e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr)
 				}
@@ -608,15 +596,15 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
 			payload = normalizeCodexWebsocketCompletion(payload)
 			eventType := gjson.GetBytes(payload, "type").String()
 			if eventType == "response.completed" || eventType == "response.done" {
-				if detail, ok := parseCodexUsage(payload); ok {
-					reporter.publish(ctx, detail)
+				if detail, ok := helps.ParseCodexUsage(payload); ok {
+					reporter.Publish(ctx, detail)
 				}
 			}

 			line := encodeCodexWebsocketAsSSE(payload)
 			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, body, body, line, &param)
 			for i := range chunks {
-				if !send(cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}) {
+				if !send(cliproxyexecutor.StreamChunk{Payload: chunks[i]}) {
 					terminateReason = "context_done"
 					terminateErr = ctx.Err()
 					return
@@ -657,31 +645,14 @@ func writeCodexWebsocketMessage(sess *codexWebsocketSession, conn *websocket.Con
 	return conn.WriteMessage(websocket.TextMessage, payload)
 }

-func buildCodexWebsocketRequestBody(body []byte, allowAppend bool) []byte {
+func buildCodexWebsocketRequestBody(body []byte) []byte {
 	if len(body) == 0 {
 		return nil
 	}

-	// Codex CLI websocket v2 uses `response.create` with `previous_response_id` for incremental turns.
-	// The upstream ChatGPT Codex websocket currently rejects that with close 1008 (policy violation).
-	// Fall back to v1 `response.append` semantics on the same websocket connection to keep the session alive.
-	//
-	// NOTE: The upstream expects the first websocket event on each connection to be `response.create`,
-	// so we only use `response.append` after we have initialized the current connection.
-	if allowAppend {
-		if prev := strings.TrimSpace(gjson.GetBytes(body, "previous_response_id").String()); prev != "" {
-			inputNode := gjson.GetBytes(body, "input")
-			wsReqBody := []byte(`{}`)
-			wsReqBody, _ = sjson.SetBytes(wsReqBody, "type", "response.append")
-			if inputNode.Exists() && inputNode.IsArray() && strings.TrimSpace(inputNode.Raw) != "" {
-				wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte(inputNode.Raw))
-				return wsReqBody
-			}
-			wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte("[]"))
-			return wsReqBody
-		}
-	}
-
+	// Match codex-rs websocket v2 semantics: every request is `response.create`.
+	// Incremental follow-up turns continue on the same websocket using
+	// `previous_response_id` + incremental `input`, not `response.append`.
 	wsReqBody, errSet := sjson.SetBytes(bytes.Clone(body), "type", "response.create")
 	if errSet == nil && len(wsReqBody) > 0 {
 		return wsReqBody
@@ -725,21 +696,6 @@ func readCodexWebsocketMessage(ctx context.Context, sess *codexWebsocketSession,
 	}
 }

-func markCodexWebsocketCreateSent(sess *codexWebsocketSession, conn *websocket.Conn, payload []byte) {
-	if sess == nil || conn == nil || len(payload) == 0 {
-		return
-	}
-	if strings.TrimSpace(gjson.GetBytes(payload, "type").String()) != "response.create" {
-		return
-	}
-
-	sess.connMu.Lock()
-	if sess.conn == conn {
-		sess.connCreateSent = true
-	}
-	sess.connMu.Unlock()
-}
-
 func newProxyAwareWebsocketDialer(cfg *config.Config, auth *cliproxyauth.Auth) *websocket.Dialer {
 	dialer := &websocket.Dialer{
 		Proxy:             http.ProxyFromEnvironment,
@@ -762,21 +718,30 @@ func newProxyAwareWebsocketDialer(cfg *config.Config, auth *cliproxyauth.Auth) *
 		return dialer
 	}

-	parsedURL, errParse := url.Parse(proxyURL)
+	setting, errParse := proxyutil.Parse(proxyURL)
 	if errParse != nil {
-		log.Errorf("codex websockets executor: parse proxy URL failed: %v", errParse)
+		log.Errorf("codex websockets executor: %v", errParse)
 		return dialer
 	}

-	switch parsedURL.Scheme {
+	switch setting.Mode {
+	case proxyutil.ModeDirect:
+		dialer.Proxy = nil
+		return dialer
+	case proxyutil.ModeProxy:
+	default:
+		return dialer
+	}
+
+	switch setting.URL.Scheme {
 	case "socks5":
 		var proxyAuth *proxy.Auth
-		if parsedURL.User != nil {
-			username := parsedURL.User.Username()
-			password, _ := parsedURL.User.Password()
+		if setting.URL.User != nil {
+			username := setting.URL.User.Username()
+			password, _ := setting.URL.User.Password()
 			proxyAuth = &proxy.Auth{User: username, Password: password}
 		}
-		socksDialer, errSOCKS5 := proxy.SOCKS5("tcp", parsedURL.Host, proxyAuth, proxy.Direct)
+		socksDialer, errSOCKS5 := proxy.SOCKS5("tcp", setting.URL.Host, proxyAuth, proxy.Direct)
 		if errSOCKS5 != nil {
 			log.Errorf("codex websockets executor: create SOCKS5 dialer failed: %v", errSOCKS5)
 			return dialer
@@ -786,9 +751,9 @@ func newProxyAwareWebsocketDialer(cfg *config.Config, auth *cliproxyauth.Auth) *
 			return socksDialer.Dial(network, addr)
 		}
 	case "http", "https":
-		dialer.Proxy = http.ProxyURL(parsedURL)
+		dialer.Proxy = http.ProxyURL(setting.URL)
 	default:
-		log.Errorf("codex websockets executor: unsupported proxy scheme: %s", parsedURL.Scheme)
+		log.Errorf("codex websockets executor: unsupported proxy scheme: %s", setting.URL.Scheme)
 	}

 	return dialer
@@ -814,19 +779,19 @@ func applyCodexPromptCacheHeaders(from sdktranslator.Format, req cliproxyexecuto
 		return rawJSON, headers
 	}

-	var cache codexCache
+	var cache helps.CodexCache
 	if from == "claude" {
 		userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id")
 		if userIDResult.Exists() {
 			key := fmt.Sprintf("%s-%s", req.Model, userIDResult.String())
-			if cached, ok := getCodexCache(key); ok {
+			if cached, ok := helps.GetCodexCache(key); ok {
 				cache = cached
 			} else {
-				cache = codexCache{
+				cache = helps.CodexCache{
 					ID:     uuid.New().String(),
 					Expire: time.Now().Add(1 * time.Hour),
 				}
-				setCodexCache(key, cache)
+				helps.SetCodexCache(key, cache)
 			}
 		}
 	} else if from == "openai-response" {
@@ -838,13 +803,12 @@ func applyCodexPromptCacheHeaders(from sdktranslator.Format, req cliproxyexecuto
 	if cache.ID != "" {
 		rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID)
 		headers.Set("Conversation_id", cache.ID)
-		headers.Set("Session_id", cache.ID)
 	}

 	return rawJSON, headers
 }

-func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *cliproxyauth.Auth, token string) http.Header {
+func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *cliproxyauth.Auth, token string, cfg *config.Config) http.Header {
 	if headers == nil {
 		headers = http.Header{}
 	}
@@ -853,16 +817,18 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *
 	}

 	var ginHeaders http.Header
-	if ginCtx := ginContextFrom(ctx); ginCtx != nil && ginCtx.Request != nil {
-		ginHeaders = ginCtx.Request.Header
+	if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
+		ginHeaders = ginCtx.Request.Header.Clone()
 	}

-	misc.EnsureHeader(headers, ginHeaders, "x-codex-beta-features", "")
+	_, cfgBetaFeatures := codexHeaderDefaults(cfg, auth)
+	ensureHeaderWithPriority(headers, ginHeaders, "x-codex-beta-features", cfgBetaFeatures, "")
 	misc.EnsureHeader(headers, ginHeaders, "x-codex-turn-state", "")
 	misc.EnsureHeader(headers, ginHeaders, "x-codex-turn-metadata", "")
+	misc.EnsureHeader(headers, ginHeaders, "x-client-request-id", "")
 	misc.EnsureHeader(headers, ginHeaders, "x-responsesapi-include-timing-metrics", "")
+	misc.EnsureHeader(headers, ginHeaders, "Version", "")

-	misc.EnsureHeader(headers, ginHeaders, "Version", codexClientVersion)
 	betaHeader := strings.TrimSpace(headers.Get("OpenAI-Beta"))
 	if betaHeader == "" && ginHeaders != nil {
 		betaHeader = strings.TrimSpace(ginHeaders.Get("OpenAI-Beta"))
@@ -871,8 +837,10 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *
 		betaHeader = codexResponsesWebsocketBetaHeaderValue
 	}
 	headers.Set("OpenAI-Beta", betaHeader)
-	misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString())
-	misc.EnsureHeader(headers, ginHeaders, "User-Agent", codexUserAgent)
+	if strings.Contains(headers.Get("User-Agent"), "Mac OS") {
+		misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString())
+	}
+	headers.Del("User-Agent")

 	isAPIKey := false
 	if auth != nil && auth.Attributes != nil {
@@ -880,8 +848,12 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *
 			isAPIKey = true
 		}
 	}
+	if originator := strings.TrimSpace(ginHeaders.Get("Originator")); originator != "" {
+		headers.Set("Originator", originator)
+	} else if !isAPIKey {
+		headers.Set("Originator", codexOriginator)
+	}
 	if !isAPIKey {
-		headers.Set("Originator", "codex_cli_rs")
 		if auth != nil && auth.Metadata != nil {
 			if accountID, ok := auth.Metadata["account_id"].(string); ok {
 				if trimmed := strings.TrimSpace(accountID); trimmed != "" {
@@ -900,6 +872,62 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *
 	return headers
 }

+func codexHeaderDefaults(cfg *config.Config, auth *cliproxyauth.Auth) (string, string) {
+	if cfg == nil || auth == nil {
+		return "", ""
+	}
+	if auth.Attributes != nil {
+		if v := strings.TrimSpace(auth.Attributes["api_key"]); v != "" {
+			return "", ""
+		}
+	}
+	return strings.TrimSpace(cfg.CodexHeaderDefaults.UserAgent), strings.TrimSpace(cfg.CodexHeaderDefaults.BetaFeatures)
+}
+
+func ensureHeaderWithPriority(target http.Header, source http.Header, key, configValue, fallbackValue string) {
+	if target == nil {
+		return
+	}
+	if strings.TrimSpace(target.Get(key)) != "" {
+		return
+	}
+	if source != nil {
+		if val := strings.TrimSpace(source.Get(key)); val != "" {
+			target.Set(key, val)
+			return
+		}
+	}
+	if val := strings.TrimSpace(configValue); val != "" {
+		target.Set(key, val)
+		return
+	}
+	if val := strings.TrimSpace(fallbackValue); val != "" {
+		target.Set(key, val)
+	}
+}
+
+func ensureHeaderWithConfigPrecedence(target http.Header, source http.Header, key, configValue, fallbackValue string) {
+	if target == nil {
+		return
+	}
+	if strings.TrimSpace(target.Get(key)) != "" {
+		return
+	}
+	if val := strings.TrimSpace(configValue); val != "" {
+		target.Set(key, val)
+		return
+	}
+	if source != nil {
+		if val := strings.TrimSpace(source.Get(key)); val != "" {
+			target.Set(key, val)
+			return
+		}
+	}
+	if val := strings.TrimSpace(fallbackValue); val != "" {
+		target.Set(key, val)
+	}
+}
+
 type statusErrWithHeaders struct {
 	statusErr
 	headers http.Header
@@ -996,6 +1024,32 @@ func encodeCodexWebsocketAsSSE(payload []byte) []byte {
 	return line
 }

+func websocketUpgradeRequestLog(info helps.UpstreamRequestLog) helps.UpstreamRequestLog {
+	upgradeInfo := info
+	upgradeInfo.URL = helps.WebsocketUpgradeRequestURL(info.URL)
+	upgradeInfo.Method = http.MethodGet
+	upgradeInfo.Body = nil
+	upgradeInfo.Headers = info.Headers.Clone()
+	if upgradeInfo.Headers == nil {
+		upgradeInfo.Headers = make(http.Header)
+	}
+	if strings.TrimSpace(upgradeInfo.Headers.Get("Connection")) == "" {
+		upgradeInfo.Headers.Set("Connection", "Upgrade")
+	}
+	if strings.TrimSpace(upgradeInfo.Headers.Get("Upgrade")) == "" {
+		upgradeInfo.Headers.Set("Upgrade", "websocket")
+	}
+	return upgradeInfo
+}
+
+func recordAPIWebsocketHandshake(ctx context.Context, cfg *config.Config, resp *http.Response) {
+	if resp == nil {
+		return
+	}
+	helps.RecordAPIWebsocketHandshake(ctx, cfg, resp.StatusCode, resp.Header.Clone())
+	closeHTTPResponseBody(resp, "codex websockets executor: close handshake response body error")
+}
+
 func websocketHandshakeBody(resp *http.Response) []byte {
 	if resp == nil || resp.Body == nil {
 		return nil
@@ -1017,36 +1071,6 @@ func closeHTTPResponseBody(resp *http.Response, logPrefix string) {
 	}
 }

-func closeOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
-	done := make(chan struct{})
-	if ctx == nil || conn == nil {
-		return done
-	}
-	go func() {
-		select {
-		case <-done:
-		case <-ctx.Done():
-			_ = conn.Close()
-		}
-	}()
-	return done
-}
-
-func cancelReadOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
-	done := make(chan struct{})
-	if ctx == nil || conn == nil {
-		return done
-	}
-	go func() {
-		select {
-		case <-done:
-		case <-ctx.Done():
-			_ = conn.SetReadDeadline(time.Now())
-		}
-	}()
-	return done
-}
-
 func executionSessionIDFromOptions(opts cliproxyexecutor.Options) string {
 	if len(opts.Metadata) == 0 {
 		return ""
@@ -1070,16 +1094,23 @@ func (e *CodexWebsocketsExecutor) getOrCreateSession(sessionID string) *codexWeb
 	if sessionID == "" {
 		return nil
 	}
-	e.sessMu.Lock()
-	defer e.sessMu.Unlock()
-	if e.sessions == nil {
-		e.sessions = make(map[string]*codexWebsocketSession)
+	if e == nil {
+		return nil
 	}
-	if sess, ok := e.sessions[sessionID]; ok && sess != nil {
+	store := e.store
+	if store == nil {
+		store = globalCodexWebsocketSessionStore
+	}
+	store.mu.Lock()
+	defer store.mu.Unlock()
+	if store.sessions == nil {
+		store.sessions = make(map[string]*codexWebsocketSession)
+	}
+	if sess, ok := store.sessions[sessionID]; ok && sess != nil {
 		return sess
 	}
 	sess := &codexWebsocketSession{sessionID: sessionID}
-	e.sessions[sessionID] = sess
+	store.sessions[sessionID] = sess
 	return sess
 }

@@ -1120,7 +1151,6 @@ func (e *CodexWebsocketsExecutor) ensureUpstreamConn(ctx context.Context, auth *
 	sess.conn = conn
 	sess.wsURL = wsURL
 	sess.authID = authID
-	sess.connCreateSent = false
 	sess.readerConn = conn
 	sess.connMu.Unlock()

@@ -1206,7 +1236,6 @@ func (e *CodexWebsocketsExecutor) invalidateUpstreamConn(sess *codexWebsocketSes
 		return
 	}
 	sess.conn = nil
-	sess.connCreateSent = false
 	if sess.readerConn == conn {
 		sess.readerConn = nil
 	}
@@ -1227,14 +1256,20 @@ func (e *CodexWebsocketsExecutor) CloseExecutionSession(sessionID string) {
 		return
 	}
 	if sessionID == cliproxyauth.CloseAllExecutionSessionsID {
-		e.closeAllExecutionSessions("executor_replaced")
+		// Executor replacement can happen during hot reload (config/credential changes).
+		// Do not force-close upstream websocket sessions here, otherwise in-flight
+		// downstream websocket requests get interrupted.
 		return
 	}

-	e.sessMu.Lock()
-	sess := e.sessions[sessionID]
-	delete(e.sessions, sessionID)
-	e.sessMu.Unlock()
+	store := e.store
+	if store == nil {
+		store = globalCodexWebsocketSessionStore
+	}
+	store.mu.Lock()
+	sess := store.sessions[sessionID]
+	delete(store.sessions, sessionID)
+	store.mu.Unlock()

 	e.closeExecutionSession(sess, "session_closed")
 }
@@ -1244,15 +1279,19 @@ func (e *CodexWebsocketsExecutor) closeAllExecutionSessions(reason string) {
 		return
 	}

-	e.sessMu.Lock()
-	sessions := make([]*codexWebsocketSession, 0, len(e.sessions))
-	for sessionID, sess := range e.sessions {
-		delete(e.sessions, sessionID)
+	store := e.store
+	if store == nil {
+		store = globalCodexWebsocketSessionStore
+	}
+	store.mu.Lock()
+	sessions := make([]*codexWebsocketSession, 0, len(store.sessions))
+	for sessionID, sess := range store.sessions {
+		delete(store.sessions, sessionID)
 		if sess != nil {
 			sessions = append(sessions, sess)
 		}
 	}
-	e.sessMu.Unlock()
+	store.mu.Unlock()

 	for i := range sessions {
 		e.closeExecutionSession(sessions[i], reason)
@@ -1260,6 +1299,10 @@ func (e *CodexWebsocketsExecutor) closeAllExecutionSessions(reason string) {
 }

 func (e *CodexWebsocketsExecutor) closeExecutionSession(sess *codexWebsocketSession, reason string) {
+	closeCodexWebsocketSession(sess, reason)
+}
+
+func closeCodexWebsocketSession(sess *codexWebsocketSession, reason string) {
 	if sess == nil {
 		return
 	}
@@ -1273,7 +1316,6 @@ func (e *CodexWebsocketsExecutor) closeExecutionSession(sess *codexWebsocketSess
 	authID := sess.authID
 	wsURL := sess.wsURL
 	sess.conn = nil
-	sess.connCreateSent = false
 	if sess.readerConn == conn {
 		sess.readerConn = nil
 	}
@@ -1301,6 +1343,69 @@ func logCodexWebsocketDisconnected(sessionID string, authID string, wsURL string
 	log.Infof("codex websockets: upstream disconnected session=%s auth=%s url=%s reason=%s", strings.TrimSpace(sessionID), strings.TrimSpace(authID), strings.TrimSpace(wsURL), strings.TrimSpace(reason))
 }

+// CloseCodexWebsocketSessionsForAuthID closes all active Codex upstream websocket sessions
+// associated with the supplied auth ID.
+func CloseCodexWebsocketSessionsForAuthID(authID string, reason string) {
+	authID = strings.TrimSpace(authID)
+	if authID == "" {
+		return
+	}
+	reason = strings.TrimSpace(reason)
+	if reason == "" {
+		reason = "auth_removed"
+	}
+
+	store := globalCodexWebsocketSessionStore
+	if store == nil {
+		return
+	}
+
+	type sessionItem struct {
+		sessionID string
+		sess      *codexWebsocketSession
+	}
+
+	store.mu.Lock()
+	items := make([]sessionItem, 0, len(store.sessions))
+	for sessionID, sess := range store.sessions {
+		items = append(items, sessionItem{sessionID: sessionID, sess: sess})
+	}
+	store.mu.Unlock()
+
+	matches := make([]sessionItem, 0)
+	for i := range items {
+		sess := items[i].sess
+		if sess == nil {
+			continue
+		}
+		sess.connMu.Lock()
+		sessAuthID := strings.TrimSpace(sess.authID)
+		sess.connMu.Unlock()
+		if sessAuthID == authID {
+			matches = append(matches, items[i])
+		}
+	}
+	if len(matches) == 0 {
+		return
+	}
+
+	toClose := make([]*codexWebsocketSession, 0, len(matches))
+	store.mu.Lock()
+	for i := range matches {
+		current, ok := store.sessions[matches[i].sessionID]
+		if !ok || current == nil || current != matches[i].sess {
+			continue
+		}
+		delete(store.sessions, matches[i].sessionID)
+		toClose = append(toClose, current)
+	}
+	store.mu.Unlock()
+
+	for i := range toClose {
+		closeCodexWebsocketSession(toClose[i], reason)
+	}
+}
+
 // CodexAutoExecutor routes Codex requests to the websocket transport only when:
 //  1. The downstream transport is websocket, and
 //  2. The selected auth enables websockets.
--- a/internal/runtime/executor/codex_websockets_executor_store_test.go
+++ b/internal/runtime/executor/codex_websockets_executor_store_test.go
@@ -0,0 +1,48 @@
+package executor
+
+import (
+	"testing"
+
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+func TestCodexWebsocketsExecutor_SessionStoreSurvivesExecutorReplacement(t *testing.T) {
+	sessionID := "test-session-store-survives-replace"
+
+	globalCodexWebsocketSessionStore.mu.Lock()
+	delete(globalCodexWebsocketSessionStore.sessions, sessionID)
+	globalCodexWebsocketSessionStore.mu.Unlock()
+
+	exec1 := NewCodexWebsocketsExecutor(nil)
+	sess1 := exec1.getOrCreateSession(sessionID)
+	if sess1 == nil {
+		t.Fatalf("expected session to be created")
+	}
+
+	exec2 := NewCodexWebsocketsExecutor(nil)
+	sess2 := exec2.getOrCreateSession(sessionID)
+	if sess2 == nil {
+		t.Fatalf("expected session to be available across executors")
+	}
+	if sess1 != sess2 {
+		t.Fatalf("expected the same session instance across executors")
+	}
+
+	exec1.CloseExecutionSession(cliproxyauth.CloseAllExecutionSessionsID)
+
+	globalCodexWebsocketSessionStore.mu.Lock()
+	_, stillPresent := globalCodexWebsocketSessionStore.sessions[sessionID]
+	globalCodexWebsocketSessionStore.mu.Unlock()
+	if !stillPresent {
+		t.Fatalf("expected session to remain after executor replacement close marker")
+	}
+
+	exec2.CloseExecutionSession(sessionID)
+
+	globalCodexWebsocketSessionStore.mu.Lock()
+	_, presentAfterClose := globalCodexWebsocketSessionStore.sessions[sessionID]
+	globalCodexWebsocketSessionStore.mu.Unlock()
+	if presentAfterClose {
+		t.Fatalf("expected session to be removed after explicit close")
+	}
+}
--- a/internal/runtime/executor/codex_websockets_executor_test.go
+++ b/internal/runtime/executor/codex_websockets_executor_test.go
@@ -0,0 +1,291 @@
+package executor
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+	"github.com/tidwall/gjson"
+)
+
+func TestBuildCodexWebsocketRequestBodyPreservesPreviousResponseID(t *testing.T) {
+	body := []byte(`{"model":"gpt-5-codex","previous_response_id":"resp-1","input":[{"type":"message","id":"msg-1"}]}`)
+
+	wsReqBody := buildCodexWebsocketRequestBody(body)
+
+	if got := gjson.GetBytes(wsReqBody, "type").String(); got != "response.create" {
+		t.Fatalf("type = %s, want response.create", got)
+	}
+	if got := gjson.GetBytes(wsReqBody, "previous_response_id").String(); got != "resp-1" {
+		t.Fatalf("previous_response_id = %s, want resp-1", got)
+	}
+	if gjson.GetBytes(wsReqBody, "input.0.id").String() != "msg-1" {
+		t.Fatalf("input item id mismatch")
+	}
+	if got := gjson.GetBytes(wsReqBody, "type").String(); got == "response.append" {
+		t.Fatalf("unexpected websocket request type: %s", got)
+	}
+}
+
+func TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta(t *testing.T) {
+	headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, nil, "", nil)
+
+	if got := headers.Get("OpenAI-Beta"); got != codexResponsesWebsocketBetaHeaderValue {
+		t.Fatalf("OpenAI-Beta = %s, want %s", got, codexResponsesWebsocketBetaHeaderValue)
+	}
+	if got := headers.Get("User-Agent"); got != "" {
+		t.Fatalf("User-Agent = %s, want empty", got)
+	}
+	if got := headers.Get("Version"); got != "" {
+		t.Fatalf("Version = %q, want empty", got)
+	}
+	if got := headers.Get("x-codex-beta-features"); got != "" {
+		t.Fatalf("x-codex-beta-features = %q, want empty", got)
+	}
+	if got := headers.Get("X-Codex-Turn-Metadata"); got != "" {
+		t.Fatalf("X-Codex-Turn-Metadata = %q, want empty", got)
+	}
+	if got := headers.Get("X-Client-Request-Id"); got != "" {
+		t.Fatalf("X-Client-Request-Id = %q, want empty", got)
+	}
+}
+
+func TestApplyCodexWebsocketHeadersPassesThroughClientIdentityHeaders(t *testing.T) {
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+	ctx := contextWithGinHeaders(map[string]string{
+		"Originator":            "Codex Desktop",
+		"Version":               "0.115.0-alpha.27",
+		"X-Codex-Turn-Metadata": `{"turn_id":"turn-1"}`,
+		"X-Client-Request-Id":   "019d2233-e240-7162-992d-38df0a2a0e0d",
+	})
+
+	headers := applyCodexWebsocketHeaders(ctx, http.Header{}, auth, "", nil)
+
+	if got := headers.Get("Originator"); got != "Codex Desktop" {
+		t.Fatalf("Originator = %s, want %s", got, "Codex Desktop")
+	}
+	if got := headers.Get("Version"); got != "0.115.0-alpha.27" {
+		t.Fatalf("Version = %s, want %s", got, "0.115.0-alpha.27")
+	}
+	if got := headers.Get("X-Codex-Turn-Metadata"); got != `{"turn_id":"turn-1"}` {
+		t.Fatalf("X-Codex-Turn-Metadata = %s, want %s", got, `{"turn_id":"turn-1"}`)
+	}
+	if got := headers.Get("X-Client-Request-Id"); got != "019d2233-e240-7162-992d-38df0a2a0e0d" {
+		t.Fatalf("X-Client-Request-Id = %s, want %s", got, "019d2233-e240-7162-992d-38df0a2a0e0d")
+	}
+}
+
+func TestApplyCodexWebsocketHeadersUsesConfigDefaultsForOAuth(t *testing.T) {
+	cfg := &config.Config{
+		CodexHeaderDefaults: config.CodexHeaderDefaults{
+			UserAgent:    "my-codex-client/1.0",
+			BetaFeatures: "feature-a,feature-b",
+		},
+	}
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+
+	headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, auth, "", cfg)
+
+	if got := headers.Get("User-Agent"); got != "" {
+		t.Fatalf("User-Agent = %s, want empty", got)
+	}
+	if got := headers.Get("x-codex-beta-features"); got != "feature-a,feature-b" {
+		t.Fatalf("x-codex-beta-features = %s, want %s", got, "feature-a,feature-b")
+	}
+	if got := headers.Get("OpenAI-Beta"); got != codexResponsesWebsocketBetaHeaderValue {
+		t.Fatalf("OpenAI-Beta = %s, want %s", got, codexResponsesWebsocketBetaHeaderValue)
+	}
+}
+
+func TestApplyCodexWebsocketHeadersPrefersExistingHeadersOverClientAndConfig(t *testing.T) {
+	cfg := &config.Config{
+		CodexHeaderDefaults: config.CodexHeaderDefaults{
+			UserAgent:    "config-ua",
+			BetaFeatures: "config-beta",
+		},
+	}
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+	ctx := contextWithGinHeaders(map[string]string{
+		"User-Agent":            "client-ua",
+		"X-Codex-Beta-Features": "client-beta",
+	})
+	headers := http.Header{}
+	headers.Set("User-Agent", "existing-ua")
+	headers.Set("X-Codex-Beta-Features", "existing-beta")
+
+	got := applyCodexWebsocketHeaders(ctx, headers, auth, "", cfg)
+
+	if gotVal := got.Get("User-Agent"); gotVal != "" {
+		t.Fatalf("User-Agent = %s, want empty", gotVal)
+	}
+	if gotVal := got.Get("x-codex-beta-features"); gotVal != "existing-beta" {
+		t.Fatalf("x-codex-beta-features = %s, want %s", gotVal, "existing-beta")
+	}
+}
+
+func TestApplyCodexWebsocketHeadersConfigUserAgentOverridesClientHeader(t *testing.T) {
+	cfg := &config.Config{
+		CodexHeaderDefaults: config.CodexHeaderDefaults{
+			UserAgent:    "config-ua",
+			BetaFeatures: "config-beta",
+		},
+	}
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+	ctx := contextWithGinHeaders(map[string]string{
+		"User-Agent":            "client-ua",
+		"X-Codex-Beta-Features": "client-beta",
+	})
+
+	headers := applyCodexWebsocketHeaders(ctx, http.Header{}, auth, "", cfg)
+
+	if got := headers.Get("User-Agent"); got != "" {
+		t.Fatalf("User-Agent = %s, want empty", got)
+	}
+	if got := headers.Get("x-codex-beta-features"); got != "client-beta" {
+		t.Fatalf("x-codex-beta-features = %s, want %s", got, "client-beta")
+	}
+}
+
+func TestApplyCodexWebsocketHeadersIgnoresConfigForAPIKeyAuth(t *testing.T) {
+	cfg := &config.Config{
+		CodexHeaderDefaults: config.CodexHeaderDefaults{
+			UserAgent:    "config-ua",
+			BetaFeatures: "config-beta",
+		},
+	}
+	auth := &cliproxyauth.Auth{
+		Provider:   "codex",
+		Attributes: map[string]string{"api_key": "sk-test"},
+	}
+
+	headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, auth, "sk-test", cfg)
+
+	if got := headers.Get("User-Agent"); got != "" {
+		t.Fatalf("User-Agent = %s, want empty", got)
+	}
+	if got := headers.Get("x-codex-beta-features"); got != "" {
+		t.Fatalf("x-codex-beta-features = %q, want empty", got)
+	}
+}
+
+func TestApplyCodexHeadersUsesConfigUserAgentForOAuth(t *testing.T) {
+	req, err := http.NewRequest(http.MethodPost, "https://example.com/responses", nil)
+	if err != nil {
+		t.Fatalf("NewRequest() error = %v", err)
+	}
+	cfg := &config.Config{
+		CodexHeaderDefaults: config.CodexHeaderDefaults{
+			UserAgent:    "config-ua",
+			BetaFeatures: "config-beta",
+		},
+	}
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+	req = req.WithContext(contextWithGinHeaders(map[string]string{
+		"User-Agent": "client-ua",
+	}))
+
+	applyCodexHeaders(req, auth, "oauth-token", true, cfg)
+
+	if got := req.Header.Get("User-Agent"); got != "config-ua" {
+		t.Fatalf("User-Agent = %s, want %s", got, "config-ua")
+	}
+	if got := req.Header.Get("x-codex-beta-features"); got != "" {
+		t.Fatalf("x-codex-beta-features = %q, want empty", got)
+	}
+}
+
+func TestApplyCodexHeadersPassesThroughClientIdentityHeaders(t *testing.T) {
+	req, err := http.NewRequest(http.MethodPost, "https://example.com/responses", nil)
+	if err != nil {
+		t.Fatalf("NewRequest() error = %v", err)
+	}
+	auth := &cliproxyauth.Auth{
+		Provider: "codex",
+		Metadata: map[string]any{"email": "user@example.com"},
+	}
+	req = req.WithContext(contextWithGinHeaders(map[string]string{
+		"Originator":            "Codex Desktop",
+		"Version":               "0.115.0-alpha.27",
+		"X-Codex-Turn-Metadata": `{"turn_id":"turn-1"}`,
+		"X-Client-Request-Id":   "019d2233-e240-7162-992d-38df0a2a0e0d",
+	}))
+
+	applyCodexHeaders(req, auth, "oauth-token", true, nil)
+
+	if got := req.Header.Get("Originator"); got != "Codex Desktop" {
+		t.Fatalf("Originator = %s, want %s", got, "Codex Desktop")
+	}
+	if got := req.Header.Get("Version"); got != "0.115.0-alpha.27" {
+		t.Fatalf("Version = %s, want %s", got, "0.115.0-alpha.27")
+	}
+	if got := req.Header.Get("X-Codex-Turn-Metadata"); got != `{"turn_id":"turn-1"}` {
+		t.Fatalf("X-Codex-Turn-Metadata = %s, want %s", got, `{"turn_id":"turn-1"}`)
+	}
+	if got := req.Header.Get("X-Client-Request-Id"); got != "019d2233-e240-7162-992d-38df0a2a0e0d" {
+		t.Fatalf("X-Client-Request-Id = %s, want %s", got, "019d2233-e240-7162-992d-38df0a2a0e0d")
+	}
+}
+
+func TestApplyCodexHeadersDoesNotInjectClientOnlyHeadersByDefault(t *testing.T) {
+	req, err := http.NewRequest(http.MethodPost, "https://example.com/responses", nil)
+	if err != nil {
+		t.Fatalf("NewRequest() error = %v", err)
+	}
+
+	applyCodexHeaders(req, nil, "oauth-token", true, nil)
+
+	if got := req.Header.Get("Version"); got != "" {
+		t.Fatalf("Version = %q, want empty", got)
+	}
+	if got := req.Header.Get("X-Codex-Turn-Metadata"); got != "" {
+		t.Fatalf("X-Codex-Turn-Metadata = %q, want empty", got)
+	}
+	if got := req.Header.Get("X-Client-Request-Id"); got != "" {
+		t.Fatalf("X-Client-Request-Id = %q, want empty", got)
+	}
+}
+
+func contextWithGinHeaders(headers map[string]string) context.Context {
+	gin.SetMode(gin.TestMode)
+	recorder := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(recorder)
+	ginCtx.Request = httptest.NewRequest(http.MethodPost, "/", nil)
+	ginCtx.Request.Header = make(http.Header, len(headers))
+	for key, value := range headers {
+		ginCtx.Request.Header.Set(key, value)
+	}
+	return context.WithValue(context.Background(), "gin", ginCtx)
+}
+
+func TestNewProxyAwareWebsocketDialerDirectDisablesProxy(t *testing.T) {
+	t.Parallel()
+
+	dialer := newProxyAwareWebsocketDialer(
+		&config.Config{SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"}},
+		&cliproxyauth.Auth{ProxyURL: "direct"},
+	)
+
+	if dialer.Proxy != nil {
+		t.Fatal("expected websocket proxy function to be nil for direct mode")
+	}
+}
--- a/internal/runtime/executor/compat_helpers.go
+++ b/internal/runtime/executor/compat_helpers.go
@@ -0,0 +1,129 @@
+package executor
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
+	"github.com/tidwall/gjson"
+	"github.com/tiktoken-go/tokenizer"
+)
+
+func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
+	return helps.NewProxyAwareHTTPClient(ctx, cfg, auth, timeout)
+}
+
+func parseOpenAIUsage(data []byte) usage.Detail {
+	return helps.ParseOpenAIUsage(data)
+}
+
+func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
+	return helps.ParseOpenAIStreamUsage(line)
+}
+
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+	return helps.ParseOpenAIUsage(data)
+}
+
+func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
+	return helps.ParseOpenAIStreamUsage(line)
+}
+
+func getTokenizer(model string) (tokenizer.Codec, error) {
+	return helps.TokenizerForModel(model)
+}
+
+func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
+	return helps.CountOpenAIChatTokens(enc, payload)
+}
+
+func countClaudeChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
+	return helps.CountClaudeChatTokens(enc, payload)
+}
+
+func buildOpenAIUsageJSON(count int64) []byte {
+	return helps.BuildOpenAIUsageJSON(count)
+}
+
+type upstreamRequestLog = helps.UpstreamRequestLog
+
+func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequestLog) {
+	helps.RecordAPIRequest(ctx, cfg, info)
+}
+
+func recordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status int, headers http.Header) {
+	helps.RecordAPIResponseMetadata(ctx, cfg, status, headers)
+}
+
+func recordAPIResponseError(ctx context.Context, cfg *config.Config, err error) {
+	helps.RecordAPIResponseError(ctx, cfg, err)
+}
+
+func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byte) {
+	helps.AppendAPIResponseChunk(ctx, cfg, chunk)
+}
+
+func payloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string {
+	return helps.PayloadRequestedModel(opts, fallback)
+}
+
+func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte {
+	return helps.ApplyPayloadConfigWithRoot(cfg, model, protocol, root, payload, original, requestedModel)
+}
+
+func summarizeErrorBody(contentType string, body []byte) string {
+	return helps.SummarizeErrorBody(contentType, body)
+}
+
+func apiKeyFromContext(ctx context.Context) string {
+	return helps.APIKeyFromContext(ctx)
+}
+
+func tokenizerForModel(model string) (tokenizer.Codec, error) {
+	return helps.TokenizerForModel(model)
+}
+
+func collectOpenAIContent(content gjson.Result, segments *[]string) {
+	helps.CollectOpenAIContent(content, segments)
+}
+
+type usageReporter struct {
+	reporter *helps.UsageReporter
+}
+
+func newUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *usageReporter {
+	return &usageReporter{reporter: helps.NewUsageReporter(ctx, provider, model, auth)}
+}
+
+func (r *usageReporter) publish(ctx context.Context, detail usage.Detail) {
+	if r == nil || r.reporter == nil {
+		return
+	}
+	r.reporter.Publish(ctx, detail)
+}
+
+func (r *usageReporter) publishFailure(ctx context.Context) {
+	if r == nil || r.reporter == nil {
+		return
+	}
+	r.reporter.PublishFailure(ctx)
+}
+
+func (r *usageReporter) trackFailure(ctx context.Context, errPtr *error) {
+	if r == nil || r.reporter == nil {
+		return
+	}
+	r.reporter.TrackFailure(ctx, errPtr)
+}
+
+func (r *usageReporter) ensurePublished(ctx context.Context) {
+	if r == nil || r.reporter == nil {
+		return
+	}
+	r.reporter.EnsurePublished(ctx)
+}
--- a/internal/runtime/executor/cursor_executor.go
+++ b/internal/runtime/executor/cursor_executor.go
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -18,6 +18,7 @@ import (

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -81,6 +82,11 @@ func (e *GeminiCLIExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth
 	}
 	req.Header.Set("Authorization", "Bearer "+tok.AccessToken)
 	applyGeminiCLIHeaders(req, "unknown")
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(req, attrs)
 	return nil
 }

@@ -112,8 +118,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		return resp, err
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
@@ -132,8 +138,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	basePayload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -190,7 +196,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
 		applyGeminiCLIHeaders(reqHTTP, attemptModel)
 		reqHTTP.Header.Set("Accept", "application/json")
-		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
+		helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 			URL:       url,
 			Method:    http.MethodPost,
 			Headers:   reqHTTP.Header.Clone(),
@@ -204,7 +211,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth

 		httpResp, errDo := httpClient.Do(reqHTTP)
 		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
+			helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 			err = errDo
 			return resp, err
 		}
@@ -213,24 +220,24 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("gemini cli executor: close response body error: %v", errClose)
 		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
+			helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 			err = errRead
 			return resp, err
 		}
-		appendAPIResponseChunk(ctx, e.cfg, data)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
-			reporter.publish(ctx, parseGeminiCLIUsage(data))
+			reporter.Publish(ctx, helps.ParseGeminiCLIUsage(data))
 			var param any
 			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, opts.OriginalRequest, payload, data, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+			resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 			return resp, nil
 		}

 		lastStatus = httpResp.StatusCode
 		lastBody = append([]byte(nil), data...)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 		if httpResp.StatusCode == 429 {
 			if idx+1 < len(models) {
 				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -245,7 +252,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	}

 	if len(lastBody) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, lastBody)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, lastBody)
 	}
 	if lastStatus == 0 {
 		lastStatus = 429
@@ -266,8 +273,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		return nil, err
 	}

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
@@ -286,8 +293,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	}

 	basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	basePayload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel)

 	projectID := resolveGeminiProjectID(auth)

@@ -335,7 +342,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
 		applyGeminiCLIHeaders(reqHTTP, attemptModel)
 		reqHTTP.Header.Set("Accept", "text/event-stream")
-		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
+		helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 			URL:       url,
 			Method:    http.MethodPost,
 			Headers:   reqHTTP.Header.Clone(),
@@ -349,25 +357,25 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut

 		httpResp, errDo := httpClient.Do(reqHTTP)
 		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
+			helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 			err = errDo
 			return nil, err
 		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 		if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 			data, errRead := io.ReadAll(httpResp.Body)
 			if errClose := httpResp.Body.Close(); errClose != nil {
 				log.Errorf("gemini cli executor: close response body error: %v", errClose)
 			}
 			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
+				helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 				err = errRead
 				return nil, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, data)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
-			logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
 			if httpResp.StatusCode == 429 {
 				if idx+1 < len(models) {
 					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
@@ -394,25 +402,25 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 				var param any
 				for scanner.Scan() {
 					line := scanner.Bytes()
-					appendAPIResponseChunk(ctx, e.cfg, line)
-					if detail, ok := parseGeminiCLIStreamUsage(line); ok {
-						reporter.publish(ctx, detail)
+					helps.AppendAPIResponseChunk(ctx, e.cfg, line)
+					if detail, ok := helps.ParseGeminiCLIStreamUsage(line); ok {
+						reporter.Publish(ctx, detail)
 					}
 					if bytes.HasPrefix(line, dataTag) {
 						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, bytes.Clone(line), &param)
 						for i := range segments {
-							out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
+							out <- cliproxyexecutor.StreamChunk{Payload: segments[i]}
 						}
 					}
 				}

 				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 				for i := range segments {
-					out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
+					out <- cliproxyexecutor.StreamChunk{Payload: segments[i]}
 				}
 				if errScan := scanner.Err(); errScan != nil {
-					recordAPIResponseError(ctx, e.cfg, errScan)
-					reporter.publishFailure(ctx)
+					helps.RecordAPIResponseError(ctx, e.cfg, errScan)
+					reporter.PublishFailure(ctx)
 					out <- cliproxyexecutor.StreamChunk{Err: errScan}
 				}
 				return
@@ -420,22 +428,22 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut

 			data, errRead := io.ReadAll(resp.Body)
 			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				reporter.publishFailure(ctx)
+				helps.RecordAPIResponseError(ctx, e.cfg, errRead)
+				reporter.PublishFailure(ctx)
 				out <- cliproxyexecutor.StreamChunk{Err: errRead}
 				return
 			}
-			appendAPIResponseChunk(ctx, e.cfg, data)
-			reporter.publish(ctx, parseGeminiCLIUsage(data))
+			helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+			reporter.Publish(ctx, helps.ParseGeminiCLIUsage(data))
 			var param any
 			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, data, &param)
 			for i := range segments {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: segments[i]}
 			}

 			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 			for i := range segments {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: segments[i]}
 			}
 		}(httpResp, append([]byte(nil), payload...), attemptModel)

@@ -443,7 +451,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	}

 	if len(lastBody) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, lastBody)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, lastBody)
 	}
 	if lastStatus == 0 {
 		lastStatus = 429
@@ -516,7 +524,8 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
 		applyGeminiCLIHeaders(reqHTTP, baseModel)
 		reqHTTP.Header.Set("Accept", "application/json")
-		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
+		helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 			URL:       url,
 			Method:    http.MethodPost,
 			Headers:   reqHTTP.Header.Clone(),
@@ -530,21 +539,23 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.

 		resp, errDo := httpClient.Do(reqHTTP)
 		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
+			helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 			return cliproxyexecutor.Response{}, errDo
 		}
 		data, errRead := io.ReadAll(resp.Body)
-		_ = resp.Body.Close()
-		recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
+		if errClose := resp.Body.Close(); errClose != nil {
+			helps.LogWithRequestID(ctx).Errorf("response body close error: %v", errClose)
+		}
+		helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
 		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
+			helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 			return cliproxyexecutor.Response{}, errRead
 		}
-		appendAPIResponseChunk(ctx, e.cfg, data)
+		helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
 			count := gjson.GetBytes(data, "totalTokens").Int()
 			translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data)
-			return cliproxyexecutor.Response{Payload: []byte(translated), Headers: resp.Header.Clone()}, nil
+			return cliproxyexecutor.Response{Payload: translated, Headers: resp.Header.Clone()}, nil
 		}
 		lastStatus = resp.StatusCode
 		lastBody = append([]byte(nil), data...)
@@ -611,7 +622,7 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth *
 	}

 	ctxToken := ctx
-	if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil {
+	if httpClient := helps.NewProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil {
 		ctxToken = context.WithValue(ctxToken, oauth2.HTTPClient, httpClient)
 	}

@@ -707,7 +718,7 @@ func geminiOAuthMetadata(auth *cliproxyauth.Auth) map[string]any {
 }

 func newHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
-	return newProxyAwareHTTPClient(ctx, cfg, auth, timeout)
+	return helps.NewProxyAwareHTTPClient(ctx, cfg, auth, timeout)
 }

 func cloneMap(in map[string]any) map[string]any {
@@ -811,18 +822,18 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {

 				if !hasInlineData {
 					emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String())
-					emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}`
-					emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed)
-					newPartsJson := `[]`
-					newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
-					newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", emptyImagePart)
+					emptyImagePart := []byte(`{"inlineData":{"mime_type":"image/png","data":""}}`)
+					emptyImagePart, _ = sjson.SetBytes(emptyImagePart, "inlineData.data", emptyImageBase64ed)
+					newPartsJson := []byte(`[]`)
+					newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", []byte(`{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`))
+					newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", emptyImagePart)

 					parts := contentArray[0].Get("parts").Array()
 					for j := 0; j < len(parts); j++ {
-						newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", parts[j].Raw)
+						newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", []byte(parts[j].Raw))
 					}

-					rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(newPartsJson))
+					rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", newPartsJson)
 					rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["IMAGE", "TEXT"]`))
 				}
 			}
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -13,6 +13,7 @@ import (
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -85,7 +86,7 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
 	if err := e.PrepareRequest(httpReq, auth); err != nil {
 		return nil, err
 	}
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	return httpClient.Do(httpReq)
 }

@@ -110,8 +111,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

 	apiKey, bearer := geminiCreds(auth)

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
@@ -130,8 +131,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := "generateContent"
@@ -165,7 +166,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -177,10 +178,10 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
 	defer func() {
@@ -188,24 +189,24 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 			log.Errorf("gemini executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
 	data, err := io.ReadAll(httpResp.Body)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return resp, err
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
-	reporter.publish(ctx, parseGeminiUsage(data))
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.Publish(ctx, helps.ParseGeminiUsage(data))
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 	return resp, nil
 }

@@ -218,8 +219,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 	apiKey, bearer := geminiCreds(auth)

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -237,8 +238,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	baseURL := resolveGeminiBaseURL(auth)
@@ -268,7 +269,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -280,17 +281,17 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return nil, err
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("gemini executor: close response body error: %v", errClose)
 		}
@@ -310,27 +311,27 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
-			appendAPIResponseChunk(ctx, e.cfg, line)
-			filtered := FilterSSEUsageMetadata(line)
-			payload := jsonPayload(filtered)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, line)
+			filtered := helps.FilterSSEUsageMetadata(line)
+			payload := helps.JSONPayload(filtered)
 			if len(payload) == 0 {
 				continue
 			}
-			if detail, ok := parseGeminiStreamUsage(payload); ok {
-				reporter.publish(ctx, detail)
+			if detail, ok := helps.ParseGeminiStreamUsage(payload); ok {
+				reporter.Publish(ctx, detail)
 			}
 			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(payload), &param)
 			for i := range lines {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 			}
 		}
 		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
-			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+			out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 		}
 		if errScan := scanner.Err(); errScan != nil {
-			recordAPIResponseError(ctx, e.cfg, errScan)
-			reporter.publishFailure(ctx)
+			helps.RecordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.PublishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
@@ -381,7 +382,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -393,29 +394,33 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	resp, err := httpClient.Do(httpReq)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
 	}
-	defer func() { _ = resp.Body.Close() }()
-	recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			helps.LogWithRequestID(ctx).Errorf("response body close error: %v", errClose)
+		}
+	}()
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())

 	data, err := io.ReadAll(resp.Body)
 	if err != nil {
-		recordAPIResponseError(ctx, e.cfg, err)
+		helps.RecordAPIResponseError(ctx, e.cfg, err)
 		return cliproxyexecutor.Response{}, err
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data))
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, helps.SummarizeErrorBody(resp.Header.Get("Content-Type"), data))
 		return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)}
 	}

 	count := gjson.GetBytes(data, "totalTokens").Int()
 	translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(translated), Headers: resp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: translated, Headers: resp.Header.Clone()}, nil
 }

 // Refresh refreshes the authentication credentials (no-op for Gemini API key).
@@ -527,18 +532,18 @@ func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte {

 				if !hasInlineData {
 					emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String())
-					emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}`
-					emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed)
-					newPartsJson := `[]`
-					newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
-					newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", emptyImagePart)
+					emptyImagePart := []byte(`{"inlineData":{"mime_type":"image/png","data":""}}`)
+					emptyImagePart, _ = sjson.SetBytes(emptyImagePart, "inlineData.data", emptyImageBase64ed)
+					newPartsJson := []byte(`[]`)
+					newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", []byte(`{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`))
+					newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", emptyImagePart)

 					parts := contentArray[0].Get("parts").Array()
 					for j := 0; j < len(parts); j++ {
-						newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", parts[j].Raw)
+						newPartsJson, _ = sjson.SetRawBytes(newPartsJson, "-1", []byte(parts[j].Raw))
 					}

-					rawJSON, _ = sjson.SetRawBytes(rawJSON, "contents.0.parts", []byte(newPartsJson))
+					rawJSON, _ = sjson.SetRawBytes(rawJSON, "contents.0.parts", newPartsJson)
 					rawJSON, _ = sjson.SetRawBytes(rawJSON, "generationConfig.responseModalities", []byte(`["IMAGE", "TEXT"]`))
 				}
 			}
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -16,7 +16,9 @@ import (

 	vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -227,7 +229,7 @@ func (e *GeminiVertexExecutor) HttpRequest(ctx context.Context, auth *cliproxyau
 	if err := e.PrepareRequest(httpReq, auth); err != nil {
 		return nil, err
 	}
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	return httpClient.Do(httpReq)
 }

@@ -301,8 +303,8 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut
 func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	var body []byte

@@ -332,8 +334,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}

 		body = fixGeminiImageAspectRatio(baseModel, body)
-		requestedModel := payloadRequestedModel(opts, req.Model)
-		body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+		requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+		body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 		body, _ = sjson.SetBytes(body, "model", baseModel)
 	}

@@ -362,6 +364,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		return resp, statusErr{code: 500, msg: "internal server error"}
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -369,7 +376,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -381,10 +388,10 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return resp, errDo
 	}
 	defer func() {
@@ -392,21 +399,21 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
 	if errRead != nil {
-		recordAPIResponseError(ctx, e.cfg, errRead)
+		helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 		return resp, errRead
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
-	reporter.publish(ctx, parseGeminiUsage(data))
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.Publish(ctx, helps.ParseGeminiUsage(data))

 	// For Imagen models, convert response to Gemini format before translation
 	// This ensures Imagen responses use the same format as gemini-3-pro-image-preview
@@ -419,7 +426,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	to := sdktranslator.FromString("gemini")
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 	return resp, nil
 }

@@ -427,8 +434,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -447,8 +454,8 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, false)
@@ -460,7 +467,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip

 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
-		baseURL = "https://generativelanguage.googleapis.com"
+		baseURL = "https://aiplatform.googleapis.com"
 	}
 	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
 	if opts.Alt != "" && action != "countTokens" {
@@ -477,6 +484,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		httpReq.Header.Set("x-goog-api-key", apiKey)
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -484,7 +496,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -496,10 +508,10 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return resp, errDo
 	}
 	defer func() {
@@ -507,24 +519,24 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
 		return resp, err
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
 	if errRead != nil {
-		recordAPIResponseError(ctx, e.cfg, errRead)
+		helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 		return resp, errRead
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
-	reporter.publish(ctx, parseGeminiUsage(data))
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.Publish(ctx, helps.ParseGeminiUsage(data))
 	var param any
 	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
 	return resp, nil
 }

@@ -532,8 +544,8 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (_ *cliproxyexecutor.StreamResult, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -552,8 +564,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, true)
@@ -581,6 +593,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		return nil, statusErr{code: 500, msg: "internal server error"}
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -588,7 +605,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -600,17 +617,17 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return nil, errDo
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -630,22 +647,22 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
-			appendAPIResponseChunk(ctx, e.cfg, line)
-			if detail, ok := parseGeminiStreamUsage(line); ok {
-				reporter.publish(ctx, detail)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, line)
+			if detail, ok := helps.ParseGeminiStreamUsage(line); ok {
+				reporter.Publish(ctx, detail)
 			}
 			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 			}
 		}
 		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
-			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+			out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 		}
 		if errScan := scanner.Err(); errScan != nil {
-			recordAPIResponseError(ctx, e.cfg, errScan)
-			reporter.publishFailure(ctx)
+			helps.RecordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.PublishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
@@ -656,8 +673,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (_ *cliproxyexecutor.StreamResult, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName

-	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.trackFailure(ctx, &err)
+	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.TrackFailure(ctx, &err)

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -676,14 +693,14 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	}

 	body = fixGeminiImageAspectRatio(baseModel, body)
-	requestedModel := payloadRequestedModel(opts, req.Model)
-	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
+	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	action := getVertexAction(baseModel, true)
 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
-		baseURL = "https://generativelanguage.googleapis.com"
+		baseURL = "https://aiplatform.googleapis.com"
 	}
 	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action)
 	// Imagen models don't support streaming, skip SSE params
@@ -705,6 +722,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		httpReq.Header.Set("x-goog-api-key", apiKey)
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -712,7 +734,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -724,17 +746,17 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return nil, errDo
 	}
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		if errClose := httpResp.Body.Close(); errClose != nil {
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
@@ -754,22 +776,22 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
-			appendAPIResponseChunk(ctx, e.cfg, line)
-			if detail, ok := parseGeminiStreamUsage(line); ok {
-				reporter.publish(ctx, detail)
+			helps.AppendAPIResponseChunk(ctx, e.cfg, line)
+			if detail, ok := helps.ParseGeminiStreamUsage(line); ok {
+				reporter.Publish(ctx, detail)
 			}
 			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
-				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+				out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 			}
 		}
 		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
-			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
+			out <- cliproxyexecutor.StreamChunk{Payload: lines[i]}
 		}
 		if errScan := scanner.Err(); errScan != nil {
-			recordAPIResponseError(ctx, e.cfg, errScan)
-			reporter.publishFailure(ctx)
+			helps.RecordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.PublishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
@@ -812,6 +834,11 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 		return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -819,7 +846,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -831,10 +858,10 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return cliproxyexecutor.Response{}, errDo
 	}
 	defer func() {
@@ -842,22 +869,22 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
 	if errRead != nil {
-		recordAPIResponseError(ctx, e.cfg, errRead)
+		helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 		return cliproxyexecutor.Response{}, errRead
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}, nil
 }

 // countTokensWithAPIKey handles token counting using API key credentials.
@@ -883,7 +910,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *

 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
-		baseURL = "https://generativelanguage.googleapis.com"
+		baseURL = "https://aiplatform.googleapis.com"
 	}
 	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens")

@@ -896,6 +923,11 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 		httpReq.Header.Set("x-goog-api-key", apiKey)
 	}
 	applyGeminiHeaders(httpReq, auth)
+	var attrs map[string]string
+	if auth != nil {
+		attrs = auth.Attributes
+	}
+	util.ApplyCustomHeadersFromAttrs(httpReq, attrs)

 	var authID, authLabel, authType, authValue string
 	if auth != nil {
@@ -903,7 +935,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       url,
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
@@ -915,10 +947,10 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 		AuthValue: authValue,
 	})

-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
-		recordAPIResponseError(ctx, e.cfg, errDo)
+		helps.RecordAPIResponseError(ctx, e.cfg, errDo)
 		return cliproxyexecutor.Response{}, errDo
 	}
 	defer func() {
@@ -926,22 +958,22 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 			log.Errorf("vertex executor: close response body error: %v", errClose)
 		}
 	}()
-	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
 	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
 		b, _ := io.ReadAll(httpResp.Body)
-		appendAPIResponseChunk(ctx, e.cfg, b)
-		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		helps.AppendAPIResponseChunk(ctx, e.cfg, b)
+		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
 	}
 	data, errRead := io.ReadAll(httpResp.Body)
 	if errRead != nil {
-		recordAPIResponseError(ctx, e.cfg, errRead)
+		helps.RecordAPIResponseError(ctx, e.cfg, errRead)
 		return cliproxyexecutor.Response{}, errRead
 	}
-	appendAPIResponseChunk(ctx, e.cfg, data)
+	helps.AppendAPIResponseChunk(ctx, e.cfg, data)
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}, nil
 }

 // vertexCreds extracts project, location and raw service account JSON from auth metadata.
@@ -1012,7 +1044,7 @@ func vertexBaseURL(location string) string {
 }

 func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, saJSON []byte) (string, error) {
-	if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil {
+	if httpClient := helps.NewProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil {
 		ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient)
 	}
 	// Use cloud-platform scope for Vertex AI.
--- a/Show More
+++ b/Show More