Merge pull request #88 from router-for-me/plus

v6.6.85
Merge branch 'main' into plus
2026-03-29 16:54:41 +00:00 · 2026-01-06 23:20:47 +08:00 · 2026-01-06 23:20:39 +08:00 · 2026-01-06 23:15:28 +08:00 · 2026-01-06 22:58:53 +08:00 · 2026-01-05 20:54:33 +07:00
43 changed files with 2429 additions and 258 deletions
--- a/README.md
+++ b/README.md
@@ -19,6 +19,17 @@ This project only accepts pull requests that relate to third-party provider supp
 If you need to submit any non-third-party provider changes, please open them against the mainline repository.
 ## More choices
 These projects are ports of CLIProxyAPI or are inspired by it:
 ### [9Router](https://github.com/decolua/9router)
 A Next.js implementation inspired by CLIProxyAPI, easy to install and use, built from scratch with format translation (OpenAI/Claude/Gemini/Ollama), combo system with auto-fallback, multi-account management with exponential backoff, a Next.js web dashboard, and support for CLI tools (Cursor, Claude Code, Cline, RooCode) - no API keys needed.
 > [!NOTE]  
 > If you have developed a port of CLIProxyAPI or a project inspired by it, please open a PR to add it to this list.
 ## License
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
--- a/README_CN.md
+++ b/README_CN.md
@@ -19,6 +19,17 @@
 如果需要提交任何非第三方供应商支持的 Pull Request，请提交到主线版本。
 ## 更多选择
 以下项目是 CLIProxyAPI 的移植版或受其启发：
 ### [9Router](https://github.com/decolua/9router)
 基于 Next.js 的实现，灵感来自 CLIProxyAPI，易于安装使用；自研格式转换（OpenAI/Claude/Gemini/Ollama）、组合系统与自动回退、多账户管理（指数退避）、Next.js Web 控制台，并支持 Cursor、Claude Code、Cline、RooCode 等 CLI 工具，无需 API 密钥。
 > [!NOTE]  
 > 如果你开发了 CLIProxyAPI 的移植或衍生项目，请提交 PR 将其添加到此列表中。
 ## 许可证
 此项目根据 MIT 许可证授权 - 有关详细信息，请参阅 [LICENSE](LICENSE) 文件。
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -221,6 +221,7 @@ ws-auth: false
 #   gemini-cli:
 #     - name: "gemini-2.5-pro"          # original model name under this channel
 #       alias: "g2.5p"                  # client-visible alias
 #       fork: true                      # when true, keep original and also add the alias as an extra model (default: false)
 #   vertex:
 #     - name: "gemini-2.5-pro"
 #       alias: "g2.5p"
--- a/docker-build.sh
+++ b/docker-build.sh
@@ -5,9 +5,115 @@
 # This script automates the process of building and running the Docker container
 # with version information dynamically injected at build time.
-# Exit immediately if a command exits with a non-zero status.
+# Hidden feature: Preserve usage statistics across rebuilds
 # Usage: ./docker-build.sh --with-usage
 # First run prompts for management API key, saved to temp/stats/.api_secret
 set -euo pipefail
 STATS_DIR="temp/stats"
 STATS_FILE="${STATS_DIR}/.usage_backup.json"
 SECRET_FILE="${STATS_DIR}/.api_secret"
 WITH_USAGE=false
 # get_port prints the TCP port the service is expected to listen on.
 # It reads the first top-level `port:` entry (optionally quoted, digits only)
 # from config.yaml in the current directory and prints the default 8317 when
 # the file is missing or contains no usable entry.
 # Outputs: the port number on stdout. Always returns 0.
 get_port() {
  local port=""
  if [[ -f "config.yaml" ]]; then
    # `sed -n ... p` prints only matching lines and exits 0 when nothing
    # matches, so a config without `port:` no longer trips `set -e`/pipefail.
    # `|| true` keeps whatever was captured if head closes the pipe early.
    port=$(sed -n -E 's/^port: *["'"'"']?([0-9]+)["'"'"']?.*$/\1/p' config.yaml | head -n 1) || true
  fi
  echo "${port:-8317}"
 }
 # export_stats_api_secret loads the management API key into API_SECRET.
 # Globals:
 #   STATS_DIR, SECRET_FILE (read); API_SECRET (written)
 # When SECRET_FILE exists its contents are used. Otherwise the key is read
 # from stdin with echo disabled and stored in SECRET_FILE for later runs.
 # Returns: non-zero (without writing SECRET_FILE) when an empty key is entered.
 export_stats_api_secret() {
  if [[ -f "${SECRET_FILE}" ]]; then
    API_SECRET=$(cat "${SECRET_FILE}")
  else
    mkdir -p "${STATS_DIR}"
    echo "First time using --with-usage. Management API key required."
    read -r -p "Enter management key: " -s API_SECRET
    echo
    if [[ -z "${API_SECRET}" ]]; then
      # Persisting an empty key would make every later run fail until the
      # file is removed by hand, so refuse it here.
      echo "Error: management key must not be empty." >&2
      return 1
    fi
    # Create the file under a restrictive umask so the key is never readable
    # by other users, not even between creation and chmod. printf (unlike
    # echo) writes keys such as "-n" verbatim.
    (umask 077 && printf '%s\n' "${API_SECRET}" > "${SECRET_FILE}")
    chmod 600 "${SECRET_FILE}"
  fi
 }
 # check_container_running probes http://localhost:<port>/ and terminates the
 # script with exit status 1 unless the reported HTTP status contains "200".
 # The port comes from get_port.
 check_container_running() {
  local port
  port=$(get_port)
  if curl -s -o /dev/null -w "%{http_code}" "http://localhost:${port}/" | grep -q "200"; then
    return 0
  fi
  echo "Error: cli-proxy-api service is not responding at localhost:${port}"
  echo "Please start the container first or use without --with-usage flag."
  exit 1
 }
 # export_stats downloads the usage statistics from the running service and
 # stores the response body in STATS_FILE.
 # Globals:
 #   STATS_DIR, STATS_FILE, API_SECRET (read)
 #   EXPORT_RESPONSE, HTTP_CODE, RESPONSE_BODY (written)
 # Exits the script with status 1 when the endpoint does not answer HTTP 200.
 export_stats() {
  local port
  port=$(get_port)
  # mkdir -p is a no-op when the directory already exists.
  mkdir -p "${STATS_DIR}"
  check_container_running
  echo "Exporting usage statistics..."
  # curl appends the status code on its own final line via -w.
  EXPORT_RESPONSE=$(curl -s -w "\n%{http_code}" \
    -H "X-Management-Key: ${API_SECRET}" \
    "http://localhost:${port}/v0/management/usage/export")
  HTTP_CODE=$(tail -n1 <<< "${EXPORT_RESPONSE}")
  RESPONSE_BODY=$(sed '$d' <<< "${EXPORT_RESPONSE}")
  if [[ "${HTTP_CODE}" == "200" ]]; then
    echo "${RESPONSE_BODY}" > "${STATS_FILE}"
    echo "Statistics exported to ${STATS_FILE}"
    return 0
  fi
  echo "Export failed (HTTP ${HTTP_CODE}): ${RESPONSE_BODY}"
  exit 1
 }
 # import_stats uploads the usage backup in STATS_FILE to the running service.
 # Globals:
 #   STATS_FILE, API_SECRET (read)
 #   IMPORT_RESPONSE, IMPORT_CODE, IMPORT_BODY (written)
 # The backup is deleted only after the service confirms the import with
 # HTTP 200; on any failure it is kept so the data is not lost.
 # Returns: 0 in all cases, so a failed import does not abort the caller.
 import_stats() {
  local port
  port=$(get_port)
  if [[ ! -f "${STATS_FILE}" ]]; then
    echo "No usage backup found at ${STATS_FILE}; skipping import." >&2
    return 0
  fi
  echo "Importing usage statistics..."
  # `|| true`: a transport error (curl then reports code 000) must reach the
  # failure branch below instead of silently aborting under `set -e`.
  IMPORT_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
    -H "X-Management-Key: ${API_SECRET}" \
    -H "Content-Type: application/json" \
    -d @"${STATS_FILE}" \
    "http://localhost:${port}/v0/management/usage/import") || true
  IMPORT_CODE=$(tail -n1 <<< "${IMPORT_RESPONSE}")
  IMPORT_BODY=$(sed '$d' <<< "${IMPORT_RESPONSE}")
  if [[ "${IMPORT_CODE}" == "200" ]]; then
    echo "Statistics imported successfully"
    rm -f "${STATS_FILE}"
  else
    echo "Import failed (HTTP ${IMPORT_CODE}): ${IMPORT_BODY}" >&2
    echo "Usage backup kept at ${STATS_FILE} for a manual retry." >&2
  fi
 }
 # wait_for_service polls http://localhost:<port>/ once per second, for at most
 # 30 attempts, until the reported HTTP status contains "200", then sleeps two
 # more seconds before returning.
 # A warning is written to stderr when the service never answered; the function
 # still returns 0 so the caller decides how to proceed.
 wait_for_service() {
  local port
  local ready=false
  port=$(get_port)
  echo "Waiting for service to be ready..."
  # `_` as the loop variable: the counter is unused and must not leak into
  # the global scope.
  for _ in {1..30}; do
    if curl -s -o /dev/null -w "%{http_code}" "http://localhost:${port}/" | grep -q "200"; then
      ready=true
      break
    fi
    sleep 1
  done
  if [[ "${ready}" != "true" ]]; then
    echo "Warning: service did not respond on localhost:${port} within 30 attempts; continuing anyway." >&2
  fi
  sleep 2
 }
 # Usage preservation is enabled only when --with-usage is the first argument.
 # The management key is loaded (or prompted for) here, before the menu below.
 if [[ "${1:-}" == "--with-usage" ]]; then
  WITH_USAGE=true
  export_stats_api_secret
 fi
 # --- Step 1: Choose Environment ---
 echo "Please select an option:"
 echo "1) Run using Pre-built Image (Recommended)"
@@ -18,7 +124,14 @@ read -r -p "Enter choice [1-2]: " choice
 case "$choice" in
  1)
    echo "--- Running with Pre-built Image ---"
    if [[ "${WITH_USAGE}" == "true" ]]; then
      export_stats
    fi
    docker compose up -d --remove-orphans --no-build
    if [[ "${WITH_USAGE}" == "true" ]]; then
      wait_for_service
      import_stats
    fi
    echo "Services are starting from remote image."
    echo "Run 'docker compose logs -f' to see the logs."
    ;;
@@ -38,7 +151,11 @@ case "$choice" in
    # Build and start the services with a local-only image tag
    export CLI_PROXY_IMAGE="cli-proxy-api:local"
-    
+
    if [[ "${WITH_USAGE}" == "true" ]]; then
      export_stats
    fi
    echo "Building the Docker image..."
    docker compose build \
      --build-arg VERSION="${VERSION}" \
@@ -48,6 +165,11 @@ case "$choice" in
    echo "Starting the services..."
    docker compose up -d --remove-orphans --pull never
    if [[ "${WITH_USAGE}" == "true" ]]; then
      wait_for_service
      import_stats
    fi
    echo "Build complete. Services are starting."
    echo "Run 'docker compose logs -f' to see the logs."
    ;;
@@ -55,4 +177,4 @@ case "$choice" in
    echo "Invalid choice. Please enter 1 or 2."
    exit 1
    ;;
-esac
+esac
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -202,6 +202,26 @@ func (h *Handler) PutLoggingToFile(c *gin.Context) {
 	h.updateBoolField(c, func(v bool) { h.cfg.LoggingToFile = v })
 }
 // LogsMaxTotalSizeMB
 // GetLogsMaxTotalSizeMB responds with the configured logs-max-total-size-mb value.
 func (h *Handler) GetLogsMaxTotalSizeMB(c *gin.Context) {
 	payload := gin.H{"logs-max-total-size-mb": h.cfg.LogsMaxTotalSizeMB}
 	c.JSON(200, payload)
 }
 // PutLogsMaxTotalSizeMB updates logs-max-total-size-mb from a JSON body of the
 // form {"value": <int>}. A malformed body or a missing value yields 400;
 // negative values are stored as 0. The change is then handed to h.persist.
 func (h *Handler) PutLogsMaxTotalSizeMB(c *gin.Context) {
 	var payload struct {
 		Value *int `json:"value"`
 	}
 	errBind := c.ShouldBindJSON(&payload)
 	if errBind != nil || payload.Value == nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
 		return
 	}
 	sizeMB := *payload.Value
 	if sizeMB < 0 {
 		// Clamp negative limits to zero.
 		sizeMB = 0
 	}
 	h.cfg.LogsMaxTotalSizeMB = sizeMB
 	h.persist(c)
 }
 // Request log
 func (h *Handler) GetRequestLog(c *gin.Context) { c.JSON(200, gin.H{"request-log": h.cfg.RequestLog}) }
 func (h *Handler) PutRequestLog(c *gin.Context) {
@@ -232,6 +252,52 @@ func (h *Handler) PutMaxRetryInterval(c *gin.Context) {
 	h.updateIntField(c, func(v int) { h.cfg.MaxRetryInterval = v })
 }
 // ForceModelPrefix
 // GetForceModelPrefix responds with the current force-model-prefix setting.
 func (h *Handler) GetForceModelPrefix(c *gin.Context) {
 	enabled := h.cfg.ForceModelPrefix
 	c.JSON(200, gin.H{"force-model-prefix": enabled})
 }
 // PutForceModelPrefix updates force-model-prefix through the shared
 // boolean-field helper.
 func (h *Handler) PutForceModelPrefix(c *gin.Context) {
 	apply := func(enabled bool) {
 		h.cfg.ForceModelPrefix = enabled
 	}
 	h.updateBoolField(c, apply)
 }
 // normalizeRoutingStrategy maps a user-supplied strategy name to its canonical
 // form. Matching is case-insensitive and ignores surrounding whitespace; the
 // empty string is treated as round-robin. For an unrecognized name it returns
 // an empty string and false.
 func normalizeRoutingStrategy(strategy string) (string, bool) {
 	key := strings.ToLower(strings.TrimSpace(strategy))
 	if key == "" || key == "round-robin" || key == "roundrobin" || key == "rr" {
 		return "round-robin", true
 	}
 	if key == "fill-first" || key == "fillfirst" || key == "ff" {
 		return "fill-first", true
 	}
 	return "", false
 }
 // RoutingStrategy
 // GetRoutingStrategy responds with the canonical routing strategy name, or
 // with the trimmed raw configured value when it is not a recognized strategy.
 func (h *Handler) GetRoutingStrategy(c *gin.Context) {
 	configured := h.cfg.Routing.Strategy
 	if canonical, recognized := normalizeRoutingStrategy(configured); recognized {
 		c.JSON(200, gin.H{"strategy": canonical})
 		return
 	}
 	c.JSON(200, gin.H{"strategy": strings.TrimSpace(configured)})
 }
 // PutRoutingStrategy sets the routing strategy from a JSON body of the form
 // {"value": "<name>"}. A malformed body or missing value yields 400
 // "invalid body"; an unrecognized name yields 400 "invalid strategy".
 // The canonical name is stored and handed to h.persist.
 func (h *Handler) PutRoutingStrategy(c *gin.Context) {
 	var payload struct {
 		Value *string `json:"value"`
 	}
 	errBind := c.ShouldBindJSON(&payload)
 	if errBind != nil || payload.Value == nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
 		return
 	}
 	canonical, recognized := normalizeRoutingStrategy(*payload.Value)
 	if recognized {
 		h.cfg.Routing.Strategy = canonical
 		h.persist(c)
 		return
 	}
 	c.JSON(http.StatusBadRequest, gin.H{"error": "invalid strategy"})
 }
 // Proxy URL
 func (h *Handler) GetProxyURL(c *gin.Context) { c.JSON(200, gin.H{"proxy-url": h.cfg.ProxyURL}) }
 func (h *Handler) PutProxyURL(c *gin.Context) {
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -487,6 +487,137 @@ func (h *Handler) DeleteOpenAICompat(c *gin.Context) {
 	c.JSON(400, gin.H{"error": "missing name or index"})
 }
 // vertex-api-key: []VertexCompatKey
 // GetVertexCompatKeys responds with the configured vertex-api-key entries.
 func (h *Handler) GetVertexCompatKeys(c *gin.Context) {
 	keys := h.cfg.VertexCompatAPIKey
 	c.JSON(200, gin.H{"vertex-api-key": keys})
 }
 // PutVertexCompatKeys replaces the whole vertex-api-key list. The body is
 // either a JSON array of entries or an object {"items": [...]}; the wrapped
 // form is rejected with 400 when it is malformed or has no items. Every entry
 // is normalized, the list is sanitized, and the result is handed to h.persist.
 func (h *Handler) PutVertexCompatKeys(c *gin.Context) {
 	raw, errRead := c.GetRawData()
 	if errRead != nil {
 		c.JSON(400, gin.H{"error": "failed to read body"})
 		return
 	}
 	var keys []config.VertexCompatKey
 	if errArray := json.Unmarshal(raw, &keys); errArray != nil {
 		// Not a bare array: fall back to the {"items": [...]} wrapper.
 		var wrapper struct {
 			Items []config.VertexCompatKey `json:"items"`
 		}
 		errWrapper := json.Unmarshal(raw, &wrapper)
 		if errWrapper != nil || len(wrapper.Items) == 0 {
 			c.JSON(400, gin.H{"error": "invalid body"})
 			return
 		}
 		keys = wrapper.Items
 	}
 	for idx := range keys {
 		normalizeVertexCompatKey(&keys[idx])
 	}
 	h.cfg.VertexCompatAPIKey = keys
 	h.cfg.SanitizeVertexCompatKeys()
 	h.persist(c)
 }
 // PatchVertexCompatKey partially updates one vertex-api-key entry.
 //
 // The body is {"index": <int>, "match": "<api-key>", "value": {...}}. The
 // target is chosen by a valid index first, otherwise by the first entry whose
 // APIKey equals the trimmed match string. Only fields present in value are
 // changed. Setting api-key or base-url to an empty (whitespace-only) string
 // removes the entry instead of updating it.
 //
 // Responses: 400 for a malformed body or missing value, 404 when no entry is
 // selected; otherwise the result is handed to h.persist.
 func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
 	// Pointer fields distinguish "not supplied" (nil) from "set to empty".
 	type vertexCompatPatch struct {
 		APIKey   *string                     `json:"api-key"`
 		Prefix   *string                     `json:"prefix"`
 		BaseURL  *string                     `json:"base-url"`
 		ProxyURL *string                     `json:"proxy-url"`
 		Headers  *map[string]string          `json:"headers"`
 		Models   *[]config.VertexCompatModel `json:"models"`
 	}
 	var body struct {
 		Index *int               `json:"index"`
 		Match *string            `json:"match"`
 		Value *vertexCompatPatch `json:"value"`
 	}
 	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil || body.Value == nil {
 		c.JSON(400, gin.H{"error": "invalid body"})
 		return
 	}
 	// Resolve the target: an in-range index wins; an out-of-range index is
 	// ignored and the lookup falls through to match.
 	targetIndex := -1
 	if body.Index != nil && *body.Index >= 0 && *body.Index < len(h.cfg.VertexCompatAPIKey) {
 		targetIndex = *body.Index
 	}
 	if targetIndex == -1 && body.Match != nil {
 		match := strings.TrimSpace(*body.Match)
 		if match != "" {
 			for i := range h.cfg.VertexCompatAPIKey {
 				if h.cfg.VertexCompatAPIKey[i].APIKey == match {
 					targetIndex = i
 					break
 				}
 			}
 		}
 	}
 	if targetIndex == -1 {
 		c.JSON(404, gin.H{"error": "item not found"})
 		return
 	}
 	// Work on a copy of the entry; it is written back only at the end.
 	entry := h.cfg.VertexCompatAPIKey[targetIndex]
 	if body.Value.APIKey != nil {
 		trimmed := strings.TrimSpace(*body.Value.APIKey)
 		if trimmed == "" {
 			// An empty api-key deletes the entry.
 			h.cfg.VertexCompatAPIKey = append(h.cfg.VertexCompatAPIKey[:targetIndex], h.cfg.VertexCompatAPIKey[targetIndex+1:]...)
 			h.cfg.SanitizeVertexCompatKeys()
 			h.persist(c)
 			return
 		}
 		entry.APIKey = trimmed
 	}
 	if body.Value.Prefix != nil {
 		entry.Prefix = strings.TrimSpace(*body.Value.Prefix)
 	}
 	if body.Value.BaseURL != nil {
 		trimmed := strings.TrimSpace(*body.Value.BaseURL)
 		if trimmed == "" {
 			// An empty base-url deletes the entry as well.
 			h.cfg.VertexCompatAPIKey = append(h.cfg.VertexCompatAPIKey[:targetIndex], h.cfg.VertexCompatAPIKey[targetIndex+1:]...)
 			h.cfg.SanitizeVertexCompatKeys()
 			h.persist(c)
 			return
 		}
 		entry.BaseURL = trimmed
 	}
 	if body.Value.ProxyURL != nil {
 		entry.ProxyURL = strings.TrimSpace(*body.Value.ProxyURL)
 	}
 	if body.Value.Headers != nil {
 		entry.Headers = config.NormalizeHeaders(*body.Value.Headers)
 	}
 	if body.Value.Models != nil {
 		// Copy the slice so the entry does not alias the request body.
 		entry.Models = append([]config.VertexCompatModel(nil), (*body.Value.Models)...)
 	}
 	normalizeVertexCompatKey(&entry)
 	h.cfg.VertexCompatAPIKey[targetIndex] = entry
 	h.cfg.SanitizeVertexCompatKeys()
 	h.persist(c)
 }
 // DeleteVertexCompatKey removes vertex-api-key entries. With a non-empty
 // "api-key" query parameter every entry with that APIKey is dropped; otherwise
 // a valid "index" query parameter removes the entry at that position. When
 // neither selects anything usable the response is 400.
 func (h *Handler) DeleteVertexCompatKey(c *gin.Context) {
 	apiKey := strings.TrimSpace(c.Query("api-key"))
 	if apiKey != "" {
 		kept := make([]config.VertexCompatKey, 0, len(h.cfg.VertexCompatAPIKey))
 		for i := range h.cfg.VertexCompatAPIKey {
 			if h.cfg.VertexCompatAPIKey[i].APIKey == apiKey {
 				continue
 			}
 			kept = append(kept, h.cfg.VertexCompatAPIKey[i])
 		}
 		h.cfg.VertexCompatAPIKey = kept
 		h.cfg.SanitizeVertexCompatKeys()
 		h.persist(c)
 		return
 	}
 	rawIndex := c.Query("index")
 	if rawIndex == "" {
 		c.JSON(400, gin.H{"error": "missing api-key or index"})
 		return
 	}
 	var position int
 	_, errScan := fmt.Sscanf(rawIndex, "%d", &position)
 	if errScan != nil || position < 0 || position >= len(h.cfg.VertexCompatAPIKey) {
 		c.JSON(400, gin.H{"error": "missing api-key or index"})
 		return
 	}
 	h.cfg.VertexCompatAPIKey = append(h.cfg.VertexCompatAPIKey[:position], h.cfg.VertexCompatAPIKey[position+1:]...)
 	h.cfg.SanitizeVertexCompatKeys()
 	h.persist(c)
 }
 // oauth-excluded-models: map[string][]string
 func (h *Handler) GetOAuthExcludedModels(c *gin.Context) {
 	c.JSON(200, gin.H{"oauth-excluded-models": config.NormalizeOAuthExcludedModels(h.cfg.OAuthExcludedModels)})
@@ -572,6 +703,103 @@ func (h *Handler) DeleteOAuthExcludedModels(c *gin.Context) {
 	h.persist(c)
 }
 // oauth-model-mappings: map[string][]ModelNameMapping
 // GetOAuthModelMappings responds with a sanitized copy of the configured
 // oauth-model-mappings.
 func (h *Handler) GetOAuthModelMappings(c *gin.Context) {
 	mappings := sanitizedOAuthModelMappings(h.cfg.OAuthModelMappings)
 	c.JSON(200, gin.H{"oauth-model-mappings": mappings})
 }
 // PutOAuthModelMappings replaces all oauth-model-mappings. The body is either
 // a JSON object keyed by channel or a wrapper {"items": {...}}. The sanitized
 // result is stored and handed to h.persist.
 func (h *Handler) PutOAuthModelMappings(c *gin.Context) {
 	raw, errRead := c.GetRawData()
 	if errRead != nil {
 		c.JSON(400, gin.H{"error": "failed to read body"})
 		return
 	}
 	var byChannel map[string][]config.ModelNameMapping
 	if errPlain := json.Unmarshal(raw, &byChannel); errPlain != nil {
 		// Not a plain channel map: try the {"items": ...} wrapper.
 		var wrapped struct {
 			Items map[string][]config.ModelNameMapping `json:"items"`
 		}
 		if errWrapped := json.Unmarshal(raw, &wrapped); errWrapped != nil {
 			c.JSON(400, gin.H{"error": "invalid body"})
 			return
 		}
 		byChannel = wrapped.Items
 	}
 	h.cfg.OAuthModelMappings = sanitizedOAuthModelMappings(byChannel)
 	h.persist(c)
 }
 // PatchOAuthModelMappings sets or clears the mappings of a single channel.
 // The channel comes from "channel" (preferred) or "provider" in the body and
 // is trimmed and lower-cased. When the sanitized mapping list is empty the
 // channel is removed (404 if it does not exist); otherwise it is replaced.
 func (h *Handler) PatchOAuthModelMappings(c *gin.Context) {
 	var payload struct {
 		Provider *string                   `json:"provider"`
 		Channel  *string                   `json:"channel"`
 		Mappings []config.ModelNameMapping `json:"mappings"`
 	}
 	if errBind := c.ShouldBindJSON(&payload); errBind != nil {
 		c.JSON(400, gin.H{"error": "invalid body"})
 		return
 	}
 	rawChannel := ""
 	switch {
 	case payload.Channel != nil:
 		rawChannel = *payload.Channel
 	case payload.Provider != nil:
 		rawChannel = *payload.Provider
 	}
 	channel := strings.ToLower(strings.TrimSpace(rawChannel))
 	if channel == "" {
 		c.JSON(400, gin.H{"error": "invalid channel"})
 		return
 	}
 	cleaned := sanitizedOAuthModelMappings(map[string][]config.ModelNameMapping{channel: payload.Mappings})[channel]
 	if len(cleaned) > 0 {
 		if h.cfg.OAuthModelMappings == nil {
 			h.cfg.OAuthModelMappings = make(map[string][]config.ModelNameMapping)
 		}
 		h.cfg.OAuthModelMappings[channel] = cleaned
 		h.persist(c)
 		return
 	}
 	// Nothing usable was supplied: treat the request as a removal.
 	// A lookup on a nil map reports "not present", covering the unset case.
 	if _, exists := h.cfg.OAuthModelMappings[channel]; !exists {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
 	delete(h.cfg.OAuthModelMappings, channel)
 	if len(h.cfg.OAuthModelMappings) == 0 {
 		h.cfg.OAuthModelMappings = nil
 	}
 	h.persist(c)
 }
 // DeleteOAuthModelMappings removes the mappings of one channel, named by the
 // "channel" query parameter or, when that is empty, by "provider". The name is
 // trimmed and lower-cased. Responds 400 when no name is given and 404 when the
 // channel is not configured.
 func (h *Handler) DeleteOAuthModelMappings(c *gin.Context) {
 	channel := ""
 	for _, param := range []string{"channel", "provider"} {
 		channel = strings.ToLower(strings.TrimSpace(c.Query(param)))
 		if channel != "" {
 			break
 		}
 	}
 	if channel == "" {
 		c.JSON(400, gin.H{"error": "missing channel"})
 		return
 	}
 	// A lookup on a nil map reports "not present", covering the unset case.
 	if _, exists := h.cfg.OAuthModelMappings[channel]; !exists {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
 	delete(h.cfg.OAuthModelMappings, channel)
 	if len(h.cfg.OAuthModelMappings) == 0 {
 		h.cfg.OAuthModelMappings = nil
 	}
 	h.persist(c)
 }
 // codex-api-key: []CodexKey
 func (h *Handler) GetCodexKeys(c *gin.Context) {
 	c.JSON(200, gin.H{"codex-api-key": h.cfg.CodexKey})
@@ -789,6 +1017,53 @@ func normalizeCodexKey(entry *config.CodexKey) {
 	entry.Models = normalized
 }
 // normalizeVertexCompatKey trims the string fields of entry in place,
 // normalizes its headers, and drops models whose trimmed name or alias is
 // empty. A nil entry is ignored.
 func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
 	if entry == nil {
 		return
 	}
 	entry.APIKey = strings.TrimSpace(entry.APIKey)
 	entry.Prefix = strings.TrimSpace(entry.Prefix)
 	entry.BaseURL = strings.TrimSpace(entry.BaseURL)
 	entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 	entry.Headers = config.NormalizeHeaders(entry.Headers)
 	if len(entry.Models) == 0 {
 		return
 	}
 	kept := make([]config.VertexCompatModel, 0, len(entry.Models))
 	for _, model := range entry.Models {
 		model.Name = strings.TrimSpace(model.Name)
 		model.Alias = strings.TrimSpace(model.Alias)
 		if model.Name != "" && model.Alias != "" {
 			kept = append(kept, model)
 		}
 	}
 	entry.Models = kept
 }
 // sanitizedOAuthModelMappings returns a sanitized copy of entries without
 // modifying the input. Channels with no mappings are skipped, the remainder is
 // run through Config.SanitizeOAuthModelMappings, and nil is returned when
 // nothing is left.
 func sanitizedOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string][]config.ModelNameMapping {
 	if len(entries) == 0 {
 		return nil
 	}
 	// A scratch Config gives access to the sanitizer.
 	scratch := config.Config{OAuthModelMappings: make(map[string][]config.ModelNameMapping, len(entries))}
 	for channel, mappings := range entries {
 		if len(mappings) > 0 {
 			scratch.OAuthModelMappings[channel] = append([]config.ModelNameMapping(nil), mappings...)
 		}
 	}
 	if len(scratch.OAuthModelMappings) == 0 {
 		return nil
 	}
 	scratch.SanitizeOAuthModelMappings()
 	if len(scratch.OAuthModelMappings) == 0 {
 		return nil
 	}
 	return scratch.OAuthModelMappings
 }
 // GetAmpCode returns the complete ampcode configuration.
 func (h *Handler) GetAmpCode(c *gin.Context) {
 	if h == nil || h.cfg == nil {
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -33,6 +33,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/yaml.v3"
@@ -511,6 +512,10 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PUT("/logging-to-file", s.mgmt.PutLoggingToFile)
 		mgmt.PATCH("/logging-to-file", s.mgmt.PutLoggingToFile)
 		mgmt.GET("/logs-max-total-size-mb", s.mgmt.GetLogsMaxTotalSizeMB)
 		mgmt.PUT("/logs-max-total-size-mb", s.mgmt.PutLogsMaxTotalSizeMB)
 		mgmt.PATCH("/logs-max-total-size-mb", s.mgmt.PutLogsMaxTotalSizeMB)
 		mgmt.GET("/usage-statistics-enabled", s.mgmt.GetUsageStatisticsEnabled)
 		mgmt.PUT("/usage-statistics-enabled", s.mgmt.PutUsageStatisticsEnabled)
 		mgmt.PATCH("/usage-statistics-enabled", s.mgmt.PutUsageStatisticsEnabled)
@@ -583,6 +588,14 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PUT("/max-retry-interval", s.mgmt.PutMaxRetryInterval)
 		mgmt.PATCH("/max-retry-interval", s.mgmt.PutMaxRetryInterval)
 		mgmt.GET("/force-model-prefix", s.mgmt.GetForceModelPrefix)
 		mgmt.PUT("/force-model-prefix", s.mgmt.PutForceModelPrefix)
 		mgmt.PATCH("/force-model-prefix", s.mgmt.PutForceModelPrefix)
 		mgmt.GET("/routing/strategy", s.mgmt.GetRoutingStrategy)
 		mgmt.PUT("/routing/strategy", s.mgmt.PutRoutingStrategy)
 		mgmt.PATCH("/routing/strategy", s.mgmt.PutRoutingStrategy)
 		mgmt.GET("/claude-api-key", s.mgmt.GetClaudeKeys)
 		mgmt.PUT("/claude-api-key", s.mgmt.PutClaudeKeys)
 		mgmt.PATCH("/claude-api-key", s.mgmt.PatchClaudeKey)
@@ -598,11 +611,21 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PATCH("/openai-compatibility", s.mgmt.PatchOpenAICompat)
 		mgmt.DELETE("/openai-compatibility", s.mgmt.DeleteOpenAICompat)
 		mgmt.GET("/vertex-api-key", s.mgmt.GetVertexCompatKeys)
 		mgmt.PUT("/vertex-api-key", s.mgmt.PutVertexCompatKeys)
 		mgmt.PATCH("/vertex-api-key", s.mgmt.PatchVertexCompatKey)
 		mgmt.DELETE("/vertex-api-key", s.mgmt.DeleteVertexCompatKey)
 		mgmt.GET("/oauth-excluded-models", s.mgmt.GetOAuthExcludedModels)
 		mgmt.PUT("/oauth-excluded-models", s.mgmt.PutOAuthExcludedModels)
 		mgmt.PATCH("/oauth-excluded-models", s.mgmt.PatchOAuthExcludedModels)
 		mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
 		mgmt.GET("/oauth-model-mappings", s.mgmt.GetOAuthModelMappings)
 		mgmt.PUT("/oauth-model-mappings", s.mgmt.PutOAuthModelMappings)
 		mgmt.PATCH("/oauth-model-mappings", s.mgmt.PatchOAuthModelMappings)
 		mgmt.DELETE("/oauth-model-mappings", s.mgmt.DeleteOAuthModelMappings)
 		mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
 		mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
 		mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
@@ -987,8 +1010,12 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 		log.Warnf("amp module is nil, skipping config update")
 	}
-	// Count client sources from configuration and auth directory
+	// Count client sources from configuration and auth store.
-	authFiles := util.CountAuthFiles(cfg.AuthDir)
+	tokenStore := sdkAuth.GetTokenStore()
 	if dirSetter, ok := tokenStore.(interface{ SetBaseDir(string) }); ok {
 		dirSetter.SetBaseDir(cfg.AuthDir)
 	}
 	authEntries := util.CountAuthFiles(context.Background(), tokenStore)
 	geminiAPIKeyCount := len(cfg.GeminiKey)
 	claudeAPIKeyCount := len(cfg.ClaudeKey)
 	codexAPIKeyCount := len(cfg.CodexKey)
@@ -999,10 +1026,10 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 		openAICompatCount += len(entry.APIKeyEntries)
 	}
-	total := authFiles + geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + vertexAICompatCount + openAICompatCount
+	total := authEntries + geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + vertexAICompatCount + openAICompatCount
-	fmt.Printf("server clients and configuration updated: %d clients (%d auth files + %d Gemini API keys + %d Claude API keys + %d Codex keys + %d Vertex-compat + %d OpenAI-compat)\n",
+	fmt.Printf("server clients and configuration updated: %d clients (%d auth entries + %d Gemini API keys + %d Claude API keys + %d Codex keys + %d Vertex-compat + %d OpenAI-compat)\n",
 		total,
-		authFiles,
+		authEntries,
 		geminiAPIKeyCount,
 		claudeAPIKeyCount,
 		codexAPIKeyCount,
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -157,11 +157,14 @@ type RoutingConfig struct {
 	Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
 }
-// ModelNameMapping defines a model ID rename mapping for a specific channel.
+// ModelNameMapping defines a model ID mapping for a specific channel.
-// It maps the original model name (Name) to the client-visible alias (Alias).
+// It maps the upstream model name (Name) to the client-visible alias (Alias).
 // When Fork is true, the alias is added as an additional model in listings while
 // keeping the original model ID available.
 type ModelNameMapping struct {
 	Name  string `yaml:"name" json:"name"`
 	Alias string `yaml:"alias" json:"alias"`
 	Fork  bool   `yaml:"fork,omitempty" json:"fork,omitempty"`
 }
 // AmpModelMapping defines a model name mapping for Amp CLI requests.
@@ -596,7 +599,7 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
 			}
 			seenName[nameKey] = struct{}{}
 			seenAlias[aliasKey] = struct{}{}
-			clean = append(clean, ModelNameMapping{Name: name, Alias: alias})
+			clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork})
 		}
 		if len(clean) > 0 {
 			out[channel] = clean
--- a/internal/config/oauth_model_mappings_test.go
+++ b/internal/config/oauth_model_mappings_test.go
@@ -0,0 +1,27 @@
 package config
 import "testing"
 func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
 	cfg := &Config{
 		OAuthModelMappings: map[string][]ModelNameMapping{
 			" CoDeX ": {
 				{Name: " gpt-5 ", Alias: " g5 ", Fork: true},
 				{Name: "gpt-6", Alias: "g6"},
 			},
 		},
 	}
 	cfg.SanitizeOAuthModelMappings()
 	mappings := cfg.OAuthModelMappings["codex"]
 	if len(mappings) != 2 {
 		t.Fatalf("expected 2 sanitized mappings, got %d", len(mappings))
 	}
 	if mappings[0].Name != "gpt-5" || mappings[0].Alias != "g5" || !mappings[0].Fork {
 		t.Fatalf("expected first mapping to be gpt-5->g5 fork=true, got name=%q alias=%q fork=%v", mappings[0].Name, mappings[0].Alias, mappings[0].Fork)
 	}
 	if mappings[1].Name != "gpt-6" || mappings[1].Alias != "g6" || mappings[1].Fork {
 		t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", mappings[1].Name, mappings[1].Alias, mappings[1].Fork)
 	}
 }
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -740,7 +740,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
 		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
@@ -773,7 +773,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
 		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
 		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
-		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
 		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
 		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
 		"gemini-3-flash-preview":                  {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -4,6 +4,7 @@
 package registry
 import (
 	"context"
 	"fmt"
 	"sort"
 	"strings"
@@ -84,6 +85,13 @@ type ModelRegistration struct {
 	SuspendedClients map[string]string
 }
 // ModelRegistryHook provides optional callbacks for external integrations to track model list changes.
 // Hook implementations must be non-blocking and resilient; calls are executed asynchronously and panics are recovered.
 type ModelRegistryHook interface {
 	// OnModelsRegistered receives the provider, client ID and model list of a
 	// client registration.
 	OnModelsRegistered(ctx context.Context, provider, clientID string, models []*ModelInfo)
 	// OnModelsUnregistered receives the provider and client ID of a client
 	// that was removed from the registry.
 	OnModelsUnregistered(ctx context.Context, provider, clientID string)
 }
 // ModelRegistry manages the global registry of available models
 type ModelRegistry struct {
 	// models maps model ID to registration information
@@ -97,6 +105,8 @@ type ModelRegistry struct {
 	clientProviders map[string]string
 	// mutex ensures thread-safe access to the registry
 	mutex *sync.RWMutex
 	// hook is an optional callback sink for model registration changes
 	hook ModelRegistryHook
 }
 // Global model registry instance
@@ -117,6 +127,53 @@ func GetGlobalRegistry() *ModelRegistry {
 	return globalRegistry
 }
 // SetHook sets an optional hook for observing model registration changes.
 // SetHook installs hook as the registry's change observer, replacing any
 // previous one. It is a no-op on a nil registry.
 func (r *ModelRegistry) SetHook(hook ModelRegistryHook) {
 	if r != nil {
 		r.mutex.Lock()
 		r.hook = hook
 		r.mutex.Unlock()
 	}
 }
 const defaultModelRegistryHookTimeout = 5 * time.Second
 // triggerModelsRegistered notifies the installed hook, if any, about a client
 // registration. The hook runs in its own goroutine with a de-duplicated copy
 // of models and a context bounded by defaultModelRegistryHookTimeout; a panic
 // inside the hook is recovered and logged.
 // NOTE(review): r.hook is read without taking r.mutex here; presumably the
 // caller already holds it. Confirm.
 func (r *ModelRegistry) triggerModelsRegistered(provider, clientID string, models []*ModelInfo) {
 	sink := r.hook
 	if sink == nil {
 		return
 	}
 	snapshot := cloneModelInfosUnique(models)
 	notify := func() {
 		defer func() {
 			if cause := recover(); cause != nil {
 				log.Errorf("model registry hook OnModelsRegistered panic: %v", cause)
 			}
 		}()
 		ctx, cancel := context.WithTimeout(context.Background(), defaultModelRegistryHookTimeout)
 		defer cancel()
 		sink.OnModelsRegistered(ctx, provider, clientID, snapshot)
 	}
 	go notify()
 }
 // triggerModelsUnregistered notifies the installed hook, if any, that a client
 // was removed. The hook runs in its own goroutine with a context bounded by
 // defaultModelRegistryHookTimeout; a panic inside the hook is recovered and
 // logged.
 // NOTE(review): r.hook is read without taking r.mutex here; presumably the
 // caller already holds it. Confirm.
 func (r *ModelRegistry) triggerModelsUnregistered(provider, clientID string) {
 	sink := r.hook
 	if sink == nil {
 		return
 	}
 	notify := func() {
 		defer func() {
 			if cause := recover(); cause != nil {
 				log.Errorf("model registry hook OnModelsUnregistered panic: %v", cause)
 			}
 		}()
 		ctx, cancel := context.WithTimeout(context.Background(), defaultModelRegistryHookTimeout)
 		defer cancel()
 		sink.OnModelsUnregistered(ctx, provider, clientID)
 	}
 	go notify()
 }
 // RegisterClient registers a client and its supported models
 // Parameters:
 //   - clientID: Unique identifier for the client
@@ -177,6 +234,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		} else {
 			delete(r.clientProviders, clientID)
 		}
 		r.triggerModelsRegistered(provider, clientID, models)
 		log.Debugf("Registered client %s from provider %s with %d models", clientID, clientProvider, len(rawModelIDs))
 		misc.LogCredentialSeparator()
 		return
@@ -310,6 +368,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
 		delete(r.clientProviders, clientID)
 	}
 	r.triggerModelsRegistered(provider, clientID, models)
 	if len(added) == 0 && len(removed) == 0 && !providerChanged {
 		// Only metadata (e.g., display name) changed; skip separator when no log output.
 		return
@@ -400,6 +459,25 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	return &copyModel
 }
 // cloneModelInfosUnique returns clones of models in their original order,
 // skipping nil entries, entries with an empty ID, and repeated IDs (the first
 // occurrence wins). It returns nil for an empty input.
 func cloneModelInfosUnique(models []*ModelInfo) []*ModelInfo {
 	if len(models) == 0 {
 		return nil
 	}
 	seenIDs := make(map[string]struct{}, len(models))
 	unique := make([]*ModelInfo, 0, len(models))
 	for i := range models {
 		candidate := models[i]
 		if candidate == nil || candidate.ID == "" {
 			continue
 		}
 		if _, duplicate := seenIDs[candidate.ID]; duplicate {
 			continue
 		}
 		seenIDs[candidate.ID] = struct{}{}
 		unique = append(unique, cloneModelInfo(candidate))
 	}
 	return unique
 }
 // UnregisterClient removes a client and decrements counts for its models
 // Parameters:
 //   - clientID: Unique identifier for the client to remove
@@ -460,6 +538,7 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
 	log.Debugf("Unregistered client %s", clientID)
 	// Separator line after completing client unregistration (after the summary line)
 	misc.LogCredentialSeparator()
 	r.triggerModelsUnregistered(provider, clientID)
 }
 // SetModelQuotaExceeded marks a model as quota exceeded for a specific client
@@ -625,6 +704,131 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 	return models
 }
 // GetAvailableModelsByProvider returns models available for the given provider identifier.
 //
 // The provider argument is trimmed and lower-cased, then compared for exact
 // equality against the values stored in r.clientProviders (the stored values
 // are not normalized here). The registry read lock is held for the whole call.
 //
 // A model is reported when at least one client of this provider can still
 // serve it, or when the only thing holding its clients back is a recent quota
 // hit / quota cooldown. The order of the returned slice follows Go map
 // iteration and is therefore unspecified. The returned pointers are the
 // ModelInfo values stored in the registry, not copies.
 //
 // Parameters:
 //   - provider: Provider identifier (e.g., "codex", "gemini", "antigravity")
 //
 // Returns:
 //   - []*ModelInfo: List of available models for the provider; nil when the
 //     provider is empty or no client of that provider has any model
 func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelInfo {
 	provider = strings.ToLower(strings.TrimSpace(provider))
 	if provider == "" {
 		return nil
 	}
 	r.mutex.RLock()
 	defer r.mutex.RUnlock()
 	// providerModel aggregates, per model ID, how many clients of this
 	// provider list the model and the first non-nil ModelInfo found for it.
 	type providerModel struct {
 		count int
 		info  *ModelInfo
 	}
 	providerModels := make(map[string]*providerModel)
 	// Pass 1: collect the models offered by every client of this provider.
 	for clientID, clientProvider := range r.clientProviders {
 		if clientProvider != provider {
 			continue
 		}
 		modelIDs := r.clientModels[clientID]
 		if len(modelIDs) == 0 {
 			continue
 		}
 		clientInfos := r.clientModelInfos[clientID]
 		for _, modelID := range modelIDs {
 			// The trimmed ID is used as the key for every lookup below.
 			modelID = strings.TrimSpace(modelID)
 			if modelID == "" {
 				continue
 			}
 			entry := providerModels[modelID]
 			if entry == nil {
 				entry = &providerModel{}
 				providerModels[modelID] = entry
 			}
 			entry.count++
 			if entry.info == nil {
 				// Prefer the client-specific info; fall back to the
 				// registry-wide registration info for this model ID.
 				if clientInfos != nil {
 					if info := clientInfos[modelID]; info != nil {
 						entry.info = info
 					}
 				}
 				if entry.info == nil {
 					if reg, ok := r.models[modelID]; ok && reg != nil && reg.Info != nil {
 						entry.info = reg.Info
 					}
 				}
 			}
 		}
 	}
 	if len(providerModels) == 0 {
 		return nil
 	}
 	// A quota-exceeded mark is only counted while it is younger than this.
 	quotaExpiredDuration := 5 * time.Minute
 	now := time.Now()
 	result := make([]*ModelInfo, 0, len(providerModels))
 	// Pass 2: decide per model whether it is still usable via this provider.
 	for modelID, entry := range providerModels {
 		if entry == nil || entry.count <= 0 {
 			continue
 		}
 		registration, ok := r.models[modelID]
 		// NOTE: despite its name, expiredClients counts clients whose
 		// quota-exceeded timestamp is still inside the 5-minute window,
 		// i.e. clients that are currently quota-limited.
 		expiredClients := 0
 		cooldownSuspended := 0
 		otherSuspended := 0
 		if ok && registration != nil {
 			if registration.QuotaExceededClients != nil {
 				for clientID, quotaTime := range registration.QuotaExceededClients {
 					if clientID == "" {
 						continue
 					}
 					// Only clients that belong to the requested provider count.
 					if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
 						continue
 					}
 					if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
 						expiredClients++
 					}
 				}
 			}
 			if registration.SuspendedClients != nil {
 				for clientID, reason := range registration.SuspendedClients {
 					if clientID == "" {
 						continue
 					}
 					if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
 						continue
 					}
 					// Suspensions whose reason is "quota" (case-insensitive)
 					// are tallied separately from all other suspensions.
 					if strings.EqualFold(reason, "quota") {
 						cooldownSuspended++
 						continue
 					}
 					otherSuspended++
 				}
 			}
 		}
 		availableClients := entry.count
 		// cooldownSuspended is intentionally not subtracted here; only
 		// quota-limited and non-quota-suspended clients reduce the count.
 		effectiveClients := availableClients - expiredClients - otherSuspended
 		if effectiveClients < 0 {
 			effectiveClients = 0
 		}
 		// Report the model when a client remains, or when clients exist, at
 		// least one is quota-limited or in quota cooldown, and none is
 		// suspended for a non-quota reason.
 		if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
 			if entry.info != nil {
 				result = append(result, entry.info)
 				continue
 			}
 			if ok && registration != nil && registration.Info != nil {
 				result = append(result, registration.Info)
 			}
 		}
 	}
 	return result
 }
 // GetModelCount returns the number of available clients for a specific model
 // Parameters:
 //   - modelID: The model ID to check
--- a/internal/registry/model_registry_hook_test.go
+++ b/internal/registry/model_registry_hook_test.go
@@ -0,0 +1,204 @@
 package registry
 import (
 	"context"
 	"sync"
 	"testing"
 	"time"
 )
 // newTestModelRegistry builds an isolated ModelRegistry with empty maps and a
 // fresh mutex so each test starts from a clean state and has no hook set.
 func newTestModelRegistry() *ModelRegistry {
 	registry := new(ModelRegistry)
 	registry.models = map[string]*ModelRegistration{}
 	registry.clientModels = map[string][]string{}
 	registry.clientModelInfos = map[string]map[string]*ModelInfo{}
 	registry.clientProviders = map[string]string{}
 	registry.mutex = new(sync.RWMutex)
 	return registry
 }
 // registeredCall records the arguments of one OnModelsRegistered invocation
 // so a test can assert on them after receiving the value from a channel.
 type registeredCall struct {
 	provider string       // provider value passed to the hook
 	clientID string       // client ID passed to the hook
 	models   []*ModelInfo // model list passed to the hook
 }
 // unregisteredCall records the arguments of one OnModelsUnregistered
 // invocation so a test can assert on them.
 type unregisteredCall struct {
 	provider string // provider value passed to the hook
 	clientID string // client ID passed to the hook
 }
 // capturingHook is a test hook that forwards every callback's arguments to a
 // channel. The sends block when the channel has no free buffer slot, so tests
 // size the channels to the number of callbacks they expect.
 type capturingHook struct {
 	registeredCh   chan registeredCall
 	unregisteredCh chan unregisteredCall
 }
 // OnModelsRegistered publishes the registration arguments on registeredCh.
 func (h *capturingHook) OnModelsRegistered(ctx context.Context, provider, clientID string, models []*ModelInfo) {
 	captured := registeredCall{
 		provider: provider,
 		clientID: clientID,
 		models:   models,
 	}
 	h.registeredCh <- captured
 }
 // OnModelsUnregistered publishes the unregistration arguments on unregisteredCh.
 func (h *capturingHook) OnModelsUnregistered(ctx context.Context, provider, clientID string) {
 	captured := unregisteredCall{
 		provider: provider,
 		clientID: clientID,
 	}
 	h.unregisteredCh <- captured
 }
 // TestModelRegistryHook_OnModelsRegisteredCalled verifies that RegisterClient
 // invokes OnModelsRegistered with the lower-cased provider, the client ID and
 // the registered models in their original order.
 func TestModelRegistryHook_OnModelsRegisteredCalled(t *testing.T) {
 	registry := newTestModelRegistry()
 	capture := &capturingHook{
 		registeredCh:   make(chan registeredCall, 1),
 		unregisteredCh: make(chan unregisteredCall, 1),
 	}
 	registry.SetHook(capture)
 	registry.RegisterClient("client-1", "OpenAI", []*ModelInfo{
 		{ID: "m1", DisplayName: "Model One"},
 		{ID: "m2", DisplayName: "Model Two"},
 	})
 	// The hook runs asynchronously; wait for it with a bounded timeout.
 	var got registeredCall
 	select {
 	case got = <-capture.registeredCh:
 	case <-time.After(2 * time.Second):
 		t.Fatal("timeout waiting for OnModelsRegistered hook call")
 	}
 	if got.provider != "openai" {
 		t.Fatalf("provider mismatch: got %q, want %q", got.provider, "openai")
 	}
 	if got.clientID != "client-1" {
 		t.Fatalf("clientID mismatch: got %q, want %q", got.clientID, "client-1")
 	}
 	if len(got.models) != 2 {
 		t.Fatalf("models length mismatch: got %d, want %d", len(got.models), 2)
 	}
 	if first := got.models[0]; first == nil || first.ID != "m1" {
 		t.Fatalf("models[0] mismatch: got %#v, want ID=%q", first, "m1")
 	}
 	if second := got.models[1]; second == nil || second.ID != "m2" {
 		t.Fatalf("models[1] mismatch: got %#v, want ID=%q", second, "m2")
 	}
 }
 // TestModelRegistryHook_OnModelsUnregisteredCalled verifies that
 // UnregisterClient invokes OnModelsUnregistered with the lower-cased provider
 // and the client ID of the removed client.
 func TestModelRegistryHook_OnModelsUnregisteredCalled(t *testing.T) {
 	registry := newTestModelRegistry()
 	capture := &capturingHook{
 		registeredCh:   make(chan registeredCall, 1),
 		unregisteredCh: make(chan unregisteredCall, 1),
 	}
 	registry.SetHook(capture)
 	registry.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1"}})
 	// Drain the registration callback before triggering unregistration.
 	select {
 	case <-capture.registeredCh:
 	case <-time.After(2 * time.Second):
 		t.Fatal("timeout waiting for OnModelsRegistered hook call")
 	}
 	registry.UnregisterClient("client-1")
 	var got unregisteredCall
 	select {
 	case got = <-capture.unregisteredCh:
 	case <-time.After(2 * time.Second):
 		t.Fatal("timeout waiting for OnModelsUnregistered hook call")
 	}
 	if got.provider != "openai" {
 		t.Fatalf("provider mismatch: got %q, want %q", got.provider, "openai")
 	}
 	if got.clientID != "client-1" {
 		t.Fatalf("clientID mismatch: got %q, want %q", got.clientID, "client-1")
 	}
 }
 // blockingHook is a test hook whose OnModelsRegistered signals that it has
 // started and then blocks until the test closes unblock. It is used to prove
 // that a slow hook does not stall RegisterClient.
 type blockingHook struct {
 	started chan struct{} // closed on the first OnModelsRegistered call
 	unblock chan struct{} // closed by the test to release blocked calls
 	// startOnce guards close(started). A check-then-close via select/default
 	// is not atomic, so concurrent hook invocations could close twice.
 	startOnce sync.Once
 }
 // OnModelsRegistered closes started exactly once, then waits for unblock.
 func (h *blockingHook) OnModelsRegistered(ctx context.Context, provider, clientID string, models []*ModelInfo) {
 	h.startOnce.Do(func() {
 		close(h.started)
 	})
 	<-h.unblock
 }
 // OnModelsUnregistered is a no-op; this hook only exercises registration.
 func (h *blockingHook) OnModelsUnregistered(ctx context.Context, provider, clientID string) {}
 // TestModelRegistryHook_DoesNotBlockRegisterClient verifies that
 // RegisterClient returns promptly and completes the registration even while
 // the hook callback is still blocked.
 func TestModelRegistryHook_DoesNotBlockRegisterClient(t *testing.T) {
 	registry := newTestModelRegistry()
 	started := make(chan struct{})
 	release := make(chan struct{})
 	registry.SetHook(&blockingHook{started: started, unblock: release})
 	// Release the blocked hook goroutine when the test finishes.
 	defer close(release)
 	finished := make(chan struct{})
 	go func() {
 		registry.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1"}})
 		close(finished)
 	}()
 	hookTimeout := time.After(2 * time.Second)
 	select {
 	case <-started:
 	case <-hookTimeout:
 		t.Fatal("timeout waiting for hook to start")
 	}
 	// The hook is now blocked; RegisterClient must still return quickly.
 	select {
 	case <-finished:
 	case <-time.After(200 * time.Millisecond):
 		t.Fatal("RegisterClient appears to be blocked by hook")
 	}
 	if supported := registry.ClientSupportsModel("client-1", "m1"); !supported {
 		t.Fatal("model registration failed; expected client to support model")
 	}
 }
 // panicHook is a test hook whose callbacks always panic. Each callback first
 // signals on its channel (when one is set) so the test can observe that the
 // hook was reached before the panic.
 type panicHook struct {
 	registeredCalled   chan struct{}
 	unregisteredCalled chan struct{}
 }
 // OnModelsRegistered signals registeredCalled if present, then panics.
 func (h *panicHook) OnModelsRegistered(ctx context.Context, provider, clientID string, models []*ModelInfo) {
 	if signal := h.registeredCalled; signal != nil {
 		signal <- struct{}{}
 	}
 	panic("boom")
 }
 // OnModelsUnregistered signals unregisteredCalled if present, then panics.
 func (h *panicHook) OnModelsUnregistered(ctx context.Context, provider, clientID string) {
 	if signal := h.unregisteredCalled; signal != nil {
 		signal <- struct{}{}
 	}
 	panic("boom")
 }
 // TestModelRegistryHook_PanicDoesNotAffectRegistry verifies that a panicking
 // hook neither crashes the test process nor prevents the registry from
 // recording the registration and running the unregistration callback.
 func TestModelRegistryHook_PanicDoesNotAffectRegistry(t *testing.T) {
 	registry := newTestModelRegistry()
 	registeredSeen := make(chan struct{}, 1)
 	unregisteredSeen := make(chan struct{}, 1)
 	registry.SetHook(&panicHook{
 		registeredCalled:   registeredSeen,
 		unregisteredCalled: unregisteredSeen,
 	})
 	registry.RegisterClient("client-1", "OpenAI", []*ModelInfo{{ID: "m1"}})
 	select {
 	case <-registeredSeen:
 	case <-time.After(2 * time.Second):
 		t.Fatal("timeout waiting for OnModelsRegistered hook call")
 	}
 	// The hook panicked after signalling; the registration must still stand.
 	if supported := registry.ClientSupportsModel("client-1", "m1"); !supported {
 		t.Fatal("model registration failed; expected client to support model")
 	}
 	registry.UnregisterClient("client-1")
 	select {
 	case <-unregisteredSeen:
 	case <-time.After(2 * time.Second):
 		t.Fatal("timeout waiting for OnModelsUnregistered hook call")
 	}
 }
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -323,6 +323,11 @@ type translatedPayload struct {
 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
 	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
@@ -331,7 +336,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
-	payload = applyPayloadConfig(e.cfg, req.Model, payload)
+	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -45,6 +45,7 @@ const (
 	defaultAntigravityAgent        = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType            = "antigravity"
 	refreshSkew                    = 3000 * time.Second
 	tokenRefreshTimeout            = 30 * time.Second
 )
 var (
@@ -94,13 +95,18 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -189,13 +195,18 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, true)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -525,13 +536,18 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -697,8 +713,8 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	for idx, baseURL := range baseURLs {
 		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
+		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
 		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
@@ -899,7 +915,13 @@ func (e *AntigravityExecutor) ensureAccessToken(ctx context.Context, auth *clipr
 	if accessToken != "" && expiry.After(time.Now().Add(refreshSkew)) {
 		return accessToken, nil, nil
 	}
-	updated, errRefresh := e.refreshToken(ctx, auth.Clone())
+	refreshCtx := context.Background()
 	if ctx != nil {
 		if rt, ok := ctx.Value("cliproxy.roundtripper").(http.RoundTripper); ok && rt != nil {
 			refreshCtx = context.WithValue(refreshCtx, "cliproxy.roundtripper", rt)
 		}
 	}
 	updated, errRefresh := e.refreshToken(refreshCtx, auth.Clone())
 	if errRefresh != nil {
 		return "", nil, errRefresh
 	}
@@ -929,7 +951,7 @@ func (e *AntigravityExecutor) refreshToken(ctx context.Context, auth *cliproxyau
 	httpReq.Header.Set("User-Agent", defaultAntigravityAgent)
 	httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, tokenRefreshTimeout)
 	httpResp, errDo := httpClient.Do(httpReq)
 	if errDo != nil {
 		return auth, errDo
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -57,6 +57,11 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", model)
 	// Inject thinking config based on model metadata for thinking variants
@@ -65,7 +70,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if !strings.HasPrefix(model, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
@@ -167,12 +172,17 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", model)
 	// Inject thinking config based on model metadata for thinking variants
 	body = e.injectThinkingConfig(model, req.Metadata, body)
 	body = checkSystemInstructions(body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -56,13 +56,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
 	body = NormalizeThinkingConfig(body, model, false)
 	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
 		return resp, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -156,6 +161,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
@@ -163,7 +173,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
 		return nil, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "model", model)
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -77,14 +77,19 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
 	action := "generateContent"
 	if req.Metadata != nil {
@@ -216,14 +221,19 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
 	projectID := resolveGeminiProjectID(auth)
@@ -421,7 +431,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// Gemini CLI endpoint when iterating fallback variants.
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
 		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -85,13 +85,18 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	body = ApplyThinkingMetadata(body, req.Metadata, model)
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	action := "generateContent"
@@ -183,13 +188,18 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	body = ApplyThinkingMetadata(body, req.Metadata, model)
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	baseURL := resolveGeminiBaseURL(auth)
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -122,6 +122,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
@@ -134,7 +139,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
 	action := "generateContent"
@@ -225,6 +230,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
@@ -237,7 +247,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	action := "generateContent"
@@ -324,6 +334,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
@@ -336,7 +351,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
 	baseURL := vertexBaseURL(location)
@@ -444,6 +459,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
@@ -456,7 +476,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	// For API key auth, use simpler URL format without project/location
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -79,9 +79,14 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", false)
 	url := githubCopilotBaseURL + githubCopilotChatPath
@@ -162,9 +167,14 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = e.normalizeModel(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -56,6 +56,11 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -65,7 +70,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	}
 	body = applyIFlowThinkingConfig(body)
 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -145,6 +150,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -160,7 +170,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -441,21 +451,18 @@ func ensureToolsArray(body []byte) []byte {
 	return updated
 }
-// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
+// preserveReasoningContentInMessages checks if reasoning_content from assistant messages
-// conversation history is preserved when sending to iFlow models that support thinking.
+// is preserved in conversation history for iFlow models that support thinking.
-// This is critical for multi-turn conversations where the model needs to see its previous
+// This is helpful for multi-turn conversations where the model may benefit from seeing
-// reasoning to maintain coherent thought chains across tool calls and conversation turns.
+// its previous reasoning to maintain coherent thought chains.
 //
-// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
+// For GLM-4.6/4.7 and MiniMax M2/M2.1, it is recommended to include the full assistant
-// appended back into message history before the next call.
+// response (including reasoning_content) in message history for better context continuity.
 func preserveReasoningContentInMessages(body []byte) []byte {
 	model := strings.ToLower(gjson.GetBytes(body, "model").String())
 	// Only apply to models that support thinking with history preservation
-	needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
+	needsPreservation := strings.HasPrefix(model, "glm-4") || strings.HasPrefix(model, "minimax-m2")
 		strings.HasPrefix(model, "glm-4-7") ||
 		strings.HasPrefix(model, "minimax-m2.1") ||
 		strings.HasPrefix(model, "minimax-m2-1")
 	if !needsPreservation {
 		return body
@@ -493,45 +500,35 @@ func preserveReasoningContentInMessages(body []byte) []byte {
 // This should be called after NormalizeThinkingConfig has processed the payload.
 //
 // Model-specific handling:
-//   - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
+//   - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
-//   - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
+//   - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
 //   - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
 func applyIFlowThinkingConfig(body []byte) []byte {
 	effort := gjson.GetBytes(body, "reasoning_effort")
-	model := strings.ToLower(gjson.GetBytes(body, "model").String())
+	if !effort.Exists() {
-
+		return body
 	// Check if thinking should be enabled
 	val := ""
 	if effort.Exists() {
 		val = strings.ToLower(strings.TrimSpace(effort.String()))
 	}
-	enableThinking := effort.Exists() && val != "none" && val != ""
+
 	model := strings.ToLower(gjson.GetBytes(body, "model").String())
 	val := strings.ToLower(strings.TrimSpace(effort.String()))
 	enableThinking := val != "none" && val != ""
 	// Remove reasoning_effort as we'll convert to model-specific format
-	if effort.Exists() {
+	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
-		body, _ = sjson.DeleteBytes(body, "reasoning_effort")
+	body, _ = sjson.DeleteBytes(body, "thinking")
 	}
-	// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
+	// GLM-4.6/4.7: Use chat_template_kwargs
-	if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
+	if strings.HasPrefix(model, "glm-4") {
 		if enableThinking {
 			body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
 			body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
 		}
 		return body
 	}
 	// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
 	if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
 		if enableThinking {
 			body, _ = sjson.SetBytes(body, "reasoning_split", true)
 		}
 		return body
 	}
 	// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
 	if effort.Exists() {
 		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
 		if enableThinking {
 			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
 		}
 		return body
 	}
 	// MiniMax M2/M2.1: Use reasoning_split
 	if strings.HasPrefix(model, "minimax-m2") {
 		body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
 		return body
 	}
 	return body
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -53,12 +53,17 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	// Translate inbound request to OpenAI format
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
 	modelOverride := e.resolveUpstreamModel(req.Model, auth)
 	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
@@ -145,12 +150,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	modelOverride := e.resolveUpstreamModel(req.Model, auth)
 	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -14,32 +14,54 @@ import (
 // ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 //
 // The payload is returned unchanged when no override can be resolved from the metadata
 // or when the model selected for the thinking check does not support thinking.
 func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	// Determine which model to use for thinking support check.
 	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
 	thinkingModel := lookupModel
 	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
 		thinkingModel = model
 	}
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
 	// Nothing to apply: resolution failed or produced neither a budget nor an include flag.
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(thinkingModel) {
 		return payload
 	}
 	// Pass the requested budget through util.NormalizeThinkingBudget for the selected model
 	// and point the override at the normalized value before writing it into the payload.
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }
-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
+// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
-func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
+func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	// Determine which model to use for thinking support check.
 	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
 	thinkingModel := lookupModel
 	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
 		thinkingModel = model
 	}
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(thinkingModel) {
 		return payload
 	}
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
@@ -82,17 +104,11 @@ func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
 	return payload
 }
 // applyPayloadConfig applies payload default and override rules from configuration
 // to the given JSON payload for the specified model.
 // Defaults only fill missing fields, while overrides always overwrite existing values.
 //
 // It is a convenience wrapper around applyPayloadConfigWithRoot with no protocol
 // filter and no root path. No original payload is supplied (nil), so the callee
 // falls back to checking default rules against payload itself.
 func applyPayloadConfig(cfg *config.Config, model string, payload []byte) []byte {
 	// applyPayloadConfigWithRoot takes a trailing "original" argument; passing nil
 	// keeps the previous behaviour of evaluating defaults against the live payload.
 	return applyPayloadConfigWithRoot(cfg, model, "", "", payload, nil)
 }
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
-// and restricts matches to the given protocol when supplied.
+// and restricts matches to the given protocol when supplied. Defaults are checked
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload []byte) []byte {
+// against the original payload when provided.
 func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
 	if cfg == nil || len(payload) == 0 {
 		return payload
 	}
@@ -105,6 +121,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		return payload
 	}
 	out := payload
 	source := original
 	if len(source) == 0 {
 		source = payload
 	}
 	appliedDefaults := make(map[string]struct{})
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
@@ -116,7 +137,10 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			if fullPath == "" {
 				continue
 			}
-			if gjson.GetBytes(out, fullPath).Exists() {
+			if gjson.GetBytes(source, fullPath).Exists() {
 				continue
 			}
 			if _, ok := appliedDefaults[fullPath]; ok {
 				continue
 			}
 			updated, errSet := sjson.SetBytes(out, fullPath, value)
@@ -124,6 +148,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 				continue
 			}
 			out = updated
 			appliedDefaults[fullPath] = struct{}{}
 		}
 	}
 	// Apply override rules: last write wins per field across all matching rules.
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -49,6 +49,11 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -56,7 +61,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
 		return resp, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -125,6 +130,11 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
 	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -140,7 +150,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -184,7 +184,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			role := m.Get("role").String()
 			content := m.Get("content")
-			if role == "system" && len(arr) > 1 {
+			if (role == "system" || role == "developer") && len(arr) > 1 {
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
@@ -201,7 +201,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 						}
 					}
 				}
-			} else if role == "user" || (role == "system" && len(arr) == 1) {
+			} else if role == "user" || ((role == "system" || role == "developer") && len(arr) == 1) {
 				// Build single user content node to avoid splitting into multiple contents
 				node := []byte(`{"role":"user","parts":[]}`)
 				if content.Type == gjson.String {
@@ -223,6 +223,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiCLIFunctionThoughtSignature)
 									p++
 								}
 							}
@@ -266,6 +267,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiCLIFunctionThoughtSignature)
 									p++
 								}
 							}
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -152,7 +152,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			role := m.Get("role").String()
 			content := m.Get("content")
-			if role == "system" && len(arr) > 1 {
+			if (role == "system" || role == "developer") && len(arr) > 1 {
 				// system -> request.systemInstruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "request.systemInstruction.role", "user")
@@ -169,7 +169,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 						}
 					}
 				}
-			} else if role == "user" || (role == "system" && len(arr) == 1) {
+			} else if role == "user" || ((role == "system" || role == "developer") && len(arr) == 1) {
 				// Build single user content node to avoid splitting into multiple contents
 				node := []byte(`{"role":"user","parts":[]}`)
 				if content.Type == gjson.String {
@@ -191,6 +191,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiCLIFunctionThoughtSignature)
 									p++
 								}
 							}
@@ -236,6 +237,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiCLIFunctionThoughtSignature)
 									p++
 								}
 							}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -170,7 +170,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			role := m.Get("role").String()
 			content := m.Get("content")
-			if role == "system" && len(arr) > 1 {
+			if (role == "system" || role == "developer") && len(arr) > 1 {
 				// system -> system_instruction as a user message style
 				if content.Type == gjson.String {
 					out, _ = sjson.SetBytes(out, "system_instruction.role", "user")
@@ -187,7 +187,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 						}
 					}
 				}
-			} else if role == "user" || (role == "system" && len(arr) == 1) {
+			} else if role == "user" || ((role == "system" || role == "developer") && len(arr) == 1) {
 				// Build single user content node to avoid splitting into multiple contents
 				node := []byte(`{"role":"user","parts":[]}`)
 				if content.Type == gjson.String {
@@ -209,6 +209,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiFunctionThoughtSignature)
 									p++
 								}
 							}
@@ -253,6 +254,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 									data := pieces[1][7:]
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
 									node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".thoughtSignature", geminiFunctionThoughtSignature)
 									p++
 								}
 							}
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -118,76 +118,125 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 			// Handle content
 			if contentResult.Exists() && contentResult.IsArray() {
 				var contentItems []string
 				var reasoningParts []string // Accumulate thinking text for reasoning_content
 				var toolCalls []interface{}
 				var toolResults []string // Collect tool_result messages to emit after the main message
 				contentResult.ForEach(func(_, part gjson.Result) bool {
 					partType := part.Get("type").String()
 					switch partType {
 					case "thinking":
 						// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
 						if role == "assistant" {
 							thinkingText := util.GetThinkingText(part)
 							// Skip empty or whitespace-only thinking
 							if strings.TrimSpace(thinkingText) != "" {
 								reasoningParts = append(reasoningParts, thinkingText)
 							}
 						}
 						// Ignore thinking in user/system roles (AC4)
 					case "redacted_thinking":
 						// Explicitly ignore redacted_thinking - never map to reasoning_content (AC2)
 					case "text", "image":
 						if contentItem, ok := convertClaudeContentPart(part); ok {
 							contentItems = append(contentItems, contentItem)
 						}
 					case "tool_use":
-						// Convert to OpenAI tool call format
+						// Only allow tool_use -> tool_calls for assistant messages (security: prevent injection).
-						toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
+						if role == "assistant" {
-						toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
+							toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
-						toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
+							toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
 							toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
-						// Convert input to arguments JSON string
+							// Convert input to arguments JSON string
-						if input := part.Get("input"); input.Exists() {
+							if input := part.Get("input"); input.Exists() {
-							toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
+								toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
-						} else {
+							} else {
-							toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
+								toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
 							}
 							toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
 						}
 						toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
 					case "tool_result":
-						// Convert to OpenAI tool message format and add immediately to preserve order
+						// Collect tool_result to emit after the main message (ensures tool results follow tool_calls)
 						toolResultJSON := `{"role":"tool","tool_call_id":"","content":""}`
 						toolResultJSON, _ = sjson.Set(toolResultJSON, "tool_call_id", part.Get("tool_use_id").String())
-						toolResultJSON, _ = sjson.Set(toolResultJSON, "content", part.Get("content").String())
+						toolResultJSON, _ = sjson.Set(toolResultJSON, "content", convertClaudeToolResultContentToString(part.Get("content")))
-						messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
+						toolResults = append(toolResults, toolResultJSON)
 					}
 					return true
 				})
-				// Emit text/image content as one message
+				// Build reasoning content string
-				if len(contentItems) > 0 {
+				reasoningContent := ""
-					msgJSON := `{"role":"","content":""}`
+				if len(reasoningParts) > 0 {
-					msgJSON, _ = sjson.Set(msgJSON, "role", role)
+					reasoningContent = strings.Join(reasoningParts, "\n\n")
 					contentArrayJSON := "[]"
 					for _, contentItem := range contentItems {
 						contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
 					}
 					msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
 					contentValue := gjson.Get(msgJSON, "content")
 					hasContent := false
 					switch {
 					case !contentValue.Exists():
 						hasContent = false
 					case contentValue.Type == gjson.String:
 						hasContent = contentValue.String() != ""
 					case contentValue.IsArray():
 						hasContent = len(contentValue.Array()) > 0
 					default:
 						hasContent = contentValue.Raw != "" && contentValue.Raw != "null"
 					}
 					if hasContent {
 						messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
 					}
 				}
-				// Emit tool calls in a separate assistant message
+				hasContent := len(contentItems) > 0
-				if role == "assistant" && len(toolCalls) > 0 {
+				hasReasoning := reasoningContent != ""
-					toolCallMsgJSON := `{"role":"assistant","tool_calls":[]}`
+				hasToolCalls := len(toolCalls) > 0
-					toolCallMsgJSON, _ = sjson.Set(toolCallMsgJSON, "tool_calls", toolCalls)
+				hasToolResults := len(toolResults) > 0
-					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolCallMsgJSON).Value())
+
 				// OpenAI requires: tool messages MUST immediately follow the assistant message with tool_calls.
 				// Therefore, we emit tool_result messages FIRST (they respond to the previous assistant's tool_calls),
 				// then emit the current message's content.
 				for _, toolResultJSON := range toolResults {
 					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
 				}
 				// For assistant messages: emit a single unified message with content, tool_calls, and reasoning_content
 				// This avoids splitting into multiple assistant messages which breaks OpenAI tool-call adjacency
 				if role == "assistant" {
 					if hasContent || hasReasoning || hasToolCalls {
 						msgJSON := `{"role":"assistant"}`
 						// Add content (as array if we have items, empty string if reasoning-only)
 						if hasContent {
 							contentArrayJSON := "[]"
 							for _, contentItem := range contentItems {
 								contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
 							}
 							msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
 						} else {
 							// Ensure content field exists for OpenAI compatibility
 							msgJSON, _ = sjson.Set(msgJSON, "content", "")
 						}
 						// Add reasoning_content if present
 						if hasReasoning {
 							msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", reasoningContent)
 						}
 						// Add tool_calls if present (in same message as content)
 						if hasToolCalls {
 							msgJSON, _ = sjson.Set(msgJSON, "tool_calls", toolCalls)
 						}
 						messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
 					}
 				} else {
 					// For non-assistant roles: emit content message if we have content
 					// If the message only contains tool_results (no text/image), we still processed them above
 					if hasContent {
 						msgJSON := `{"role":""}`
 						msgJSON, _ = sjson.Set(msgJSON, "role", role)
 						contentArrayJSON := "[]"
 						for _, contentItem := range contentItems {
 							contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
 						}
 						msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
 						messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
 					} else if hasToolResults && !hasContent {
 						// tool_results already emitted above, no additional user message needed
 					}
 				}
 			} else if contentResult.Exists() && contentResult.Type == gjson.String {
@@ -307,3 +356,43 @@ func convertClaudeContentPart(part gjson.Result) (string, bool) {
 		return "", false
 	}
 }
 // convertClaudeToolResultContentToString flattens the "content" value of a
 // Claude tool_result block into a single string.
 //
 // Rules, in order:
 //   - a missing value yields "";
 //   - a JSON string is returned as its decoded string value;
 //   - an array is reduced element by element (string elements contribute their
 //     value, objects with a string "text" field contribute that text, anything
 //     else contributes its raw JSON) and the pieces are joined with a blank
 //     line; if the joined text is only whitespace the array's raw JSON is
 //     returned instead;
 //   - an object with a string "text" field yields that text;
 //   - every other value falls back to its raw JSON.
 func convertClaudeToolResultContentToString(content gjson.Result) string {
 	switch {
 	case !content.Exists():
 		return ""
 	case content.Type == gjson.String:
 		return content.String()
 	case content.IsArray():
 		var segments []string
 		for _, elem := range content.Array() {
 			if elem.Type == gjson.String {
 				segments = append(segments, elem.String())
 				continue
 			}
 			if elem.IsObject() {
 				if textField := elem.Get("text"); textField.Exists() && textField.Type == gjson.String {
 					segments = append(segments, textField.String())
 					continue
 				}
 			}
 			// Unknown element shape: keep its JSON so no information is lost.
 			segments = append(segments, elem.Raw)
 		}
 		if merged := strings.Join(segments, "\n\n"); strings.TrimSpace(merged) != "" {
 			return merged
 		}
 	case content.IsObject():
 		if textField := content.Get("text"); textField.Exists() && textField.Type == gjson.String {
 			return textField.String()
 		}
 	}
 	// Shared fallback for blank arrays, text-less objects and scalar values.
 	return content.Raw
 }
--- a/internal/translator/openai/claude/openai_claude_request_test.go
+++ b/internal/translator/openai/claude/openai_claude_request_test.go
@@ -0,0 +1,500 @@
 package claude
 import (
 	"testing"
 	"github.com/tidwall/gjson"
 )
 // TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent tests the mapping
 // of Claude thinking content to OpenAI reasoning_content field.
 //
 // It is table-driven: every case feeds one Claude-format request through
 // ConvertClaudeRequestToOpenAI and inspects the last non-system message of the
 // converted output. For that message it checks
 //   - whether reasoning_content exists and what text it carries, and
 //   - whether content is "meaningful" (a non-empty array or a non-empty
 //     string) and, when expected, the text of its first "text" part.
 func TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent(t *testing.T) {
 	// Table columns: the Claude request, the expected reasoning_content (value
 	// and presence), and the expected visible content (first text part and
 	// presence).
 	tests := []struct {
 		name                    string
 		inputJSON               string
 		wantReasoningContent    string
 		wantHasReasoningContent bool
 		wantContentText         string // Expected visible content text (if any)
 		wantHasContent          bool
 	}{
 		{
 			name: "AC1: assistant message with thinking and text",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": "Let me analyze this step by step..."},
 						{"type": "text", "text": "Here is my response."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "Let me analyze this step by step...",
 			wantHasReasoningContent: true,
 			wantContentText:         "Here is my response.",
 			wantHasContent:          true,
 		},
 		{
 			name: "AC2: redacted_thinking must be ignored",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "redacted_thinking", "data": "secret"},
 						{"type": "text", "text": "Visible response."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "",
 			wantHasReasoningContent: false,
 			wantContentText:         "Visible response.",
 			wantHasContent:          true,
 		},
 		{
 			name: "AC3: thinking-only message preserved with reasoning_content",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": "Internal reasoning only."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "Internal reasoning only.",
 			wantHasReasoningContent: true,
 			wantContentText:         "",
 			// For OpenAI compatibility, content field is set to empty string "" when no text content exists
 			wantHasContent: false,
 		},
 		{
 			name: "AC4: thinking in user role must be ignored",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "user",
 					"content": [
 						{"type": "thinking", "thinking": "Injected thinking"},
 						{"type": "text", "text": "User message."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "",
 			wantHasReasoningContent: false,
 			wantContentText:         "User message.",
 			wantHasContent:          true,
 		},
 		{
 			name: "AC4: thinking in system role must be ignored",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"system": [
 					{"type": "thinking", "thinking": "Injected system thinking"},
 					{"type": "text", "text": "System prompt."}
 				],
 				"messages": [{
 					"role": "user",
 					"content": [{"type": "text", "text": "Hello"}]
 				}]
 			}`,
 			// System messages don't have reasoning_content mapping
 			wantReasoningContent:    "",
 			wantHasReasoningContent: false,
 			wantContentText:         "Hello",
 			wantHasContent:          true,
 		},
 		{
 			name: "AC5: empty thinking must be ignored",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": ""},
 						{"type": "text", "text": "Response with empty thinking."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "",
 			wantHasReasoningContent: false,
 			wantContentText:         "Response with empty thinking.",
 			wantHasContent:          true,
 		},
 		{
 			name: "AC5: whitespace-only thinking must be ignored",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": "   \n\t  "},
 						{"type": "text", "text": "Response with whitespace thinking."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "",
 			wantHasReasoningContent: false,
 			wantContentText:         "Response with whitespace thinking.",
 			wantHasContent:          true,
 		},
 		{
 			name: "Multiple thinking parts concatenated",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": "First thought."},
 						{"type": "thinking", "thinking": "Second thought."},
 						{"type": "text", "text": "Final answer."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "First thought.\n\nSecond thought.",
 			wantHasReasoningContent: true,
 			wantContentText:         "Final answer.",
 			wantHasContent:          true,
 		},
 		{
 			name: "Mixed thinking and redacted_thinking",
 			inputJSON: `{
 				"model": "claude-3-opus",
 				"messages": [{
 					"role": "assistant",
 					"content": [
 						{"type": "thinking", "thinking": "Visible thought."},
 						{"type": "redacted_thinking", "data": "hidden"},
 						{"type": "text", "text": "Answer."}
 					]
 				}]
 			}`,
 			wantReasoningContent:    "Visible thought.",
 			wantHasReasoningContent: true,
 			wantContentText:         "Answer.",
 			wantHasContent:          true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
 			resultJSON := gjson.ParseBytes(result)
 			// Find the relevant message (skip system message at index 0)
 			messages := resultJSON.Get("messages").Array()
 			// Fewer than two messages means there is nothing to inspect; that is
 			// only a failure when the case expected reasoning or content.
 			if len(messages) < 2 {
 				if tt.wantHasReasoningContent || tt.wantHasContent {
 					t.Fatalf("Expected at least 2 messages (system + user/assistant), got %d", len(messages))
 				}
 				return
 			}
 			// Check the last non-system message
 			// (scan from the end and stop at the first role that is not "system").
 			var targetMsg gjson.Result
 			for i := len(messages) - 1; i >= 0; i-- {
 				if messages[i].Get("role").String() != "system" {
 					targetMsg = messages[i]
 					break
 				}
 			}
 			// Check reasoning_content
 			// Presence and value are compared separately so a missing field and
 			// an empty-string field are reported as different failures.
 			gotReasoningContent := targetMsg.Get("reasoning_content").String()
 			gotHasReasoningContent := targetMsg.Get("reasoning_content").Exists()
 			if gotHasReasoningContent != tt.wantHasReasoningContent {
 				t.Errorf("reasoning_content existence = %v, want %v", gotHasReasoningContent, tt.wantHasReasoningContent)
 			}
 			if gotReasoningContent != tt.wantReasoningContent {
 				t.Errorf("reasoning_content = %q, want %q", gotReasoningContent, tt.wantReasoningContent)
 			}
 			// Check content
 			content := targetMsg.Get("content")
 			// content has meaningful content if it's a non-empty array, or a non-empty string
 			var gotHasContent bool
 			switch {
 			case content.IsArray():
 				gotHasContent = len(content.Array()) > 0
 			case content.Type == gjson.String:
 				gotHasContent = content.String() != ""
 			default:
 				gotHasContent = false
 			}
 			if gotHasContent != tt.wantHasContent {
 				t.Errorf("content existence = %v, want %v", gotHasContent, tt.wantHasContent)
 			}
 			// Only compare the text when the case expects visible content and
 			// names a specific string.
 			if tt.wantHasContent && tt.wantContentText != "" {
 				// Find text content
 				// (the first part whose type is "text"; returning false stops the iteration).
 				var foundText string
 				content.ForEach(func(_, v gjson.Result) bool {
 					if v.Get("type").String() == "text" {
 						foundText = v.Get("text").String()
 						return false
 					}
 					return true
 				})
 				if foundText != tt.wantContentText {
 					t.Errorf("content text = %q, want %q", foundText, tt.wantContentText)
 				}
 			}
 		})
 	}
 }
 // TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved tests AC3:
 // that a message with only thinking content is preserved (not dropped).
 //
 // The thinking-only assistant turn sits between two user turns, so dropping it
 // changes the total message count. Besides reasoning_content, the test pins the
 // "content" field: for an assistant message without visible content the
 // converter writes an empty string so the field is always present.
 func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
 		"messages": [
 			{
 				"role": "user",
 				"content": [{"type": "text", "text": "What is 2+2?"}]
 			},
 			{
 				"role": "assistant",
 				"content": [{"type": "thinking", "thinking": "Let me calculate: 2+2=4"}]
 			},
 			{
 				"role": "user",
 				"content": [{"type": "text", "text": "Thanks"}]
 			}
 		]
 	}`
 	result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()
 	// Should have: system (auto-added) + user + assistant (thinking-only) + user = 4 messages
 	if len(messages) != 4 {
 		t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}
 	// Check the assistant message (index 2) has reasoning_content
 	assistantMsg := messages[2]
 	if assistantMsg.Get("role").String() != "assistant" {
 		t.Errorf("Expected message[2] to be assistant, got %s", assistantMsg.Get("role").String())
 	}
 	if !assistantMsg.Get("reasoning_content").Exists() {
 		t.Error("Expected assistant message to have reasoning_content")
 	}
 	if assistantMsg.Get("reasoning_content").String() != "Let me calculate: 2+2=4" {
 		t.Errorf("Unexpected reasoning_content: %s", assistantMsg.Get("reasoning_content").String())
 	}
 	// The content field must still be emitted, as an empty string, even though
 	// the source message carried no visible text.
 	content := assistantMsg.Get("content")
 	if !content.Exists() {
 		t.Error("Expected assistant message to have a content field")
 	} else if content.Type != gjson.String || content.String() != "" {
 		t.Errorf("Expected content to be an empty string, got %s", content.Raw)
 	}
 }
 func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
 		"messages": [
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
 				]
 			},
 			{
 				"role": "user",
 				"content": [
 					{"type": "text", "text": "before"},
 					{"type": "tool_result", "tool_use_id": "call_1", "content": [{"type":"text","text":"tool ok"}]},
 					{"type": "text", "text": "after"}
 				]
 			}
 		]
 	}`
 	result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()
 	// OpenAI requires: tool messages MUST immediately follow assistant(tool_calls).
 	// Correct order: system + assistant(tool_calls) + tool(result) + user(before+after)
 	if len(messages) != 4 {
 		t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}
 	if messages[0].Get("role").String() != "system" {
 		t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
 	}
 	if messages[1].Get("role").String() != "assistant" || !messages[1].Get("tool_calls").Exists() {
 		t.Fatalf("Expected messages[1] to be assistant tool_calls, got %s: %s", messages[1].Get("role").String(), messages[1].Raw)
 	}
 	// tool message MUST immediately follow assistant(tool_calls) per OpenAI spec
 	if messages[2].Get("role").String() != "tool" {
 		t.Fatalf("Expected messages[2] to be tool (must follow tool_calls), got %s", messages[2].Get("role").String())
 	}
 	if got := messages[2].Get("tool_call_id").String(); got != "call_1" {
 		t.Fatalf("Expected tool_call_id %q, got %q", "call_1", got)
 	}
 	if got := messages[2].Get("content").String(); got != "tool ok" {
 		t.Fatalf("Expected tool content %q, got %q", "tool ok", got)
 	}
 	// User message comes after tool message
 	if messages[3].Get("role").String() != "user" {
 		t.Fatalf("Expected messages[3] to be user, got %s", messages[3].Get("role").String())
 	}
 	// User message should contain both "before" and "after" text
 	if got := messages[3].Get("content.0.text").String(); got != "before" {
 		t.Fatalf("Expected user text[0] %q, got %q", "before", got)
 	}
 	if got := messages[3].Get("content.1.text").String(); got != "after" {
 		t.Fatalf("Expected user text[1] %q, got %q", "after", got)
 	}
 }
 func TestConvertClaudeRequestToOpenAI_ToolResultObjectContent(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
 		"messages": [
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
 				]
 			},
 			{
 				"role": "user",
 				"content": [
 					{"type": "tool_result", "tool_use_id": "call_1", "content": {"foo": "bar"}}
 				]
 			}
 		]
 	}`
 	result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()
 	// system + assistant(tool_calls) + tool(result)
 	if len(messages) != 3 {
 		t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}
 	if messages[2].Get("role").String() != "tool" {
 		t.Fatalf("Expected messages[2] to be tool, got %s", messages[2].Get("role").String())
 	}
 	toolContent := messages[2].Get("content").String()
 	parsed := gjson.Parse(toolContent)
 	if parsed.Get("foo").String() != "bar" {
 		t.Fatalf("Expected tool content JSON foo=bar, got %q", toolContent)
 	}
 }
 func TestConvertClaudeRequestToOpenAI_AssistantTextToolUseTextOrder(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
 		"messages": [
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "text", "text": "pre"},
 					{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
 					{"type": "text", "text": "post"}
 				]
 			}
 		]
 	}`
 	result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()
 	// New behavior: content + tool_calls unified in single assistant message
 	// Expect: system + assistant(content[pre,post] + tool_calls)
 	if len(messages) != 2 {
 		t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}
 	if messages[0].Get("role").String() != "system" {
 		t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
 	}
 	assistantMsg := messages[1]
 	if assistantMsg.Get("role").String() != "assistant" {
 		t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
 	}
 	// Should have both content and tool_calls in same message
 	if !assistantMsg.Get("tool_calls").Exists() {
 		t.Fatalf("Expected assistant message to have tool_calls")
 	}
 	if got := assistantMsg.Get("tool_calls.0.id").String(); got != "call_1" {
 		t.Fatalf("Expected tool_call id %q, got %q", "call_1", got)
 	}
 	if got := assistantMsg.Get("tool_calls.0.function.name").String(); got != "do_work" {
 		t.Fatalf("Expected tool_call name %q, got %q", "do_work", got)
 	}
 	// Content should have both pre and post text
 	if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
 		t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
 	}
 	if got := assistantMsg.Get("content.1.text").String(); got != "post" {
 		t.Fatalf("Expected content[1] text %q, got %q", "post", got)
 	}
 }
 func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *testing.T) {
 	inputJSON := `{
 		"model": "claude-3-opus",
 		"messages": [
 			{
 				"role": "assistant",
 				"content": [
 					{"type": "thinking", "thinking": "t1"},
 					{"type": "text", "text": "pre"},
 					{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
 					{"type": "thinking", "thinking": "t2"},
 					{"type": "text", "text": "post"}
 				]
 			}
 		]
 	}`
 	result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
 	resultJSON := gjson.ParseBytes(result)
 	messages := resultJSON.Get("messages").Array()
 	// New behavior: all content, thinking, and tool_calls unified in single assistant message
 	// Expect: system + assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
 	if len(messages) != 2 {
 		t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
 	}
 	assistantMsg := messages[1]
 	if assistantMsg.Get("role").String() != "assistant" {
 		t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
 	}
 	// Should have content with both pre and post
 	if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
 		t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
 	}
 	if got := assistantMsg.Get("content.1.text").String(); got != "post" {
 		t.Fatalf("Expected content[1] text %q, got %q", "post", got)
 	}
 	// Should have tool_calls
 	if !assistantMsg.Get("tool_calls").Exists() {
 		t.Fatalf("Expected assistant message to have tool_calls")
 	}
 	// Should have combined reasoning_content from both thinking blocks
 	if got := assistantMsg.Get("reasoning_content").String(); got != "t1\n\nt2" {
 		t.Fatalf("Expected reasoning_content %q, got %q", "t1\n\nt2", got)
 	}
 }
--- a/internal/translator/openai/claude/openai_claude_response.go
+++ b/internal/translator/openai/claude/openai_claude_response.go
@@ -299,17 +299,16 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 				inputTokens = promptTokens.Int()
 				outputTokens = completionTokens.Int()
 			}
 			// Send message_delta with usage
 			messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
 			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
 			results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
 			param.MessageDeltaSent = true
 			emitMessageStopIfNeeded(param, &results)
 		}
 		// Send message_delta with usage
 		messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
 		messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
 		messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
 		messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
 		results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
 		param.MessageDeltaSent = true
 		emitMessageStopIfNeeded(param, &results)
 	}
 	return results
@@ -480,15 +479,15 @@ func collectOpenAIReasoningTexts(node gjson.Result) []string {
 	switch node.Type {
 	case gjson.String:
-		if text := strings.TrimSpace(node.String()); text != "" {
+		if text := node.String(); text != "" {
 			texts = append(texts, text)
 		}
 	case gjson.JSON:
 		if text := node.Get("text"); text.Exists() {
-			if trimmed := strings.TrimSpace(text.String()); trimmed != "" {
+			if textStr := text.String(); textStr != "" {
-				texts = append(texts, trimmed)
+				texts = append(texts, textStr)
 			}
-		} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
+		} else if raw := node.Raw; raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
 			texts = append(texts, raw)
 		}
 	}
--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -390,6 +390,11 @@ func addEmptySchemaPlaceholder(jsonStr string) string {
 		// If schema has properties but none are required, add a minimal placeholder.
 		if propsVal.IsObject() && !hasRequiredProperties {
 			// DO NOT add placeholder if it's a top-level schema (parentPath is empty)
 			// or if we've already added a placeholder reason above.
 			if parentPath == "" {
 				continue
 			}
 			placeholderPath := joinPath(propsPath, "_")
 			if !gjson.Get(jsonStr, placeholderPath).Exists() {
 				jsonStr, _ = sjson.Set(jsonStr, placeholderPath+".type", "boolean")
--- a/internal/util/gemini_schema_test.go
+++ b/internal/util/gemini_schema_test.go
@@ -127,8 +127,10 @@ func TestCleanJSONSchemaForAntigravity_AnyOfFlattening_SmartSelection(t *testing
 				"type": "object",
 				"description": "Accepts: null | object",
 				"properties": {
 					"_": { "type": "boolean" },
 					"kind": { "type": "string" }
-				}
+				},
 				"required": ["_"]
 			}
 		}
 	}`
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -71,10 +71,13 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
 		incl = &defaultInclude
 	}
 	if incl != nil {
-		valuePath := "generationConfig.thinkingConfig.include_thoughts"
+		if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() &&
-		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+			!gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() {
-		if err == nil {
+			valuePath := "generationConfig.thinkingConfig.include_thoughts"
-			updated = rewritten
+			rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 			if err == nil {
 				updated = rewritten
 			}
 		}
 	}
 	return updated
@@ -99,10 +102,13 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 		incl = &defaultInclude
 	}
 	if incl != nil {
-		valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
+		if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() &&
-		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+			!gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() {
-		if err == nil {
+			valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
-			updated = rewritten
+			rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 			if err == nil {
 				updated = rewritten
 			}
 		}
 	}
 	return updated
@@ -130,15 +136,15 @@ func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool)
 		incl = &defaultInclude
 	}
 	if incl != nil {
-		valuePath := "generationConfig.thinkingConfig.includeThoughts"
+		if !gjson.GetBytes(updated, "generationConfig.thinkingConfig.includeThoughts").Exists() &&
-		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+			!gjson.GetBytes(updated, "generationConfig.thinkingConfig.include_thoughts").Exists() {
-		if err == nil {
+			valuePath := "generationConfig.thinkingConfig.includeThoughts"
-			updated = rewritten
+			rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 			if err == nil {
 				updated = rewritten
 			}
 		}
 	}
 	if it := gjson.GetBytes(body, "generationConfig.thinkingConfig.include_thoughts"); it.Exists() {
 		updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig.include_thoughts")
 	}
 	if tb := gjson.GetBytes(body, "generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() {
 		updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig.thinkingBudget")
 	}
@@ -167,15 +173,15 @@ func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *boo
 		incl = &defaultInclude
 	}
 	if incl != nil {
-		valuePath := "request.generationConfig.thinkingConfig.includeThoughts"
+		if !gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.includeThoughts").Exists() &&
-		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+			!gjson.GetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts").Exists() {
-		if err == nil {
+			valuePath := "request.generationConfig.thinkingConfig.includeThoughts"
-			updated = rewritten
+			rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 			if err == nil {
 				updated = rewritten
 			}
 		}
 	}
 	if it := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); it.Exists() {
 		updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts")
 	}
 	if tb := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget"); tb.Exists() {
 		updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig.thinkingBudget")
 	}
@@ -251,9 +257,14 @@ func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {
 // modelsWithDefaultThinking lists models that should have thinking enabled by default
 // when no explicit thinkingConfig is provided.
 // Note: Gemini 3 models are NOT included here because per Google's official documentation:
 //   - thinkingLevel defaults to "high" (dynamic thinking)
 //   - includeThoughts defaults to false
 //
 // We should not override these API defaults; let users explicitly configure if needed.
 var modelsWithDefaultThinking = map[string]bool{
-	"gemini-3-pro-preview":       true,
+	// "gemini-3-pro-preview":       true,
-	"gemini-3-pro-image-preview": true,
+	// "gemini-3-pro-image-preview": true,
 	// "gemini-3-flash-preview":     true,
 }
@@ -288,37 +299,73 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 // ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
 // For standard Gemini API format (generationConfig.thinkingConfig path).
-// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
 // or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
 func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	// Determine which model to use for validation
 	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	// First try to get effort string from metadata
 	effort, ok := ReasoningEffortFromMetadata(metadata)
-	if !ok || effort == "" {
+	if ok && effort != "" {
-		return body
+		if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 			return ApplyGeminiThinkingLevel(body, level, nil)
 		}
 	}
-	// Validate and apply the thinkingLevel
+
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	// Fallback: check for numeric budget and convert to thinkingLevel
-		return ApplyGeminiThinkingLevel(body, level, nil)
+	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if matched && budget != nil {
 		if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
 			return ApplyGeminiThinkingLevel(body, level, nil)
 		}
 	}
 	return body
 }
 // ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
-// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
 // or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
 func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	// Determine which model to use for validation
 	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	// First try to get effort string from metadata
 	effort, ok := ReasoningEffortFromMetadata(metadata)
-	if !ok || effort == "" {
+	if ok && effort != "" {
-		return body
+		if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 			return ApplyGeminiCLIThinkingLevel(body, level, nil)
 		}
 	}
-	// Validate and apply the thinkingLevel
+
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	// Fallback: check for numeric budget and convert to thinkingLevel
-		return ApplyGeminiCLIThinkingLevel(body, level, nil)
+	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if matched && budget != nil {
 		if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
 			return ApplyGeminiCLIThinkingLevel(body, level, nil)
 		}
 	}
 	return body
 }
@@ -326,15 +373,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // Returns the modified body if thinkingConfig was added, otherwise returns the original.
 // For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
-func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !ModelHasDefaultThinking(model) {
+	// Use the alias from metadata if available for model property lookup
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
 		return body
 	}
 	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
 		return body
 	}
 	// Gemini 3 models use thinkingLevel instead of thinkingBudget
-	if IsGemini3Model(model) {
+	if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
 		// Don't set a default - let the API use its dynamic default ("high")
 		// Only set includeThoughts
 		updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
--- a/internal/util/sanitize_test.go
+++ b/internal/util/sanitize_test.go
@@ -0,0 +1,56 @@
 package util
 import (
 	"testing"
 )
 // TestSanitizeFunctionName drives SanitizeFunctionName with a table of inputs.
 // Each case checks the exact output and then two invariants on the result:
 // it is at most 64 bytes long and, when non-empty, begins with an ASCII letter
 // or an underscore.
 func TestSanitizeFunctionName(t *testing.T) {
 	type sanitizeCase struct {
 		name     string
 		input    string
 		expected string
 	}
 	cases := []sanitizeCase{
 		{"Normal", "valid_name", "valid_name"},
 		{"With Dots", "name.with.dots", "name.with.dots"},
 		{"With Colons", "name:with:colons", "name:with:colons"},
 		{"With Dashes", "name-with-dashes", "name-with-dashes"},
 		{"Mixed Allowed", "name.with_dots:colons-dashes", "name.with_dots:colons-dashes"},
 		{"Invalid Characters", "name!with@invalid#chars", "name_with_invalid_chars"},
 		{"Spaces", "name with spaces", "name_with_spaces"},
 		{"Non-ASCII", "name_with_你好_chars", "name_with____chars"},
 		{"Starts with digit", "123name", "_123name"},
 		{"Starts with dot", ".name", "_.name"},
 		{"Starts with colon", ":name", "_:name"},
 		{"Starts with dash", "-name", "_-name"},
 		{"Starts with invalid char", "!name", "_name"},
 		{"Exactly 64 chars", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
 		{"Too long (65 chars)", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charactX", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
 		{"Very long", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_limit_for_function_names", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_l"},
 		{"Starts with digit (64 chars total)", "1234567890123456789012345678901234567890123456789012345678901234", "_123456789012345678901234567890123456789012345678901234567890123"},
 		{"Starts with invalid char (64 chars total)", "!234567890123456789012345678901234567890123456789012345678901234", "_234567890123456789012345678901234567890123456789012345678901234"},
 		{"Empty", "", ""},
 		{"Single character invalid", "@", "_"},
 		{"Single character valid", "a", "a"},
 		{"Single character digit", "1", "_1"},
 		{"Single character underscore", "_", "_"},
 	}
 	// validStart reports whether b may be the first byte of a sanitized name.
 	validStart := func(b byte) bool {
 		switch {
 		case b == '_':
 			return true
 		case 'a' <= b && b <= 'z':
 			return true
 		case 'A' <= b && b <= 'Z':
 			return true
 		}
 		return false
 	}
 	for i := range cases {
 		tc := cases[i]
 		t.Run(tc.name, func(t *testing.T) {
 			result := SanitizeFunctionName(tc.input)
 			if result != tc.expected {
 				t.Errorf("SanitizeFunctionName(%q) = %v, want %v", tc.input, result, tc.expected)
 			}
 			// Invariants that must hold regardless of the expected string.
 			if n := len(result); n > 64 {
 				t.Errorf("SanitizeFunctionName(%q) result too long: %d", tc.input, n)
 			}
 			if result == "" {
 				return
 			}
 			if head := result[0]; !validStart(head) {
 				t.Errorf("SanitizeFunctionName(%q) result starts with invalid char: %c", tc.input, head)
 			}
 		})
 	}
 }
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -12,9 +12,18 @@ func ModelSupportsThinking(model string) bool {
 	if model == "" {
 		return false
 	}
 	// First check the global dynamic registry
 	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
 		return info.Thinking != nil
 	}
 	// Fallback: check static model definitions
 	if info := registry.LookupStaticModelInfo(model); info != nil {
 		return info.Thinking != nil
 	}
 	// Fallback: check Antigravity static config
 	if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil {
 		return cfg.Thinking != nil
 	}
 	return false
 }
@@ -63,11 +72,19 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
 	if model == "" {
 		return false, 0, 0, false, false
 	}
-	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	// First check global dynamic registry
-	if info == nil || info.Thinking == nil {
+	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
-		return false, 0, 0, false, false
+		return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 	}
-	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
+	// Fallback: check static model definitions
 	if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
 		return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 	}
 	// Fallback: check Antigravity static config
 	if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
 		return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed
 	}
 	return false, 0, 0, false, false
 }
 // GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
--- a/internal/util/util.go
+++ b/internal/util/util.go
@@ -4,16 +4,56 @@
 package util
 import (
 	"context"
 	"fmt"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	log "github.com/sirupsen/logrus"
 )
 // functionNameSanitizer matches every character that is not allowed anywhere in a
 // function name: anything outside ASCII letters, digits, '_', '.', ':' and '-'.
 var functionNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9_.:-]`)
 // SanitizeFunctionName ensures a function name matches the requirements for Gemini/Vertex AI.
 //
 // The steps are:
 //  1. An empty name is returned as "".
 //  2. Every character matching [^a-zA-Z0-9_.:-] is replaced with "_".
 //  3. If the first byte is not an ASCII letter or underscore (a digit, dot, colon
 //     or dash), an underscore is prepended.
 //  4. The result is limited to 64 bytes. When an underscore has to be prepended the
 //     name is cut to 63 bytes first, so the prefix never pushes it past the limit.
 //
 // After step 2 the string contains only ASCII bytes, so slicing by byte index in
 // steps 3 and 4 cannot split a multi-byte character.
 func SanitizeFunctionName(name string) string {
 	const maxLen = 64
 	if name == "" {
 		return ""
 	}
 	// Each match is replaced by exactly one "_", so a non-empty input always
 	// yields a non-empty result and indexing byte 0 below is safe.
 	sanitized := functionNameSanitizer.ReplaceAllString(name, "_")
 	first := sanitized[0]
 	startsValid := first == '_' || (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z')
 	if !startsValid {
 		// Make room for the prefix before adding it.
 		if len(sanitized) >= maxLen {
 			sanitized = sanitized[:maxLen-1]
 		}
 		sanitized = "_" + sanitized
 	}
 	if len(sanitized) > maxLen {
 		sanitized = sanitized[:maxLen]
 	}
 	return sanitized
 }
 // SetLogLevel configures the logrus log level based on the configuration.
 // It sets the log level to DebugLevel if debug mode is enabled, otherwise to InfoLevel.
 func SetLogLevel(cfg *config.Config) {
@@ -53,36 +93,23 @@ func ResolveAuthDir(authDir string) (string, error) {
 	return filepath.Clean(authDir), nil
 }
-// CountAuthFiles returns the number of JSON auth files located under the provided directory.
+// CountAuthFiles returns the number of auth records available through the provided Store.
-// The function resolves leading tildes to the user's home directory and performs a case-insensitive
+// For filesystem-backed stores, this reflects the number of JSON auth files under the configured directory.
-// match on the ".json" suffix so that files saved with uppercase extensions are also counted.
+func CountAuthFiles[T any](ctx context.Context, store interface {
-func CountAuthFiles(authDir string) int {
+	List(context.Context) ([]T, error)
-	dir, err := ResolveAuthDir(authDir)
+}) int {
 	if store == nil {
 		return 0
 	}
 	if ctx == nil {
 		ctx = context.Background()
 	}
 	entries, err := store.List(ctx)
 	if err != nil {
-		log.Debugf("countAuthFiles: failed to resolve auth directory: %v", err)
+		log.Debugf("countAuthFiles: failed to list auth records: %v", err)
 		return 0
 	}
-	if dir == "" {
+	return len(entries)
 		return 0
 	}
 	count := 0
 	walkErr := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
 			log.Debugf("countAuthFiles: error accessing %s: %v", path, err)
 			return nil
 		}
 		if d.IsDir() {
 			return nil
 		}
 		if strings.HasSuffix(strings.ToLower(d.Name()), ".json") {
 			count++
 		}
 		return nil
 	})
 	if walkErr != nil {
 		log.Debugf("countAuthFiles: walk error: %v", walkErr)
 	}
 	return count
 }
 // WritablePath returns the cleaned WRITABLE_PATH environment variable when it is set.
--- a/internal/watcher/diff/oauth_model_mappings.go
+++ b/internal/watcher/diff/oauth_model_mappings.go
@@ -80,6 +80,9 @@ func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMa
 			continue
 		}
 		key := name + "->" + alias
 		if mapping.Fork {
 			key += "|fork"
 		}
 		if _, exists := seen[key]; exists {
 			continue
 		}
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -1536,6 +1536,9 @@ func (m *Manager) markRefreshPending(id string, now time.Time) bool {
 }
 func (m *Manager) refreshAuth(ctx context.Context, id string) {
 	if ctx == nil {
 		ctx = context.Background()
 	}
 	m.mu.RLock()
 	auth := m.auths[id]
 	var exec ProviderExecutor
@@ -1548,6 +1551,10 @@ func (m *Manager) refreshAuth(ctx context.Context, id string) {
 	}
 	cloned := auth.Clone()
 	updated, err := exec.Refresh(ctx, cloned)
 	if err != nil && errors.Is(err, context.Canceled) {
 		log.Debugf("refresh canceled for %s, %s", auth.Provider, auth.ID)
 		return
 	}
 	log.Debugf("refreshed %s, %s, %v", auth.Provider, auth.ID, err)
 	now := time.Now()
 	if err != nil {
--- a/sdk/cliproxy/auth/model_name_mappings.go
+++ b/sdk/cliproxy/auth/model_name_mappings.go
@@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta
 			out[k] = v
 		}
 	}
-	out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel
+	// Store the requested alias (e.g., "gp") so downstream can use it to look up
 	// model metadata from the global registry where it was registered under this alias.
 	out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
 	return upstreamModel, out
 }
--- a/sdk/cliproxy/model_registry.go
+++ b/sdk/cliproxy/model_registry.go
@@ -5,6 +5,9 @@ import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 // ModelInfo re-exports the registry model info structure.
 type ModelInfo = registry.ModelInfo
 // ModelRegistryHook re-exports the registry hook interface for external integrations.
 type ModelRegistryHook = registry.ModelRegistryHook
 // ModelRegistry describes registry operations consumed by external callers.
 type ModelRegistry interface {
 	RegisterClient(clientID, clientProvider string, models []*ModelInfo)
@@ -13,9 +16,15 @@ type ModelRegistry interface {
 	ClearModelQuotaExceeded(clientID, modelID string)
 	ClientSupportsModel(clientID, modelID string) bool
 	GetAvailableModels(handlerType string) []map[string]any
 	GetAvailableModelsByProvider(provider string) []*ModelInfo
 }
 // GlobalModelRegistry returns the shared registry instance.
 // The value is whatever registry.GetGlobalRegistry() returns, exposed to callers
 // through the ModelRegistry interface declared in this package.
 func GlobalModelRegistry() ModelRegistry {
 	return registry.GetGlobalRegistry()
 }
 // SetGlobalModelRegistryHook registers an optional hook on the shared global registry instance.
 // The hook is forwarded unchanged to SetHook on the value returned by
 // registry.GetGlobalRegistry().
 // NOTE(review): whether passing nil clears a previously registered hook is decided
 // by SetHook, which is not visible here — confirm before relying on it.
 func SetGlobalModelRegistryHook(hook ModelRegistryHook) {
 	registry.GetGlobalRegistry().SetHook(hook)
 }
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -1240,7 +1240,13 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 	if len(mappings) == 0 {
 		return models
 	}
-	forward := make(map[string]string, len(mappings))
+
 	type mappingEntry struct {
 		alias string
 		fork  bool
 	}
 	forward := make(map[string]mappingEntry, len(mappings))
 	for i := range mappings {
 		name := strings.TrimSpace(mappings[i].Name)
 		alias := strings.TrimSpace(mappings[i].Alias)
@@ -1254,7 +1260,7 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 		if _, exists := forward[key]; exists {
 			continue
 		}
-		forward[key] = alias
+		forward[key] = mappingEntry{alias: alias, fork: mappings[i].Fork}
 	}
 	if len(forward) == 0 {
 		return models
@@ -1269,10 +1275,45 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 		if id == "" {
 			continue
 		}
-		mappedID := id
+		key := strings.ToLower(id)
-		if to, ok := forward[strings.ToLower(id)]; ok && strings.TrimSpace(to) != "" {
+		entry, ok := forward[key]
-			mappedID = strings.TrimSpace(to)
+		if !ok {
 			if _, exists := seen[key]; exists {
 				continue
 			}
 			seen[key] = struct{}{}
 			out = append(out, model)
 			continue
 		}
 		mappedID := strings.TrimSpace(entry.alias)
 		if mappedID == "" {
 			if _, exists := seen[key]; exists {
 				continue
 			}
 			seen[key] = struct{}{}
 			out = append(out, model)
 			continue
 		}
 		if entry.fork {
 			if _, exists := seen[key]; !exists {
 				seen[key] = struct{}{}
 				out = append(out, model)
 			}
 			aliasKey := strings.ToLower(mappedID)
 			if _, exists := seen[aliasKey]; exists {
 				continue
 			}
 			seen[aliasKey] = struct{}{}
 			clone := *model
 			clone.ID = mappedID
 			if clone.Name != "" {
 				clone.Name = rewriteModelInfoName(clone.Name, id, mappedID)
 			}
 			out = append(out, &clone)
 			continue
 		}
 		uniqueKey := strings.ToLower(mappedID)
 		if _, exists := seen[uniqueKey]; exists {
 			continue
--- a/sdk/cliproxy/service_oauth_model_mappings_test.go
+++ b/sdk/cliproxy/service_oauth_model_mappings_test.go
@@ -0,0 +1,58 @@
 package cliproxy
 import (
 	"testing"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 )
 // TestApplyOAuthModelMappings_Rename checks that a non-fork mapping from "gpt-5"
 // to "g5" yields exactly one model whose ID and Name both use the alias.
 func TestApplyOAuthModelMappings_Rename(t *testing.T) {
 	renameRule := config.ModelNameMapping{Name: "gpt-5", Alias: "g5"}
 	conf := &config.Config{
 		OAuthModelMappings: map[string][]config.ModelNameMapping{
 			"codex": {renameRule},
 		},
 	}
 	upstream := &ModelInfo{ID: "gpt-5", Name: "models/gpt-5"}
 	got := applyOAuthModelMappings(conf, "codex", "oauth", []*ModelInfo{upstream})
 	if n := len(got); n != 1 {
 		t.Fatalf("expected 1 model, got %d", n)
 	}
 	mapped := got[0]
 	if mapped.ID != "g5" {
 		t.Fatalf("expected model id %q, got %q", "g5", mapped.ID)
 	}
 	if mapped.Name != "models/g5" {
 		t.Fatalf("expected model name %q, got %q", "models/g5", mapped.Name)
 	}
 }
 // TestApplyOAuthModelMappings_ForkAddsAlias checks that a mapping with Fork set
 // keeps the original "gpt-5" entry first and appends a second entry whose ID and
 // Name use the alias "g5".
 func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) {
 	forkRule := config.ModelNameMapping{Name: "gpt-5", Alias: "g5", Fork: true}
 	conf := &config.Config{
 		OAuthModelMappings: map[string][]config.ModelNameMapping{
 			"codex": {forkRule},
 		},
 	}
 	upstream := &ModelInfo{ID: "gpt-5", Name: "models/gpt-5"}
 	got := applyOAuthModelMappings(conf, "codex", "oauth", []*ModelInfo{upstream})
 	if n := len(got); n != 2 {
 		t.Fatalf("expected 2 models, got %d", n)
 	}
 	kept, forked := got[0], got[1]
 	if kept.ID != "gpt-5" {
 		t.Fatalf("expected first model id %q, got %q", "gpt-5", kept.ID)
 	}
 	if forked.ID != "g5" {
 		t.Fatalf("expected second model id %q, got %q", "g5", forked.ID)
 	}
 	if forked.Name != "models/g5" {
 		t.Fatalf("expected forked model name %q, got %q", "models/g5", forked.Name)
 	}
 }
--- a/test/model_alias_thinking_suffix_test.go
+++ b/test/model_alias_thinking_suffix_test.go
@@ -0,0 +1,211 @@
 package test
 import (
 	"testing"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 )
 // TestModelAliasThinkingSuffix covers the 32 cases listed in docs/thinking_suffix_test_cases.md.
 // For every case it parses the thinking suffix from the requested model name, simulates the
 // alias bookkeeping done by OAuth model mapping, checks the parsed budget/effort, and for
 // Gemini 3 / Gemini 2.5 upstream models also checks the thinkingConfig written into a
 // Gemini CLI request body.
 func TestModelAliasThinkingSuffix(t *testing.T) {
 	type suffixCase struct {
 		id            int
 		name          string
 		provider      string
 		requestModel  string
 		suffixType    string
 		expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking"
 		expectedValue any
 		upstreamModel string // The upstream model after alias resolution
 		isAlias       bool
 	}
 	cases := []suffixCase{
 		// === 1. Antigravity Provider ===
 		// 1.1 Budget-only models (Gemini 2.5)
 		{1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false},
 		{2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true},
 		// 1.2 Budget+Levels models (Gemini 3)
 		{3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 2. Gemini CLI Provider ===
 		// 2.1 Budget-only models
 		{7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false},
 		{8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true},
 		// 2.2 Budget+Levels models
 		{9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 3. Vertex Provider ===
 		// 3.1 Budget-only models
 		{13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false},
 		{14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true},
 		// 3.2 Budget+Levels models
 		{15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 4. AI Studio Provider ===
 		// 4.1 Budget-only models
 		{19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false},
 		{20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true},
 		// 4.2 Budget+Levels models
 		{21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 5. Claude Provider ===
 		{25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false},
 		{26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true},
 		// === 6. Codex Provider ===
 		{27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false},
 		{28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true},
 		// === 7. Qwen Provider ===
 		{29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false},
 		{30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true},
 		// === 8. iFlow Provider ===
 		{31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false},
 		{32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true},
 	}
 	for i := range cases {
 		tc := cases[i]
 		t.Run(tc.name, func(t *testing.T) {
 			// Step 1: split the thinking suffix off the requested model name
 			// (e.g. "gp(1000)" is expected to yield "gp" plus thinking metadata).
 			requestedModel, metadata := util.NormalizeThinkingModel(tc.requestModel)
 			hasSuffix := tc.suffixType == "numeric" || tc.suffixType == "level"
 			if hasSuffix && metadata == nil {
 				t.Errorf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tc.id, tc.requestModel)
 				return
 			}
 			// Step 2: mimic OAuth model mapping, which records the requested alias in metadata.
 			if tc.isAlias {
 				if metadata == nil {
 					metadata = make(map[string]any)
 				}
 				metadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel
 			}
 			// executorMetadata builds a fresh map for the Step 4/5 calls: the alias entry
 			// (for alias cases) followed by a copy of everything parsed so far.
 			executorMetadata := func() map[string]any {
 				copied := make(map[string]any)
 				if tc.isAlias {
 					copied[util.ModelMappingOriginalModelMetadataKey] = requestedModel
 				}
 				for key, val := range metadata {
 					copied[key] = val
 				}
 				return copied
 			}
 			// Step 3: check what ThinkingFromMetadata extracts for this suffix type.
 			if tc.suffixType == "numeric" {
 				budget, _, _, matched := util.ThinkingFromMetadata(metadata)
 				if !matched {
 					t.Errorf("Case #%d: ThinkingFromMetadata did not match", tc.id)
 					return
 				}
 				if budget == nil {
 					t.Errorf("Case #%d: expected budget in metadata", tc.id)
 					return
 				}
 				switch tc.expectedField {
 				case "thinkingBudget", "budget_tokens":
 					// The parsed budget must equal the number in the suffix.
 					wantBudget := tc.expectedValue.(int)
 					if *budget != wantBudget {
 						t.Errorf("Case #%d: budget = %d, want %d", tc.id, *budget, wantBudget)
 					}
 				case "thinkingLevel":
 					// Gemini 3: the numeric budget must convert to the expected level.
 					level, ok := util.ThinkingBudgetToGemini3Level(tc.upstreamModel, *budget)
 					if !ok {
 						t.Errorf("Case #%d: ThinkingBudgetToGemini3Level failed", tc.id)
 						return
 					}
 					wantLevel := tc.expectedValue.(string)
 					if level != wantLevel {
 						t.Errorf("Case #%d: converted level = %q, want %q", tc.id, level, wantLevel)
 					}
 				}
 			} else if tc.suffixType == "level" {
 				_, _, effort, matched := util.ThinkingFromMetadata(metadata)
 				if !matched {
 					t.Errorf("Case #%d: ThinkingFromMetadata did not match", tc.id)
 					return
 				}
 				if effort == nil {
 					t.Errorf("Case #%d: expected effort in metadata", tc.id)
 					return
 				}
 				switch tc.expectedField {
 				case "thinkingLevel", "reasoning_effort":
 					wantEffort := tc.expectedValue.(string)
 					if *effort != wantEffort {
 						t.Errorf("Case #%d: effort = %q, want %q", tc.id, *effort, wantEffort)
 					}
 				}
 			}
 			// Step 4: Gemini 3 upstream models must end up with thinkingLevel in the CLI body.
 			if tc.expectedField == "thinkingLevel" && util.IsGemini3Model(tc.upstreamModel) {
 				payload := []byte(`{"request":{"contents":[]}}`)
 				// The upstream model is the model argument; the alias travels in metadata.
 				applied := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tc.upstreamModel, executorMetadata(), payload)
 				gotLevel := gjson.GetBytes(applied, "request.generationConfig.thinkingConfig.thinkingLevel")
 				wantLevel := tc.expectedValue.(string)
 				switch {
 				case !gotLevel.Exists():
 					t.Errorf("Case #%d: expected thinkingLevel in result", tc.id)
 				case gotLevel.String() != wantLevel:
 					t.Errorf("Case #%d: thinkingLevel = %q, want %q", tc.id, gotLevel.String(), wantLevel)
 				}
 			}
 			// Step 5: Gemini 2.5 upstream models must end up with thinkingBudget in the CLI body.
 			if tc.expectedField == "thinkingBudget" && util.IsGemini25Model(tc.upstreamModel) {
 				payload := []byte(`{"request":{"contents":[]}}`)
 				// Goes through the exported executor entry point rather than a util helper.
 				applied := executor.ApplyThinkingMetadataCLI(payload, executorMetadata(), tc.upstreamModel)
 				gotBudget := gjson.GetBytes(applied, "request.generationConfig.thinkingConfig.thinkingBudget")
 				wantBudget := tc.expectedValue.(int)
 				switch {
 				case !gotBudget.Exists():
 					t.Errorf("Case #%d: expected thinkingBudget in result", tc.id)
 				case int(gotBudget.Int()) != wantBudget:
 					t.Errorf("Case #%d: thinkingBudget = %d, want %d", tc.id, int(gotBudget.Int()), wantBudget)
 				}
 			}
 		})
 	}
 }
Author	SHA1	Message	Date
Luis Pater	109cf3928a	Merge pull request #88 from router-for-me/plus v6.6.85	2026-01-06 23:20:47 +08:00
Luis Pater	4794645dec	Merge branch 'main' into plus	2026-01-06 23:20:39 +08:00
Luis Pater	f861bd6a94	docs: add 9Router to community projects in README	2026-01-06 23:15:28 +08:00
Luis Pater	6dbfdd140d	Merge pull request #871 from decolua/patch-1 Update README.md	2026-01-06 22:58:53 +08:00
decolua	386ccffed4	Update README.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-01-05 20:54:33 +07:00
decolua	ffddd1c90a	Update README.md	2026-01-05 20:29:26 +07:00
Luis Pater	8f8dfd081b	Merge pull request #850 from can1357/main feat(translator): add developer role support for Gemini translators	2026-01-05 11:27:24 +08:00
Luis Pater	9f1b445c7c	docs: add ProxyPilot to community projects in Chinese README	2026-01-05 11:23:48 +08:00
Luis Pater	ae933dfe14	Merge pull request #858 from Finesssee/add-proxypilot docs: add ProxyPilot to community projects	2026-01-05 11:20:52 +08:00
Luis Pater	5d33d6b8ea	Merge branch 'router-for-me:main' into main	2026-01-05 10:42:32 +08:00
Luis Pater	e124db723b	Merge pull request #862 from router-for-me/gemini fix(gemini): abort default injection on existing thinking keys	2026-01-05 10:41:07 +08:00
hkfires	05444cf32d	fix(gemini): abort default injection on existing thinking keys	2026-01-05 10:24:30 +08:00
Luis Pater	478aff1189	Merge branch 'router-for-me:main' into main	2026-01-05 09:26:23 +08:00
Luis Pater	8edbda57cf	feat(translator): add `thoughtSignature` to node parts for Gemini and Antigravity requests Enhanced node structure by including `thoughtSignature` for inline data parts in Gemini OpenAI, Gemini CLI, and Antigravity request handlers to improve traceability of thought processes.	2026-01-05 09:25:17 +08:00
Finessse	821249a5ed	docs: add ProxyPilot to community projects	2026-01-04 18:19:41 +07:00
Luis Pater	2331b9a2e7	Merge branch 'router-for-me:main' into main	2026-01-04 18:10:49 +08:00
Luis Pater	ee33863b47	Merge pull request #857 from router-for-me/management-update Management update	2026-01-04 18:07:13 +08:00
Supra4E8C	cd22c849e2	feat(management): 更新OAuth模型映射的清理逻辑以增强数据安全性	2026-01-04 17:57:34 +08:00
Supra4E8C	f0e73efda2	feat(management): add vertex api key and oauth model mappings endpoints	2026-01-04 17:32:00 +08:00
Supra4E8C	3156109c71	feat(management): 支持管理接口调整日志大小/强制前缀/路由策略	2026-01-04 12:21:49 +08:00
can1357	6762e081f3	feat(translator): add developer role support for Gemini translators Treat OpenAI's "developer" role the same as "system" role in request translation for gemini, gemini-cli, and antigravity backends.	2026-01-03 21:01:01 +01:00
Luis Pater	5ca3508284	Merge pull request #80 from router-for-me/plus v6.6.81	2026-01-04 01:38:47 +08:00
Luis Pater	771fec9447	Merge branch 'main' into plus	2026-01-04 01:38:40 +08:00
Luis Pater	7815ee338d	fix(translator): adjust `message_delta` emission boundary in Claude-to-OpenAI conversion Fixed incorrect boundary logic for `message_delta` emission, ensuring proper handling of usage updates and `emitMessageStopIfNeeded` within the response loop.	2026-01-04 01:36:51 +08:00
Luis Pater	44b6c872e2	feat(config): add support for `Fork` in OAuth model mappings with alias handling Implemented `Fork` flag in `ModelNameMapping` to allow aliases as additional models while preserving the original model ID. Updated the `applyOAuthModelMappings` logic, added tests for `Fork` behavior, and updated documentation and examples accordingly.	2026-01-04 01:18:29 +08:00
Luis Pater	7a77b23f2d	feat(executor): add token refresh timeout and improve context handling during refresh Introduced `tokenRefreshTimeout` constant for token refresh operations and enhanced context propagation for `refreshToken` by embedding roundtrip information if available. Adjusted `refreshAuth` to ensure default context initialization and handle cancellation errors appropriately.	2026-01-04 00:26:08 +08:00
Luis Pater	672e8549c0	docs: reorganize README to adjust CodMate placement Moved CodMate entry under ProxyPal in both English and Chinese README files for consistency in structure and better readability.	2026-01-03 21:31:53 +08:00
Luis Pater	66f5269a23	Merge pull request #837 from loocor/main docs: add CodMate to community projects	2026-01-03 21:30:15 +08:00
Luis Pater	4eaf769894	Merge branch 'router-for-me:main' into main	2026-01-03 04:55:26 +08:00
Luis Pater	ebec293497	feat(api): integrate `TokenStore` for improved auth entry management Replaced file-based auth entry counting with `TokenStore`-backed implementation, enhancing flexibility and context-aware token management. Updated related logic to reflect this change.	2026-01-03 04:53:47 +08:00
Luis Pater	e02ceecd35	feat(registry): introduce `ModelRegistryHook` for monitoring model registrations and unregistrations Added support for external hooks to observe model registry events using the `ModelRegistryHook` interface. Implemented thread-safe, non-blocking execution of hooks with panic recovery. Comprehensive tests added to verify hook behavior during registration, unregistration, blocking, and panic scenarios.	2026-01-02 23:18:40 +08:00
Luis Pater	9116392a45	Merge branch 'router-for-me:main' into main	2026-01-02 20:49:51 +08:00
Luis Pater	c8b33a8cc3	Merge pull request #824 from router-for-me/script feat(script): add usage statistics preservation across container rebuilds	2026-01-02 20:42:25 +08:00
Loocor	dca8d5ded8	Add CodMate app information to README Added CodMate section to README with app details.	2026-01-02 17:15:38 +08:00
Loocor	2a7fd1e897	Add CodMate description to README_CN.md 添加 CodMate 应用的描述，提供 CLI AI 会话管理功能。	2026-01-02 17:15:09 +08:00
Luis Pater	b9d1e70ac2	Merge pull request #830 from router-for-me/gemini fix(util): disable default thinking for gemini-3 series	2026-01-02 10:59:24 +08:00
hkfires	fdf5720217	fix(gemini): remove default thinking for gemini 3 models	2026-01-02 10:55:59 +08:00
hkfires	f40bd0cd51	feat(script): add usage statistics preservation across container rebuilds	2026-01-02 10:01:20 +08:00
hkfires	e33676bb87	fix(util): disable default thinking for gemini-3 series	2026-01-02 09:43:40 +08:00
Luis Pater	b1f1cee1e5	feat(executor): refine payload handling by integrating original request context Updated `applyPayloadConfig` to `applyPayloadConfigWithRoot` across payload translation logic, enabling validation against the original request payload when available. Added support for improved model normalization and translation consistency.	2026-01-02 03:28:37 +08:00
Luis Pater	a1ecc9ab00	Merge branch 'router-for-me:main' into main	2026-01-02 00:04:50 +08:00
Luis Pater	2a663d5cba	feat(executor): enhance payload translation with original request context Refactored `applyPayloadConfig` to `applyPayloadConfigWithRoot`, adding support for default rule validation against the original payload when available. Updated all executors to use `applyPayloadConfigWithRoot` and incorporate an optional original request payload for translations.	2026-01-02 00:03:26 +08:00
Luis Pater	ba486ca6b7	Merge branch 'router-for-me:main' into main	2026-01-01 21:03:16 +08:00
Luis Pater	750b930679	Merge pull request #823 from router-for-me/translator feat(translator): enhance Claude-to-OpenAI conversion with thinking block and tool result handling	2026-01-01 20:16:10 +08:00
hkfires	3902fd7501	fix(iflow): remove thinking field from request body in thinking config handler	2026-01-01 19:40:28 +08:00
hkfires	4fc3d5e935	refactor(iflow): simplify thinking config handling for GLM and MiniMax models	2026-01-01 19:31:08 +08:00
hkfires	2d2f4572a7	fix(translator): remove unnecessary whitespace trimming in reasoning text collection	2026-01-01 12:39:09 +08:00
hkfires	8f4c46f38d	fix(translator): emit tool_result messages before user content in Claude-to-OpenAI conversion	2026-01-01 11:11:43 +08:00
hkfires	b6ba51bc2a	feat(translator): add thinking block and tool result handling for Claude-to-OpenAI conversion	2026-01-01 09:41:25 +08:00
Luis Pater	6a66d32d37	Merge pull request #803 from HsnSaboor/fix-invalid-function-names-sanitization-v2 feat(translator): resolve invalid function name errors by sanitizing Claude tool names	2026-01-01 01:15:50 +08:00
Luis Pater	8d15723195	feat(registry): add `GetAvailableModelsByProvider` method for retrieving models by provider	2025-12-31 23:37:46 +08:00
Chén Mù	736e0aae86	Merge pull request #814 from router-for-me/aistudio Fix model alias thinking suffix	2025-12-31 03:08:05 -08:00
hkfires	8bf3305b2b	fix(thinking): fallback to upstream model for thinking support when alias not in registry	2025-12-31 18:07:13 +08:00
hkfires	d00e3ea973	feat(thinking): add numeric budget to thinkingLevel conversion fallback	2025-12-31 17:14:47 +08:00
hkfires	89db4e9481	fix(thinking): use model alias for thinking config resolution in mapped models	2025-12-31 17:09:22 +08:00
hkfires	e332419081	feat(registry): add thinking support for gemini-2.5-computer-use-preview model	2025-12-31 17:09:22 +08:00
Saboor Hassan	47b9503112	chore: revert changes to internal/translator to comply with path guard This commit reverts all modifications within internal/translator. A separate issue will be created for the maintenance team to integrate SanitizeFunctionName into the translators. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>	2025-12-31 02:19:26 +05:00
Saboor Hassan	3b9253c2be	fix(translator): resolve invalid function name errors by sanitizing Claude tool names This commit centralizes tool name sanitization in SanitizeFunctionName, applying character compliance, starting character rules, and length limits. It also fixes a regression in gemini_schema tests and preserves MCP-specific shortening logic while ensuring compliance. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>	2025-12-31 02:14:46 +05:00
Saboor Hassan	d241359153	fix(translator): address PR feedback for tool name sanitization - Pre-compile sanitization regex for better performance. - Optimize SanitizeFunctionName for conciseness and correctness. - Handle 64-char edge cases by truncating before prepending underscore. - Fix bug in Antigravity translator (incorrect join index). - Refactor Gemini translators to avoid redundant sanitization calls. - Add comprehensive unit tests including 64-char edge cases. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>	2025-12-31 01:54:41 +05:00
Saboor Hassan	f4d4249ba5	feat(translator): sanitize tool/function names for upstream provider compatibility Implemented SanitizeFunctionName utility to ensure Claude tool names meet Gemini/Upstream strict naming conventions (alphanumeric, starts with letter/underscore, max 64 chars). Applied sanitization to tool definitions and usage in all relevant translators. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>	2025-12-31 01:41:07 +05:00