Merge pull request #186 from taetaetae/fix/kiro-claude-compaction-empty-content

fix(kiro): handle empty content in Claude format assistant messages
Merge branch 'router-for-me:main' into main
2026-03-10 15:53:16 +00:00 · 2026-02-06 05:39:41 +08:00 · 2026-02-06 05:32:10 +08:00 · 2026-02-06 05:31:54 +08:00 · 2026-02-06 05:30:28 +08:00 · 2026-02-05 21:26:29 +00:00
75 changed files with 2084 additions and 562 deletions
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -1,13 +1,14 @@
 name: docker-image

 on:
+  workflow_dispatch:
  push:
    tags:
      - v*

 env:
  APP_NAME: CLIProxyAPI
-  DOCKERHUB_REPO: eceasy/cli-proxy-api-plus
+  DOCKERHUB_REPO: ${{ secrets.DOCKERHUB_USERNAME }}/cli-proxy-api-plus

 jobs:
  docker_amd64:
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ logs/*
 conv/*
 temp/*
 refs/*
+tmp/*

 # Storage backends
 pgstore/*
--- a/assets/aicodemirror.png
+++ b/assets/aicodemirror.png
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -40,6 +40,11 @@ api-keys:
 # Enable debug logging
 debug: false

+# Enable pprof HTTP debug server (host:port). Keep it bound to localhost for safety.
+pprof:
+  enable: false
+  addr: "127.0.0.1:8316"
+
 # When true, disable high-overhead HTTP middleware features to reduce per-request memory usage under high concurrency.
 commercial-mode: false

--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.24.0
 require (
 	github.com/andybalholm/brotli v1.0.6
 	github.com/fsnotify/fsnotify v1.9.0
+	github.com/fxamacker/cbor/v2 v2.9.0
 	github.com/gin-gonic/gin v1.10.1
 	github.com/go-git/go-git/v6 v6.0.0-20251009132922-75a182125145
 	github.com/google/uuid v1.6.0
@@ -13,8 +14,8 @@ require (
 	github.com/joho/godotenv v1.5.1
 	github.com/klauspost/compress v1.17.4
 	github.com/minio/minio-go/v7 v7.0.66
-	github.com/refraction-networking/utls v1.8.2
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
+	github.com/refraction-networking/utls v1.8.2
 	github.com/sirupsen/logrus v1.9.3
 	github.com/tidwall/gjson v1.18.0
 	github.com/tidwall/sjson v1.2.5
@@ -41,7 +42,6 @@ require (
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
-	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
 	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/go-git/gcfg/v2 v2.0.2 // indirect
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -13,12 +13,13 @@ import (

 	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
-	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/net/proxy"
 	"golang.org/x/oauth2"
 	"golang.org/x/oauth2/google"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 )

 const defaultAPICallTimeout = 60 * time.Second
@@ -55,6 +56,7 @@ type apiCallResponse struct {
 	StatusCode int                 `json:"status_code"`
 	Header     map[string][]string `json:"header"`
 	Body       string              `json:"body"`
+	Quota      *QuotaSnapshots     `json:"quota,omitempty"`
 }

 // APICall makes a generic HTTP request on behalf of the management API caller.
@@ -97,6 +99,8 @@ type apiCallResponse struct {
 //	- status_code: Upstream HTTP status code.
 //	- header: Upstream response headers.
 //	- body: Upstream response body as string.
+//	- quota (optional): For GitHub Copilot enterprise accounts, contains quota_snapshots
+//	  with details for chat, completions, and premium_interactions.
 //
 // Example:
 //
@@ -236,6 +240,13 @@ func (h *Handler) APICall(c *gin.Context) {
 		Body:       string(respBody),
 	}

+	// If this is a GitHub Copilot token endpoint response, try to enrich with quota information
+	if resp.StatusCode == http.StatusOK &&
+		strings.Contains(urlStr, "copilot_internal") &&
+		strings.Contains(urlStr, "/token") {
+		response = h.enrichCopilotTokenResponse(c.Request.Context(), response, auth, urlStr)
+	}
+
 	// Return response in the same format as the request
 	if isCBOR {
 		cborData, errMarshal := cbor.Marshal(response)
@@ -735,3 +746,344 @@ func buildProxyTransport(proxyStr string) *http.Transport {
 	log.Debugf("unsupported proxy scheme: %s", proxyURL.Scheme)
 	return nil
 }
+
+// QuotaDetail represents quota information for a specific resource type
+type QuotaDetail struct {
+	Entitlement      float64 `json:"entitlement"`
+	OverageCount     float64 `json:"overage_count"`
+	OveragePermitted bool    `json:"overage_permitted"`
+	PercentRemaining float64 `json:"percent_remaining"`
+	QuotaID          string  `json:"quota_id"`
+	QuotaRemaining   float64 `json:"quota_remaining"`
+	Remaining        float64 `json:"remaining"`
+	Unlimited        bool    `json:"unlimited"`
+}
+
+// QuotaSnapshots contains quota details for different resource types
+type QuotaSnapshots struct {
+	Chat                QuotaDetail `json:"chat"`
+	Completions         QuotaDetail `json:"completions"`
+	PremiumInteractions QuotaDetail `json:"premium_interactions"`
+}
+
+// CopilotUsageResponse represents the GitHub Copilot usage information
+type CopilotUsageResponse struct {
+	AccessTypeSKU         string         `json:"access_type_sku"`
+	AnalyticsTrackingID   string         `json:"analytics_tracking_id"`
+	AssignedDate          string         `json:"assigned_date"`
+	CanSignupForLimited   bool           `json:"can_signup_for_limited"`
+	ChatEnabled           bool           `json:"chat_enabled"`
+	CopilotPlan           string         `json:"copilot_plan"`
+	OrganizationLoginList []interface{}  `json:"organization_login_list"`
+	OrganizationList      []interface{}  `json:"organization_list"`
+	QuotaResetDate        string         `json:"quota_reset_date"`
+	QuotaSnapshots        QuotaSnapshots `json:"quota_snapshots"`
+}
+
+type copilotQuotaRequest struct {
+	AuthIndexSnake  *string `json:"auth_index"`
+	AuthIndexCamel  *string `json:"authIndex"`
+	AuthIndexPascal *string `json:"AuthIndex"`
+}
+
+// GetCopilotQuota fetches GitHub Copilot quota information from the /copilot_internal/user endpoint.
+//
+// Endpoint:
+//
+//	GET /v0/management/copilot-quota
+//
+// Query Parameters (optional):
+//   - auth_index: The credential "auth_index" from GET /v0/management/auth-files.
+//     If omitted, uses the first available GitHub Copilot credential.
+//
+// Response:
+//
+//	Returns the CopilotUsageResponse with quota_snapshots containing detailed quota information
+//	for chat, completions, and premium_interactions.
+//
+// Example:
+//
+//	curl -sS -X GET "http://127.0.0.1:8317/v0/management/copilot-quota?auth_index=<AUTH_INDEX>" \
+//	  -H "Authorization: Bearer <MANAGEMENT_KEY>"
+func (h *Handler) GetCopilotQuota(c *gin.Context) {
+	authIndex := strings.TrimSpace(c.Query("auth_index"))
+	if authIndex == "" {
+		authIndex = strings.TrimSpace(c.Query("authIndex"))
+	}
+	if authIndex == "" {
+		authIndex = strings.TrimSpace(c.Query("AuthIndex"))
+	}
+
+	auth := h.findCopilotAuth(authIndex)
+	if auth == nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "no github copilot credential found"})
+		return
+	}
+
+	token, tokenErr := h.resolveTokenForAuth(c.Request.Context(), auth)
+	if tokenErr != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "failed to refresh copilot token"})
+		return
+	}
+	if token == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "copilot token not found"})
+		return
+	}
+
+	apiURL := "https://api.github.com/copilot_internal/user"
+	req, errNewRequest := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, apiURL, nil)
+	if errNewRequest != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to build request"})
+		return
+	}
+
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("User-Agent", "CLIProxyAPIPlus")
+	req.Header.Set("Accept", "application/json")
+
+	httpClient := &http.Client{
+		Timeout:   defaultAPICallTimeout,
+		Transport: h.apiCallTransport(auth),
+	}
+
+	resp, errDo := httpClient.Do(req)
+	if errDo != nil {
+		log.WithError(errDo).Debug("copilot quota request failed")
+		c.JSON(http.StatusBadGateway, gin.H{"error": "request failed"})
+		return
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("response body close error: %v", errClose)
+		}
+	}()
+
+	respBody, errReadAll := io.ReadAll(resp.Body)
+	if errReadAll != nil {
+		c.JSON(http.StatusBadGateway, gin.H{"error": "failed to read response"})
+		return
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		c.JSON(http.StatusBadGateway, gin.H{
+			"error":       "github api request failed",
+			"status_code": resp.StatusCode,
+			"body":        string(respBody),
+		})
+		return
+	}
+
+	var usage CopilotUsageResponse
+	if errUnmarshal := json.Unmarshal(respBody, &usage); errUnmarshal != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to parse response"})
+		return
+	}
+
+	c.JSON(http.StatusOK, usage)
+}
+
+// findCopilotAuth locates a GitHub Copilot credential by auth_index or returns the first available one
+func (h *Handler) findCopilotAuth(authIndex string) *coreauth.Auth {
+	if h == nil || h.authManager == nil {
+		return nil
+	}
+
+	auths := h.authManager.List()
+	var firstCopilot *coreauth.Auth
+
+	for _, auth := range auths {
+		if auth == nil {
+			continue
+		}
+
+		provider := strings.ToLower(strings.TrimSpace(auth.Provider))
+		if provider != "copilot" && provider != "github" && provider != "github-copilot" {
+			continue
+		}
+
+		if firstCopilot == nil {
+			firstCopilot = auth
+		}
+
+		if authIndex != "" {
+			auth.EnsureIndex()
+			if auth.Index == authIndex {
+				return auth
+			}
+		}
+	}
+
+	return firstCopilot
+}
+
+// enrichCopilotTokenResponse fetches quota information and adds it to the Copilot token response body
+func (h *Handler) enrichCopilotTokenResponse(ctx context.Context, response apiCallResponse, auth *coreauth.Auth, originalURL string) apiCallResponse {
+	if auth == nil || response.Body == "" {
+		return response
+	}
+
+	// Parse the token response to check if it's enterprise (null limited_user_quotas)
+	var tokenResp map[string]interface{}
+	if err := json.Unmarshal([]byte(response.Body), &tokenResp); err != nil {
+		log.WithError(err).Debug("enrichCopilotTokenResponse: failed to parse copilot token response")
+		return response
+	}
+
+	// Get the GitHub token to call the copilot_internal/user endpoint
+	token, tokenErr := h.resolveTokenForAuth(ctx, auth)
+	if tokenErr != nil {
+		log.WithError(tokenErr).Debug("enrichCopilotTokenResponse: failed to resolve token")
+		return response
+	}
+	if token == "" {
+		return response
+	}
+
+	// Fetch quota information from /copilot_internal/user
+	// Derive the base URL from the original token request to support proxies and test servers
+	parsedURL, errParse := url.Parse(originalURL)
+	if errParse != nil {
+		log.WithError(errParse).Debug("enrichCopilotTokenResponse: failed to parse URL")
+		return response
+	}
+	quotaURL := fmt.Sprintf("%s://%s/copilot_internal/user", parsedURL.Scheme, parsedURL.Host)
+
+	req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodGet, quotaURL, nil)
+	if errNewRequest != nil {
+		log.WithError(errNewRequest).Debug("enrichCopilotTokenResponse: failed to build request")
+		return response
+	}
+
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("User-Agent", "CLIProxyAPIPlus")
+	req.Header.Set("Accept", "application/json")
+
+	httpClient := &http.Client{
+		Timeout:   defaultAPICallTimeout,
+		Transport: h.apiCallTransport(auth),
+	}
+
+	quotaResp, errDo := httpClient.Do(req)
+	if errDo != nil {
+		log.WithError(errDo).Debug("enrichCopilotTokenResponse: quota fetch HTTP request failed")
+		return response
+	}
+
+	defer func() {
+		if errClose := quotaResp.Body.Close(); errClose != nil {
+			log.Errorf("quota response body close error: %v", errClose)
+		}
+	}()
+
+	if quotaResp.StatusCode != http.StatusOK {
+		return response
+	}
+
+	quotaBody, errReadAll := io.ReadAll(quotaResp.Body)
+	if errReadAll != nil {
+		log.WithError(errReadAll).Debug("enrichCopilotTokenResponse: failed to read response")
+		return response
+	}
+
+	// Parse the quota response
+	var quotaData CopilotUsageResponse
+	if err := json.Unmarshal(quotaBody, &quotaData); err != nil {
+		log.WithError(err).Debug("enrichCopilotTokenResponse: failed to parse response")
+		return response
+	}
+
+	// Check if this is an enterprise account by looking for quota_snapshots in the response
+	// Enterprise accounts have quota_snapshots, non-enterprise have limited_user_quotas
+	var quotaRaw map[string]interface{}
+	if err := json.Unmarshal(quotaBody, &quotaRaw); err == nil {
+		if _, hasQuotaSnapshots := quotaRaw["quota_snapshots"]; hasQuotaSnapshots {
+			// Enterprise account - has quota_snapshots
+			tokenResp["quota_snapshots"] = quotaData.QuotaSnapshots
+			tokenResp["access_type_sku"] = quotaData.AccessTypeSKU
+			tokenResp["copilot_plan"] = quotaData.CopilotPlan
+
+			// Add quota reset date for enterprise (quota_reset_date_utc)
+			if quotaResetDateUTC, ok := quotaRaw["quota_reset_date_utc"]; ok {
+				tokenResp["quota_reset_date"] = quotaResetDateUTC
+			} else if quotaData.QuotaResetDate != "" {
+				tokenResp["quota_reset_date"] = quotaData.QuotaResetDate
+			}
+		} else {
+			// Non-enterprise account - build quota from limited_user_quotas and monthly_quotas
+			var quotaSnapshots QuotaSnapshots
+
+			// Get monthly quotas (total entitlement) and limited_user_quotas (remaining)
+			monthlyQuotas, hasMonthly := quotaRaw["monthly_quotas"].(map[string]interface{})
+			limitedQuotas, hasLimited := quotaRaw["limited_user_quotas"].(map[string]interface{})
+
+			// Process chat quota
+			if hasMonthly && hasLimited {
+				if chatTotal, ok := monthlyQuotas["chat"].(float64); ok {
+					chatRemaining := chatTotal // default to full if no limited quota
+					if chatLimited, ok := limitedQuotas["chat"].(float64); ok {
+						chatRemaining = chatLimited
+					}
+					percentRemaining := 0.0
+					if chatTotal > 0 {
+						percentRemaining = (chatRemaining / chatTotal) * 100.0
+					}
+					quotaSnapshots.Chat = QuotaDetail{
+						Entitlement:      chatTotal,
+						Remaining:        chatRemaining,
+						QuotaRemaining:   chatRemaining,
+						PercentRemaining: percentRemaining,
+						QuotaID:          "chat",
+						Unlimited:        false,
+					}
+				}
+
+				// Process completions quota
+				if completionsTotal, ok := monthlyQuotas["completions"].(float64); ok {
+					completionsRemaining := completionsTotal // default to full if no limited quota
+					if completionsLimited, ok := limitedQuotas["completions"].(float64); ok {
+						completionsRemaining = completionsLimited
+					}
+					percentRemaining := 0.0
+					if completionsTotal > 0 {
+						percentRemaining = (completionsRemaining / completionsTotal) * 100.0
+					}
+					quotaSnapshots.Completions = QuotaDetail{
+						Entitlement:      completionsTotal,
+						Remaining:        completionsRemaining,
+						QuotaRemaining:   completionsRemaining,
+						PercentRemaining: percentRemaining,
+						QuotaID:          "completions",
+						Unlimited:        false,
+					}
+				}
+			}
+
+			// Premium interactions don't exist for non-enterprise, leave as zero values
+			quotaSnapshots.PremiumInteractions = QuotaDetail{
+				QuotaID:   "premium_interactions",
+				Unlimited: false,
+			}
+
+			// Add quota_snapshots to the token response
+			tokenResp["quota_snapshots"] = quotaSnapshots
+			tokenResp["access_type_sku"] = quotaData.AccessTypeSKU
+			tokenResp["copilot_plan"] = quotaData.CopilotPlan
+
+			// Add quota reset date for non-enterprise (limited_user_reset_date)
+			if limitedResetDate, ok := quotaRaw["limited_user_reset_date"]; ok {
+				tokenResp["quota_reset_date"] = limitedResetDate
+			}
+		}
+	}
+
+	// Re-serialize the enriched response
+	enrichedBody, errMarshal := json.Marshal(tokenResp)
+	if errMarshal != nil {
+		log.WithError(errMarshal).Debug("failed to marshal enriched response")
+		return response
+	}
+
+	response.Body = string(enrichedBody)
+
+	return response
+}
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -754,18 +754,22 @@ func (h *Handler) PatchOAuthModelAlias(c *gin.Context) {
 	normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases})
 	normalized := normalizedMap[channel]
 	if len(normalized) == 0 {
+		// Only delete if channel exists, otherwise just create empty entry
+		if h.cfg.OAuthModelAlias != nil {
+			if _, ok := h.cfg.OAuthModelAlias[channel]; ok {
+				delete(h.cfg.OAuthModelAlias, channel)
+				if len(h.cfg.OAuthModelAlias) == 0 {
+					h.cfg.OAuthModelAlias = nil
+				}
+				h.persist(c)
+				return
+			}
+		}
+		// Create new channel with empty aliases
 		if h.cfg.OAuthModelAlias == nil {
-			c.JSON(404, gin.H{"error": "channel not found"})
-			return
-		}
-		if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
-			c.JSON(404, gin.H{"error": "channel not found"})
-			return
-		}
-		delete(h.cfg.OAuthModelAlias, channel)
-		if len(h.cfg.OAuthModelAlias) == 0 {
-			h.cfg.OAuthModelAlias = nil
+			h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias)
 		}
+		h.cfg.OAuthModelAlias[channel] = []config.OAuthModelAlias{}
 		h.persist(c)
 		return
 	}
--- a/internal/auth/kiro/aws.go
+++ b/internal/auth/kiro/aws.go
@@ -92,7 +92,7 @@ const KiroIDETokenFile = ".aws/sso/cache/kiro-auth-token.json"

 // Default retry configuration for file reading
 const (
-	defaultTokenReadMaxAttempts = 10               // Maximum retry attempts
+	defaultTokenReadMaxAttempts = 10                    // Maximum retry attempts
 	defaultTokenReadBaseDelay   = 50 * time.Millisecond // Base delay between retries
 )

@@ -301,7 +301,7 @@ func ListKiroTokenFiles() ([]string, error) {
 	}

 	cacheDir := filepath.Join(homeDir, ".aws", "sso", "cache")
-	
+
 	// Check if directory exists
 	if _, err := os.Stat(cacheDir); os.IsNotExist(err) {
 		return nil, nil // No token files
@@ -488,14 +488,16 @@ func ExtractIDCIdentifier(startURL string) string {

 // GenerateTokenFileName generates a unique filename for token storage.
 // Priority: email > startUrl identifier (for IDC) > authMethod only
-// Format: kiro-{authMethod}-{identifier}.json
+// Email is unique, so no sequence suffix needed. Sequence is only added
+// when email is unavailable to prevent filename collisions.
+// Format: kiro-{authMethod}-{identifier}[-{seq}].json
 func GenerateTokenFileName(tokenData *KiroTokenData) string {
 	authMethod := tokenData.AuthMethod
 	if authMethod == "" {
 		authMethod = "unknown"
 	}

-	// Priority 1: Use email if available
+	// Priority 1: Use email if available (no sequence needed, email is unique)
 	if tokenData.Email != "" {
 		// Sanitize email for filename (replace @ and . with -)
 		sanitizedEmail := tokenData.Email
@@ -504,14 +506,17 @@ func GenerateTokenFileName(tokenData *KiroTokenData) string {
 		return fmt.Sprintf("kiro-%s-%s.json", authMethod, sanitizedEmail)
 	}

-	// Priority 2: For IDC, use startUrl identifier
+	// Generate sequence only when email is unavailable
+	seq := time.Now().UnixNano() % 100000
+
+	// Priority 2: For IDC, use startUrl identifier with sequence
 	if authMethod == "idc" && tokenData.StartURL != "" {
 		identifier := ExtractIDCIdentifier(tokenData.StartURL)
 		if identifier != "" {
-			return fmt.Sprintf("kiro-%s-%s.json", authMethod, identifier)
+			return fmt.Sprintf("kiro-%s-%s-%05d.json", authMethod, identifier, seq)
 		}
 	}

-	// Priority 3: Fallback to authMethod only
-	return fmt.Sprintf("kiro-%s.json", authMethod)
+	// Priority 3: Fallback to authMethod only with sequence
+	return fmt.Sprintf("kiro-%s-%05d.json", authMethod, seq)
 }
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -18,7 +18,10 @@ import (
 	"gopkg.in/yaml.v3"
 )

-const DefaultPanelGitHubRepository = "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
+const (
+	DefaultPanelGitHubRepository = "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
+	DefaultPprofAddr             = "127.0.0.1:8316"
+)

 // Config represents the application's configuration, loaded from a YAML file.
 type Config struct {
@@ -41,6 +44,9 @@ type Config struct {
 	// Debug enables or disables debug-level logging and other debug features.
 	Debug bool `yaml:"debug" json:"debug"`

+	// Pprof config controls the optional pprof HTTP debug server.
+	Pprof PprofConfig `yaml:"pprof" json:"pprof"`
+
 	// CommercialMode disables high-overhead HTTP middleware features to minimize per-request memory usage.
 	CommercialMode bool `yaml:"commercial-mode" json:"commercial-mode"`

@@ -134,6 +140,14 @@ type TLSConfig struct {
 	Key string `yaml:"key" json:"key"`
 }

+// PprofConfig holds pprof HTTP server settings.
+type PprofConfig struct {
+	// Enable toggles the pprof HTTP debug server.
+	Enable bool `yaml:"enable" json:"enable"`
+	// Addr is the host:port address for the pprof HTTP server.
+	Addr string `yaml:"addr" json:"addr"`
+}
+
 // RemoteManagement holds management API configuration under 'remote-management'.
 type RemoteManagement struct {
 	// AllowRemote toggles remote (non-localhost) access to management API.
@@ -556,6 +570,8 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	cfg.ErrorLogsMaxFiles = 10
 	cfg.UsageStatisticsEnabled = false
 	cfg.DisableCooling = false
+	cfg.Pprof.Enable = false
+	cfg.Pprof.Addr = DefaultPprofAddr
 	cfg.AmpCode.RestrictManagementToLocalhost = false // Default to false: API key auth is sufficient
 	cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
 	cfg.IncognitoBrowser = false // Default to normal browser (AWS uses incognito by force)
@@ -599,6 +615,11 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
 	}

+	cfg.Pprof.Addr = strings.TrimSpace(cfg.Pprof.Addr)
+	if cfg.Pprof.Addr == "" {
+		cfg.Pprof.Addr = DefaultPprofAddr
+	}
+
 	if cfg.LogsMaxTotalSizeMB < 0 {
 		cfg.LogsMaxTotalSizeMB = 0
 	}
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -17,6 +17,7 @@ var antigravityModelConversionTable = map[string]string{
 	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
 	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
 	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
+	"gemini-claude-opus-4-6-thinking":         "claude-opus-4-6-thinking",
 }

 // defaultAntigravityAliases returns the default oauth-model-alias configuration
@@ -30,6 +31,7 @@ func defaultAntigravityAliases() []OAuthModelAlias {
 		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
 		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
 		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
+		{Name: "claude-opus-4-6-thinking", Alias: "gemini-claude-opus-4-6-thinking"},
 	}
 }

--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -131,6 +131,9 @@ func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
 	if !strings.Contains(content, "claude-opus-4-5-thinking") {
 		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
 	}
+	if !strings.Contains(content, "claude-opus-4-6-thinking") {
+		t.Fatal("expected missing default alias claude-opus-4-6-thinking to be added")
+	}
 }

 func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -132,7 +132,10 @@ func ResolveLogDirectory(cfg *config.Config) string {
 		return logDir
 	}
 	if !isDirWritable(logDir) {
-		authDir := strings.TrimSpace(cfg.AuthDir)
+		authDir, err := util.ResolveAuthDir(cfg.AuthDir)
+		if err != nil {
+			log.Warnf("Failed to resolve auth-dir %q for log directory: %v", cfg.AuthDir, err)
+		}
 		if authDir != "" {
 			logDir = filepath.Join(authDir, "logs")
 		}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -19,6 +19,10 @@ import (
 //   - codex
 //   - qwen
 //   - iflow
+//   - kiro
+//   - github-copilot
+//   - kiro
+//   - amazonq
 //   - antigravity (returns static overrides only)
 func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 	key := strings.ToLower(strings.TrimSpace(channel))
@@ -39,6 +43,12 @@ func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 		return GetQwenModels()
 	case "iflow":
 		return GetIFlowModels()
+	case "github-copilot":
+		return GetGitHubCopilotModels()
+	case "kiro":
+		return GetKiroModels()
+	case "amazonq":
+		return GetAmazonQModels()
 	case "antigravity":
 		cfg := GetAntigravityModelConfig()
 		if len(cfg) == 0 {
@@ -83,6 +93,9 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 		GetOpenAIModels(),
 		GetQwenModels(),
 		GetIFlowModels(),
+		GetGitHubCopilotModels(),
+		GetKiroModels(),
+		GetAmazonQModels(),
 	}
 	for _, models := range allModels {
 		for _, m := range models {
@@ -363,6 +376,18 @@ func GetKiroModels() []*ModelInfo {
 			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
+		{
+			ID:                  "kiro-claude-opus-4-6",
+			Object:              "model",
+			Created:             1736899200, // 2025-01-15
+			OwnedBy:             "aws",
+			Type:                "kiro",
+			DisplayName:         "Kiro Claude Opus 4.6",
+			Description:         "Claude Opus 4.6 via Kiro (2.2x credit)",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                  "kiro-claude-opus-4-5",
 			Object:              "model",
@@ -412,6 +437,18 @@ func GetKiroModels() []*ModelInfo {
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
+		{
+			ID:                  "kiro-claude-opus-4-6-agentic",
+			Object:              "model",
+			Created:             1736899200, // 2025-01-15
+			OwnedBy:             "aws",
+			Type:                "kiro",
+			DisplayName:         "Kiro Claude Opus 4.6 (Agentic)",
+			Description:         "Claude Opus 4.6 optimized for coding agents (chunked writes)",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                  "kiro-claude-opus-4-5-agentic",
 			Object:              "model",
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -28,6 +28,18 @@ func GetClaudeModels() []*ModelInfo {
 			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
+		{
+			ID:                  "claude-opus-4-6-20260205",
+			Object:              "model",
+			Created:             1770318000, // 2026-02-05
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.6 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
 		{
 			ID:                  "claude-opus-4-5-20251101",
 			Object:              "model",
@@ -716,6 +728,20 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
 		},
+		{
+			ID:                  "gpt-5.3-codex",
+			Object:              "model",
+			Created:             1770307200,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.3",
+			DisplayName:         "GPT 5.3 Codex",
+			Description:         "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
 	}
 }

@@ -803,6 +829,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
 		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
 		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
+		{ID: "kimi-k2.5", DisplayName: "Kimi-K2.5", Description: "Moonshot Kimi K2.5", Created: 1769443200, Thinking: iFlowThinkingSupport},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -839,6 +866,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
 		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-opus-4-6-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
 		"gpt-oss-120b-medium":        {},
 		"tab_flash_lite_preview":     {},
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -141,7 +141,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -156,14 +156,14 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
 	if len(wsResp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(wsResp.Body))
+		appendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
 	}
 	if wsResp.Status < 200 || wsResp.Status >= 300 {
 		return resp, statusErr{code: wsResp.Status, msg: string(wsResp.Body)}
 	}
 	reporter.publish(ctx, parseGeminiUsage(wsResp.Body))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), bytes.Clone(translatedReq), bytes.Clone(wsResp.Body), &param)
+	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, wsResp.Body, &param)
 	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out))}
 	return resp, nil
 }
@@ -199,7 +199,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -225,7 +225,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		}
 		var body bytes.Buffer
 		if len(firstEvent.Payload) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(firstEvent.Payload))
+			appendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
 			body.Write(firstEvent.Payload)
 		}
 		if firstEvent.Type == wsrelay.MessageTypeStreamEnd {
@@ -244,7 +244,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				metadataLogged = true
 			}
 			if len(event.Payload) > 0 {
-				appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+				appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				body.Write(event.Payload)
 			}
 			if event.Type == wsrelay.MessageTypeStreamEnd {
@@ -274,12 +274,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				}
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 					filtered := FilterSSEUsageMetadata(event.Payload)
 					if detail, ok := parseGeminiStreamUsage(filtered); ok {
 						reporter.publish(ctx, detail)
 					}
-					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(filtered), &param)
+					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, filtered, &param)
 					for i := range lines {
 						out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 					}
@@ -293,9 +293,9 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 					metadataLogged = true
 				}
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				}
-				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(event.Payload), &param)
+				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, &param)
 				for i := range lines {
 					out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 				}
@@ -350,7 +350,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -364,7 +364,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone())
 	if len(resp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(resp.Body))
+		appendAPIResponseChunk(ctx, e.cfg, resp.Body)
 	}
 	if resp.Status < 200 || resp.Status >= 300 {
 		return cliproxyexecutor.Response{}, statusErr{code: resp.Status, msg: string(resp.Body)}
@@ -373,7 +373,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	if totalTokens <= 0 {
 		return cliproxyexecutor.Response{}, fmt.Errorf("wsrelay: totalTokens missing in response")
 	}
-	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, bytes.Clone(resp.Body))
+	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, resp.Body)
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

@@ -393,12 +393,13 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, translatedPayload{}, err
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -133,12 +133,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -230,7 +231,7 @@ attemptLoop:

 			reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bodyBytes, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)
 			return resp, nil
@@ -274,12 +275,13 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -433,7 +435,7 @@ attemptLoop:

 			reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, resp.Payload, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)

@@ -665,12 +667,13 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -800,12 +803,12 @@ attemptLoop:
 						reporter.publish(ctx, detail)
 					}

-					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
+					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(payload), &param)
 					for i := range chunks {
 						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 					}
 				}
-				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
+				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("[DONE]"), &param)
 				for i := range tail {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
 				}
@@ -872,7 +875,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)

 	// Prepare payload once (doesn't depend on baseURL)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -1280,51 +1283,40 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	payload = geminiToAntigravity(modelName, payload, projectID)
 	payload, _ = sjson.SetBytes(payload, "model", modelName)

-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
-		strJSON := string(payload)
-		paths := make([]string, 0)
-		util.Walk(gjson.ParseBytes(payload), "", "parametersJsonSchema", &paths)
-		for _, p := range paths {
-			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
-		}
-
-		// Use the centralized schema cleaner to handle unsupported keywords,
-		// const->enum conversion, and flattening of types/anyOf.
-		strJSON = util.CleanJSONSchemaForAntigravity(strJSON)
-		payload = []byte(strJSON)
-	} else {
-		strJSON := string(payload)
-		paths := make([]string, 0)
-		util.Walk(gjson.Parse(strJSON), "", "parametersJsonSchema", &paths)
-		for _, p := range paths {
-			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
-		}
-		// Clean tool schemas for Gemini to remove unsupported JSON Schema keywords
-		// without adding empty-schema placeholders.
-		strJSON = util.CleanJSONSchemaForGemini(strJSON)
-		payload = []byte(strJSON)
+	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high")
+	payloadStr := string(payload)
+	paths := make([]string, 0)
+	util.Walk(gjson.Parse(payloadStr), "", "parametersJsonSchema", &paths)
+	for _, p := range paths {
+		payloadStr, _ = util.RenameKey(payloadStr, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
 	}

-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
-		systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
-		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
-		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
-		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))
+	if useAntigravitySchema {
+		payloadStr = util.CleanJSONSchemaForAntigravity(payloadStr)
+	} else {
+		payloadStr = util.CleanJSONSchemaForGemini(payloadStr)
+	}
+
+	if useAntigravitySchema {
+		systemInstructionPartsResult := gjson.Get(payloadStr, "request.systemInstruction.parts")
+		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.role", "user")
+		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.0.text", systemInstruction)
+		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))

 		if systemInstructionPartsResult.Exists() && systemInstructionPartsResult.IsArray() {
 			for _, partResult := range systemInstructionPartsResult.Array() {
-				payload, _ = sjson.SetRawBytes(payload, "request.systemInstruction.parts.-1", []byte(partResult.Raw))
+				payloadStr, _ = sjson.SetRaw(payloadStr, "request.systemInstruction.parts.-1", partResult.Raw)
 			}
 		}
 	}

 	if strings.Contains(modelName, "claude") {
-		payload, _ = sjson.SetBytes(payload, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+		payloadStr, _ = sjson.Set(payloadStr, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
 	} else {
-		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.maxOutputTokens")
+		payloadStr, _ = sjson.Delete(payloadStr, "request.generationConfig.maxOutputTokens")
 	}

-	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
+	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), strings.NewReader(payloadStr))
 	if errReq != nil {
 		return nil, errReq
 	}
@@ -1346,11 +1338,15 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
+	var payloadLog []byte
+	if e.cfg != nil && e.cfg.RequestLog {
+		payloadLog = []byte(payloadStr)
+	}
 	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
 		URL:       requestURL.String(),
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
-		Body:      payload,
+		Body:      payloadLog,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -100,12 +100,13 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -216,7 +217,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		to,
 		from,
 		req.Model,
-		bytes.Clone(opts.OriginalRequest),
+		opts.OriginalRequest,
 		bodyForTranslation,
 		data,
 		&param,
@@ -240,12 +241,13 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -381,7 +383,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 				to,
 				from,
 				req.Model,
-				bytes.Clone(opts.OriginalRequest),
+				opts.OriginalRequest,
 				bodyForTranslation,
 				bytes.Clone(line),
 				&param,
@@ -411,7 +413,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -27,6 +27,11 @@ import (
 	"github.com/google/uuid"
 )

+const (
+	codexClientVersion = "0.98.0"
+	codexUserAgent     = "codex_cli_rs/0.98.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464"
+)
+
 var dataTag = []byte("data:")

 // CodexExecutor is a stateless executor for Codex (OpenAI Responses API entrypoint).
@@ -88,12 +93,13 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -176,7 +182,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		}

 		var param any
-		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, line, &param)
+		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, line, &param)
 		resp = cliproxyexecutor.Response{Payload: []byte(out)}
 		return resp, nil
 	}
@@ -197,12 +203,13 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai-response")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -265,7 +272,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 	reporter.publish(ctx, parseOpenAIUsage(data))
 	reporter.ensurePublished(ctx)
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -286,12 +293,13 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -378,7 +386,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 				}
 			}

-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, originalPayload, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -397,7 +405,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -634,10 +642,10 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 		ginHeaders = ginCtx.Request.Header
 	}

-	misc.EnsureHeader(r.Header, ginHeaders, "Version", "0.21.0")
+	misc.EnsureHeader(r.Header, ginHeaders, "Version", codexClientVersion)
 	misc.EnsureHeader(r.Header, ginHeaders, "Openai-Beta", "responses=experimental")
 	misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
+	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", codexUserAgent)

 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -119,12 +119,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -223,7 +224,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
+			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, opts.OriginalRequest, payload, data, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(out)}
 			return resp, nil
 		}
@@ -272,12 +273,13 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -399,14 +401,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 						reporter.publish(ctx, detail)
 					}
 					if bytes.HasPrefix(line, dataTag) {
-						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), &param)
+						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, bytes.Clone(line), &param)
 						for i := range segments {
 							out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 						}
 					}
 				}

-				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
+				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 				for i := range segments {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 				}
@@ -428,12 +430,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, &param)
+			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, data, &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}

-			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
+			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}
@@ -485,7 +487,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
 	// Gemini CLI endpoint when iterating fallback variants.
 	for range models {
-		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+		payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -116,12 +116,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -203,7 +204,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -222,12 +223,13 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -318,12 +320,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			if detail, ok := parseGeminiStreamUsage(payload); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(payload), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -344,7 +346,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -318,12 +318,13 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		from := opts.SourceFormat
 		to := sdktranslator.FromString("gemini")

-		originalPayload := bytes.Clone(req.Payload)
+		originalPayloadSource := req.Payload
 		if len(opts.OriginalRequest) > 0 {
-			originalPayload = bytes.Clone(opts.OriginalRequest)
+			originalPayloadSource = opts.OriginalRequest
 		}
+		originalPayload := originalPayloadSource
 		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
@@ -417,7 +418,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -432,12 +433,13 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -521,7 +523,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -536,12 +538,13 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -632,12 +635,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -660,12 +663,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -756,12 +760,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -781,7 +785,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -865,7 +869,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -1003,6 +1007,8 @@ func vertexBaseURL(location string) string {
 	loc := strings.TrimSpace(location)
 	if loc == "" {
 		loc = "us-central1"
+	} else if loc == "global" {
+		return "https://aiplatform.googleapis.com"
 	}
 	return fmt.Sprintf("https://%s-aiplatform.googleapis.com", loc)
 }
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -87,12 +87,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -163,7 +164,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -189,12 +190,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -274,7 +276,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -296,7 +298,7 @@ func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	enc, err := tokenizerForModel(baseModel)
 	if err != nil {
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -1681,6 +1681,7 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 	modelMap := map[string]string{
 		// Amazon Q format (amazonq- prefix) - same API as Kiro
 		"amazonq-auto":                       "auto",
+		"amazonq-claude-opus-4-6":            "claude-opus-4.6",
 		"amazonq-claude-opus-4-5":            "claude-opus-4.5",
 		"amazonq-claude-sonnet-4-5":          "claude-sonnet-4.5",
 		"amazonq-claude-sonnet-4-5-20250929": "claude-sonnet-4.5",
@@ -1688,6 +1689,7 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"amazonq-claude-sonnet-4-20250514":   "claude-sonnet-4",
 		"amazonq-claude-haiku-4-5":           "claude-haiku-4.5",
 		// Kiro format (kiro- prefix) - valid model names that should be preserved
+		"kiro-claude-opus-4-6":            "claude-opus-4.6",
 		"kiro-claude-opus-4-5":            "claude-opus-4.5",
 		"kiro-claude-sonnet-4-5":          "claude-sonnet-4.5",
 		"kiro-claude-sonnet-4-5-20250929": "claude-sonnet-4.5",
@@ -1696,6 +1698,8 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"kiro-claude-haiku-4-5":           "claude-haiku-4.5",
 		"kiro-auto":                       "auto",
 		// Native format (no prefix) - used by Kiro IDE directly
+		"claude-opus-4-6":            "claude-opus-4.6",
+		"claude-opus-4.6":            "claude-opus-4.6",
 		"claude-opus-4-5":            "claude-opus-4.5",
 		"claude-opus-4.5":            "claude-opus-4.5",
 		"claude-haiku-4-5":           "claude-haiku-4.5",
@@ -1707,10 +1711,12 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"claude-sonnet-4-20250514":   "claude-sonnet-4",
 		"auto":                       "auto",
 		// Agentic variants (same backend model IDs, but with special system prompt)
+		"claude-opus-4.6-agentic":        "claude-opus-4.6",
 		"claude-opus-4.5-agentic":        "claude-opus-4.5",
 		"claude-sonnet-4.5-agentic":      "claude-sonnet-4.5",
 		"claude-sonnet-4-agentic":        "claude-sonnet-4",
 		"claude-haiku-4.5-agentic":       "claude-haiku-4.5",
+		"kiro-claude-opus-4-6-agentic":   "claude-opus-4.6",
 		"kiro-claude-opus-4-5-agentic":   "claude-opus-4.5",
 		"kiro-claude-sonnet-4-5-agentic": "claude-sonnet-4.5",
 		"kiro-claude-sonnet-4-agentic":   "claude-sonnet-4",
@@ -2442,8 +2448,9 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string {
 func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter, thinkingEnabled bool) {
 	reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
 	var totalUsage usage.Detail
-	var hasToolUses bool              // Track if any tool uses were emitted
-	var upstreamStopReason string     // Track stop_reason from upstream events
+	var hasToolUses bool          // Track if any tool uses were emitted
+	var hasTruncatedTools bool    // Track if any tool uses were truncated
+	var upstreamStopReason string // Track stop_reason from upstream events

 	// Tool use state tracking for input buffering and deduplication
 	processedIDs := make(map[string]bool)
@@ -3221,40 +3228,16 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 			_ = signature // Signature can be used for verification if needed

 		case "toolUseEvent":
-			// Debug: log raw toolUseEvent payload for large tool inputs
-			if log.IsLevelEnabled(log.DebugLevel) {
-				payloadStr := string(payload)
-				if len(payloadStr) > 500 {
-					payloadStr = payloadStr[:500] + "...[truncated]"
-				}
-				log.Debugf("kiro: raw toolUseEvent payload (%d bytes): %s", len(payload), payloadStr)
-			}
 			// Handle dedicated tool use events with input buffering
 			completedToolUses, newState := kiroclaude.ProcessToolUseEvent(event, currentToolUse, processedIDs)
 			currentToolUse = newState

 			// Emit completed tool uses
 			for _, tu := range completedToolUses {
-				// Check for truncated write marker - emit as a Bash tool that echoes the error
-				// This way Claude Code will execute it, see the error, and the agent can retry
-				if tu.Name == "__truncated_write__" {
-					filePath := ""
-					if fp, ok := tu.Input["file_path"].(string); ok && fp != "" {
-						filePath = fp
-					}
-
-					// Create a Bash tool that echoes the error message
-					// This will be executed by Claude Code and the agent will see the result
-					var errorMsg string
-					if filePath != "" {
-						errorMsg = fmt.Sprintf("echo '[WRITE TOOL ERROR] The file content for \"%s\" is too large to be transmitted by the upstream API. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'", filePath)
-					} else {
-						errorMsg = "echo '[WRITE TOOL ERROR] The file content is too large to be transmitted by the upstream API. The Write tool input was truncated. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'"
-					}
-
-					log.Warnf("kiro: converting truncated write to Bash echo for file: %s", filePath)
-
-					hasToolUses = true
+				// Check if this tool was truncated - emit with SOFT_LIMIT_REACHED marker
+				if tu.IsTruncated {
+					hasTruncatedTools = true
+					log.Infof("kiro: streamToChannel emitting truncated tool with SOFT_LIMIT_REACHED: %s (ID: %s)", tu.Name, tu.ToolUseID)

 					// Close text block if open
 					if isTextBlockOpen && contentBlockIndex >= 0 {
@@ -3270,8 +3253,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out

 					contentBlockIndex++

-					// Emit as Bash tool_use
-					blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, "Bash")
+					// Emit tool_use with SOFT_LIMIT_REACHED marker input
+					blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, tu.Name)
 					sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
 					for _, chunk := range sseData {
 						if chunk != "" {
@@ -3279,16 +3262,14 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 						}
 					}

-					// Emit the Bash command as input
-					bashInput := map[string]interface{}{
-						"command": errorMsg,
+					// Build SOFT_LIMIT_REACHED marker input
+					markerInput := map[string]interface{}{
+						"_status":  "SOFT_LIMIT_REACHED",
+						"_message": "Tool output was truncated. Split content into smaller chunks (max 300 lines). Due to potential model hallucination, you MUST re-fetch the current working directory and generate the correct file_path.",
 					}
-					inputJSON, err := json.Marshal(bashInput)
-					if err != nil {
-						log.Errorf("kiro: failed to marshal bash input for truncated write error: %v", err)
-						continue
-					}
-					inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex)
+
+					markerJSON, _ := json.Marshal(markerInput)
+					inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(markerJSON), contentBlockIndex)
 					sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
 					for _, chunk := range sseData {
 						if chunk != "" {
@@ -3296,6 +3277,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 						}
 					}

+					// Close tool_use block
 					blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
 					sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
 					for _, chunk := range sseData {
@@ -3304,7 +3286,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 						}
 					}

-					continue // Skip the normal tool_use emission
+					hasToolUses = true // Keep this so stop_reason = tool_use
+					continue
 				}

 				hasToolUses = true
@@ -3605,7 +3588,12 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 	}

 	// Determine stop reason: prefer upstream, then detect tool_use, default to end_turn
+	// SOFT_LIMIT_REACHED: Keep stop_reason = "tool_use" so Claude continues the loop
 	stopReason := upstreamStopReason
+	if hasTruncatedTools {
+		// Log that we're using SOFT_LIMIT_REACHED approach
+		log.Infof("kiro: streamToChannel using SOFT_LIMIT_REACHED - keeping stop_reason=tool_use for truncated tools")
+	}
 	if stopReason == "" {
 		if hasToolUses {
 			stopReason = "tool_use"
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -80,7 +80,7 @@ func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequ
 	writeHeaders(builder, info.Headers)
 	builder.WriteString("\nBody:\n")
 	if len(info.Body) > 0 {
-		builder.WriteString(string(bytes.Clone(info.Body)))
+		builder.WriteString(string(info.Body))
 	} else {
 		builder.WriteString("<empty>")
 	}
@@ -152,7 +152,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	if cfg == nil || !cfg.RequestLog {
 		return
 	}
-	data := bytes.TrimSpace(bytes.Clone(chunk))
+	data := bytes.TrimSpace(chunk)
 	if len(data) == 0 {
 		return
 	}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -88,12 +88,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		to = sdktranslator.FromString("openai-response")
 		endpoint = "/responses/compact"
 	}
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, opts.Stream)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
 	if opts.Alt == "responses/compact" {
@@ -170,7 +171,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	reporter.ensurePublished(ctx)
 	// Translate response back to source format when needed
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, body, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, body, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -189,12 +190,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

@@ -283,7 +285,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy

 			// OpenAI-compatible streams are SSE: lines typically prefixed with "data: ".
 			// Pass through translator; it yields one or more chunks for the target schema.
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -304,7 +306,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	modelForCounting := baseModel

--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -81,12 +81,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -150,7 +151,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -171,12 +172,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -253,12 +255,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
 		}
-		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
+		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range doneChunks {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(doneChunks[i])}
 		}
@@ -276,7 +278,7 @@ func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth,

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -388,7 +388,12 @@ func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
 	}

 	// Check thinkingLevel first (Gemini 3 format takes precedence)
-	if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
+	level := gjson.GetBytes(body, prefix+".thinkingLevel")
+	if !level.Exists() {
+		// Google official Gemini Python SDK sends snake_case field names
+		level = gjson.GetBytes(body, prefix+".thinking_level")
+	}
+	if level.Exists() {
 		value := level.String()
 		switch value {
 		case "none":
@@ -401,7 +406,12 @@ func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
 	}

 	// Check thinkingBudget (Gemini 2.5 format)
-	if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
+	budget := gjson.GetBytes(body, prefix+".thinkingBudget")
+	if !budget.Exists() {
+		// Google official Gemini Python SDK sends snake_case field names
+		budget = gjson.GetBytes(body, prefix+".thinking_budget")
+	}
+	if budget.Exists() {
 		value := int(budget.Int())
 		switch value {
 		case 0:
--- a/internal/thinking/provider/antigravity/apply.go
+++ b/internal/thinking/provider/antigravity/apply.go
@@ -94,8 +94,10 @@ func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, m
 }

 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")

@@ -114,28 +116,30 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)

 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
-	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
+
+	// Respect user's explicit includeThoughts setting from original body; default to true if not set
+	// Support both camelCase and snake_case variants
+	includeThoughts := true
+	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	}
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
 	return result, nil
 }

 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) {
-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")

 	budget := config.Budget
-	includeThoughts := false
-	switch config.Mode {
-	case thinking.ModeNone:
-		includeThoughts = false
-	case thinking.ModeAuto:
-		includeThoughts = true
-	default:
-		includeThoughts = budget > 0
-	}

-	// Apply Claude-specific constraints
+	// Apply Claude-specific constraints first to get the final budget value
 	if isClaude && modelInfo != nil {
 		budget, result = a.normalizeClaudeBudget(budget, result, modelInfo)
 		// Check if budget was removed entirely
@@ -144,6 +148,37 @@ func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig,
 		}
 	}

+	// For ModeNone, always set includeThoughts to false regardless of user setting.
+	// This ensures that when user requests budget=0 (disable thinking output),
+	// the includeThoughts is correctly set to false even if budget is clamped to min.
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
+		return result, nil
+	}
+
+	// Determine includeThoughts: respect user's explicit setting from original body if provided
+	// Support both camelCase and snake_case variants
+	var includeThoughts bool
+	var userSetIncludeThoughts bool
+	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	}
+
+	if !userSetIncludeThoughts {
+		// No explicit setting, use default logic based on mode
+		switch config.Mode {
+		case thinking.ModeAuto:
+			includeThoughts = true
+		default:
+			includeThoughts = budget > 0
+		}
+	}
+
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
 	return result, nil
--- a/internal/thinking/provider/gemini/apply.go
+++ b/internal/thinking/provider/gemini/apply.go
@@ -118,8 +118,10 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)
 	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
 	// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.

-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_budget")
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")

@@ -138,29 +140,58 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)

 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
-	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
+
+	// Respect user's explicit includeThoughts setting from original body; default to true if not set
+	// Support both camelCase and snake_case variants
+	includeThoughts := true
+	if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	} else if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	}
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
 	return result, nil
 }

 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_level")
+	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")

 	budget := config.Budget
-	// ModeNone semantics:
-	//   - ModeNone + Budget=0: completely disable thinking
-	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
-	// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
-	includeThoughts := false
-	switch config.Mode {
-	case thinking.ModeNone:
-		includeThoughts = false
-	case thinking.ModeAuto:
-		includeThoughts = true
-	default:
-		includeThoughts = budget > 0
+
+	// For ModeNone, always set includeThoughts to false regardless of user setting.
+	// This ensures that when user requests budget=0 (disable thinking output),
+	// the includeThoughts is correctly set to false even if budget is clamped to min.
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
+		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
+		return result, nil
+	}
+
+	// Determine includeThoughts: respect user's explicit setting from original body if provided
+	// Support both camelCase and snake_case variants
+	var includeThoughts bool
+	var userSetIncludeThoughts bool
+	if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	} else if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	}
+
+	if !userSetIncludeThoughts {
+		// No explicit setting, use default logic based on mode
+		switch config.Mode {
+		case thinking.ModeAuto:
+			includeThoughts = true
+		default:
+			includeThoughts = budget > 0
+		}
 	}

 	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
--- a/internal/thinking/provider/geminicli/apply.go
+++ b/internal/thinking/provider/geminicli/apply.go
@@ -79,8 +79,10 @@ func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) (
 }

 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")

@@ -99,25 +101,58 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)

 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
-	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
+
+	// Respect user's explicit includeThoughts setting from original body; default to true if not set
+	// Support both camelCase and snake_case variants
+	includeThoughts := true
+	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+	}
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
 	return result, nil
 }

 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
+	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")

 	budget := config.Budget
-	includeThoughts := false
-	switch config.Mode {
-	case thinking.ModeNone:
-		includeThoughts = false
-	case thinking.ModeAuto:
-		includeThoughts = true
-	default:
-		includeThoughts = budget > 0
+
+	// For ModeNone, always set includeThoughts to false regardless of user setting.
+	// This ensures that when user requests budget=0 (disable thinking output),
+	// the includeThoughts is correctly set to false even if budget is clamped to min.
+	if config.Mode == thinking.ModeNone {
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
+		return result, nil
+	}
+
+	// Determine includeThoughts: respect user's explicit setting from original body if provided
+	// Support both camelCase and snake_case variants
+	var includeThoughts bool
+	var userSetIncludeThoughts bool
+	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
+		includeThoughts = inc.Bool()
+		userSetIncludeThoughts = true
+	}
+
+	if !userSetIncludeThoughts {
+		// No explicit setting, use default logic based on mode
+		switch config.Mode {
+		case thinking.ModeAuto:
+			includeThoughts = true
+		default:
+			includeThoughts = budget > 0
+		}
 	}

 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
@@ -37,7 +36,7 @@ import (
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
 	enableThoughtTranslate := true
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	// system instruction
 	systemInstructionJSON := ""
@@ -115,7 +114,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							if signatureResult.Exists() && signatureResult.String() != "" {
 								arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
 								if len(arrayClientSignatures) == 2 {
-									if modelName == arrayClientSignatures[0] {
+									if cache.GetModelGroup(modelName) == arrayClientSignatures[0] {
 										clientSignature = arrayClientSignatures[1]
 									}
 								}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -34,7 +33,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -28,7 +27,7 @@ const geminiCLIFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)

--- a/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
+++ b/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
@@ -1,14 +1,12 @@
 package responses

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )

 func ConvertOpenAIResponsesRequestToAntigravity(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToAntigravity(modelName, rawJSON, stream)
 }
--- a/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
+++ b/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/claude/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -30,7 +28,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiCLIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	modelResult := gjson.GetBytes(rawJSON, "model")
 	// Extract the inner request object and promote it to the top level
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -46,7 +45,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
@@ -116,7 +115,11 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 		// Include thoughts configuration for reasoning process visibility
 		// Translator only does format conversion, ApplyThinking handles model capability validation.
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+			thinkingLevel := thinkingConfig.Get("thinkingLevel")
+			if !thinkingLevel.Exists() {
+				thinkingLevel = thinkingConfig.Get("thinking_level")
+			}
+			if thinkingLevel.Exists() {
 				level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				switch level {
 				case "":
@@ -132,23 +135,29 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 					}
 				}
-			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-				budget := int(thinkingBudget.Int())
-				switch budget {
-				case 0:
-					out, _ = sjson.Set(out, "thinking.type", "disabled")
-					out, _ = sjson.Delete(out, "thinking.budget_tokens")
-				case -1:
-					out, _ = sjson.Set(out, "thinking.type", "enabled")
-					out, _ = sjson.Delete(out, "thinking.budget_tokens")
-				default:
-					out, _ = sjson.Set(out, "thinking.type", "enabled")
-					out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+			} else {
+				thinkingBudget := thinkingConfig.Get("thinkingBudget")
+				if !thinkingBudget.Exists() {
+					thinkingBudget = thinkingConfig.Get("thinking_budget")
+				}
+				if thinkingBudget.Exists() {
+					budget := int(thinkingBudget.Int())
+					switch budget {
+					case 0:
+						out, _ = sjson.Set(out, "thinking.type", "disabled")
+						out, _ = sjson.Delete(out, "thinking.budget_tokens")
+					case -1:
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Delete(out, "thinking.budget_tokens")
+					default:
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
 				}
-			} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
-				out, _ = sjson.Set(out, "thinking.type", "enabled")
-			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
-				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
 	}
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -6,7 +6,6 @@
 package chat_completions

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -44,7 +43,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -32,7 +31,7 @@ var (
 // - max_output_tokens -> max_tokens
 // - stream passthrough via parameter
 func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"fmt"
 	"strconv"
 	"strings"
@@ -35,7 +34,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in internal client format
 func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	template := `{"model":"","instructions":"","input":[]}`

--- a/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
+++ b/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -30,7 +28,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiCLIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -37,7 +36,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base template
 	out := `{"model":"","instructions":"","input":[]}`

@@ -243,19 +242,30 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)

 	// Convert Gemini thinkingConfig to Codex reasoning.effort.
+	// Note: Google official Python SDK sends snake_case fields (thinking_level/thinking_budget).
 	effortSet := false
 	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+			thinkingLevel := thinkingConfig.Get("thinkingLevel")
+			if !thinkingLevel.Exists() {
+				thinkingLevel = thinkingConfig.Get("thinking_level")
+			}
+			if thinkingLevel.Exists() {
 				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				if effort != "" {
 					out, _ = sjson.Set(out, "reasoning.effort", effort)
 					effortSet = true
 				}
-			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
-					out, _ = sjson.Set(out, "reasoning.effort", effort)
-					effortSet = true
+			} else {
+				thinkingBudget := thinkingConfig.Get("thinkingBudget")
+				if !thinkingBudget.Exists() {
+					thinkingBudget = thinkingConfig.Get("thinking_budget")
+				}
+				if thinkingBudget.Exists() {
+					if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+						out, _ = sjson.Set(out, "reasoning.effort", effort)
+						effortSet = true
+					}
 				}
 			}
 		}
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -7,8 +7,6 @@
 package chat_completions

 import (
-	"bytes"
-
 	"strconv"
 	"strings"

@@ -29,7 +27,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI Responses API format
 func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Start with empty JSON object
 	out := `{"instructions":""}`

--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"fmt"

 	"github.com/tidwall/gjson"
@@ -9,7 +8,13 @@ import (
 )

 func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
+
+	inputResult := gjson.GetBytes(rawJSON, "input")
+	if inputResult.Type == gjson.String {
+		input, _ := sjson.Set(`[{"type":"message","role":"user","content":[{"type":"input_text","text":""}]}]`, "0.content.0.text", inputResult.String())
+		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(input))
+	}

 	rawJSON, _ = sjson.SetBytes(rawJSON, "stream", true)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "store", false)
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -35,7 +35,7 @@ const geminiCLIClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)

 	// Build output Gemini CLI request JSON
@@ -116,6 +116,19 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 						part, _ = sjson.Set(part, "functionResponse.name", funcName)
 						part, _ = sjson.Set(part, "functionResponse.response.result", responseData)
 						contentJSON, _ = sjson.SetRaw(contentJSON, "parts.-1", part)
+
+					case "image":
+						source := contentResult.Get("source")
+						if source.Get("type").String() == "base64" {
+							mimeType := source.Get("media_type").String()
+							data := source.Get("data").String()
+							if mimeType != "" && data != "" {
+								part := `{"inlineData":{"mime_type":"","data":""}}`
+								part, _ = sjson.Set(part, "inlineData.mime_type", mimeType)
+								part, _ = sjson.Set(part, "inlineData.data", data)
+								contentJSON, _ = sjson.SetRaw(contentJSON, "parts.-1", part)
+							}
+						}
 					}
 					return true
 				})
--- a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
+++ b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -33,7 +32,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToGeminiCLI(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -28,7 +27,7 @@ const geminiCLIFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)

--- a/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
+++ b/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
@@ -1,14 +1,12 @@
 package responses

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini-cli/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )

 func ConvertOpenAIResponsesRequestToGeminiCLI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToGeminiCLI(modelName, rawJSON, stream)
 }
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -28,7 +28,7 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request in Gemini CLI format.
 func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)

 	// Build output Gemini CLI request JSON
--- a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
+++ b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
@@ -6,7 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -19,7 +18,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the internal client.
 func ConvertGeminiCLIRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
--- a/internal/translator/gemini/gemini/gemini_gemini_request.go
+++ b/internal/translator/gemini/gemini/gemini_gemini_request.go
@@ -4,7 +4,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -19,7 +18,7 @@ import (
 //
 // It keeps the payload otherwise unchanged.
 func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Fast path: if no contents field, only attach safety settings
 	contents := gjson.GetBytes(rawJSON, "contents")
 	if !contents.Exists() {
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -28,7 +27,7 @@ const geminiFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"contents":[]}`)

--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -12,7 +11,7 @@ import (
 const geminiResponsesThoughtSignature = "skip_thought_signature_validator"

 func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	// Note: modelName and stream parameters are part of the fixed method signature
 	_ = modelName // Unused but required by interface
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -17,7 +17,6 @@ import (
 	"github.com/tidwall/gjson"
 )

-
 // Kiro API request structs - field order determines JSON key order

 // KiroPayload is the top-level request structure for Kiro API
@@ -34,7 +33,6 @@ type KiroInferenceConfig struct {
 	TopP        float64 `json:"topP,omitempty"`
 }

-
 // KiroConversationState holds the conversation context
 type KiroConversationState struct {
 	ChatTriggerType string               `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
@@ -117,9 +115,11 @@ type KiroAssistantResponseMessage struct {

 // KiroToolUse represents a tool invocation by the assistant
 type KiroToolUse struct {
-	ToolUseID string                 `json:"toolUseId"`
-	Name      string                 `json:"name"`
-	Input     map[string]interface{} `json:"input"`
+	ToolUseID      string                 `json:"toolUseId"`
+	Name           string                 `json:"name"`
+	Input          map[string]interface{} `json:"input"`
+	IsTruncated    bool                   `json:"-"` // Internal flag, not serialized
+	TruncationInfo *TruncationInfo        `json:"-"` // Truncation details, not serialized
 }

 // ConvertClaudeRequestToKiro converts a Claude API request to Kiro format.
@@ -219,16 +219,35 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
 	}

 	// Convert Claude tools to Kiro format
-	kiroTools := convertClaudeToolsToKiro(tools)
+	kiroTools, hasWebSearch := convertClaudeToolsToKiro(tools)
+
+	// If web_search was requested but filtered, inject alternative hint
+	if hasWebSearch {
+		webSearchHint := `[CRITICAL WEB ACCESS INSTRUCTION]
+You have the Fetch/read_url_content tool available. When the user asks about current events, weather, news, or any information that requires web access:
+- DO NOT say you cannot search the web
+- DO NOT refuse to help with web-related queries
+- IMMEDIATELY use the Fetch tool to access relevant URLs
+- Use well-known official websites, documentation sites, or API endpoints
+- Construct appropriate URLs based on the query context
+
+IMPORTANT: Always attempt to fetch information FIRST before declining. You CAN access the web via Fetch.`
+		if systemPrompt != "" {
+			systemPrompt += "\n" + webSearchHint
+		} else {
+			systemPrompt = webSearchHint
+		}
+		log.Infof("kiro: injected web_search alternative hint (tool was filtered)")
+	}

 	// Thinking mode implementation:
 	// Kiro API supports official thinking/reasoning mode via <thinking_mode> tag.
 	// When set to "enabled", Kiro returns reasoning content as official reasoningContentEvent
 	// rather than inline <thinking> tags in assistantResponseEvent.
-	// We use a high max_thinking_length to allow extensive reasoning.
+	// We cap max_thinking_length to reserve space for tool outputs and prevent truncation.
 	if thinkingEnabled {
 		thinkingHint := `<thinking_mode>enabled</thinking_mode>
-<max_thinking_length>200000</max_thinking_length>`
+<max_thinking_length>16000</max_thinking_length>`
 		if systemPrompt != "" {
 			systemPrompt = thinkingHint + "\n\n" + systemPrompt
 		} else {
@@ -378,7 +397,6 @@ func hasThinkingTagInBody(body []byte) bool {
 	return strings.Contains(bodyStr, "<thinking_mode>") || strings.Contains(bodyStr, "<max_thinking_length>")
 }

-
 // IsThinkingEnabledFromHeader checks if thinking mode is enabled via Anthropic-Beta header.
 // Claude CLI uses "Anthropic-Beta: interleaved-thinking-2025-05-14" to enable thinking.
 func IsThinkingEnabledFromHeader(headers http.Header) bool {
@@ -509,15 +527,27 @@ func ensureKiroInputSchema(parameters interface{}) interface{} {
 	}
 }

-// convertClaudeToolsToKiro converts Claude tools to Kiro format
-func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
+// convertClaudeToolsToKiro converts Claude tools to Kiro format.
+// Returns the converted tools and a boolean indicating if web_search was filtered.
+func convertClaudeToolsToKiro(tools gjson.Result) ([]KiroToolWrapper, bool) {
 	var kiroTools []KiroToolWrapper
+	hasWebSearch := false
 	if !tools.IsArray() {
-		return kiroTools
+		return kiroTools, hasWebSearch
 	}

 	for _, tool := range tools.Array() {
 		name := tool.Get("name").String()
+
+		// Filter out web_search/websearch tools (Kiro API doesn't support them)
+		// This matches the behavior in AIClient-2-API/claude-kiro.js
+		nameLower := strings.ToLower(name)
+		if nameLower == "web_search" || nameLower == "websearch" {
+			log.Debugf("kiro: skipping unsupported tool: %s", name)
+			hasWebSearch = true
+			continue
+		}
+
 		description := tool.Get("description").String()
 		inputSchemaResult := tool.Get("input_schema")
 		var inputSchema interface{}
@@ -561,7 +591,7 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
 	// This prevents 500 errors when Claude Code sends too many tools
 	kiroTools = compressToolsIfNeeded(kiroTools)

-	return kiroTools
+	return kiroTools, hasWebSearch
 }

 // processMessages processes Claude messages and builds Kiro history
@@ -743,7 +773,35 @@ func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserI
 				resultContent := part.Get("content")

 				var textContents []KiroTextContent
-				if resultContent.IsArray() {
+
+				// Check if this tool_result contains error from our SOFT_LIMIT_REACHED tool_use
+				// The client will return an error when trying to execute a tool with marker input
+				resultStr := resultContent.String()
+				isSoftLimitError := strings.Contains(resultStr, "SOFT_LIMIT_REACHED") ||
+					strings.Contains(resultStr, "_status") ||
+					strings.Contains(resultStr, "truncated") ||
+					strings.Contains(resultStr, "missing required") ||
+					strings.Contains(resultStr, "invalid input") ||
+					strings.Contains(resultStr, "Error writing file")
+
+				if isError && isSoftLimitError {
+					// Replace error content with SOFT_LIMIT_REACHED guidance
+					log.Infof("kiro: detected SOFT_LIMIT_REACHED in tool_result for %s, replacing with guidance", toolUseID)
+					softLimitMsg := `SOFT_LIMIT_REACHED
+
+Your previous tool call was incomplete due to API output size limits.
+The content was PARTIALLY transmitted but NOT executed.
+
+REQUIRED ACTION:
+1. Split your content into smaller chunks (max 300 lines per call)
+2. For file writes: Create file with first chunk, then use append for remaining
+3. Do NOT regenerate content you already attempted - continue from where you stopped
+
+STATUS: This is NOT an error. Continue with smaller chunks.`
+					textContents = append(textContents, KiroTextContent{Text: softLimitMsg})
+					// Mark as SUCCESS so Claude doesn't treat it as a failure
+					isError = false
+				} else if resultContent.IsArray() {
 					for _, item := range resultContent.Array() {
 						if item.Get("type").String() == "text" {
 							textContents = append(textContents, KiroTextContent{Text: item.Get("text").String()})
@@ -825,8 +883,21 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage
 		contentBuilder.WriteString(content.String())
 	}

+	// CRITICAL FIX: Kiro API requires non-empty content for assistant messages
+	// This can happen with compaction requests where assistant messages have only tool_use
+	// (no text content). Without this fix, Kiro API returns "Improperly formed request" error.
+	finalContent := contentBuilder.String()
+	if strings.TrimSpace(finalContent) == "" {
+		if len(toolUses) > 0 {
+			finalContent = kirocommon.DefaultAssistantContentWithTools
+		} else {
+			finalContent = kirocommon.DefaultAssistantContent
+		}
+		log.Debugf("kiro: assistant content was empty, using default: %s", finalContent)
+	}
+
 	return KiroAssistantResponseMessage{
-		Content:  contentBuilder.String(),
+		Content:  finalContent,
 		ToolUses: toolUses,
 	}
 }
--- a/internal/translator/kiro/claude/kiro_claude_response.go
+++ b/internal/translator/kiro/claude/kiro_claude_response.go
@@ -55,14 +55,39 @@ func BuildClaudeResponse(content string, toolUses []KiroToolUse, model string, u
 		}
 	}

-	// Add tool_use blocks
+	// Add tool_use blocks - emit truncated tools with SOFT_LIMIT_REACHED marker
+	hasTruncatedTools := false
 	for _, toolUse := range toolUses {
-		contentBlocks = append(contentBlocks, map[string]interface{}{
-			"type":  "tool_use",
-			"id":    toolUse.ToolUseID,
-			"name":  toolUse.Name,
-			"input": toolUse.Input,
-		})
+		if toolUse.IsTruncated && toolUse.TruncationInfo != nil {
+			// Emit tool_use with SOFT_LIMIT_REACHED marker input
+			hasTruncatedTools = true
+			log.Infof("kiro: buildClaudeResponse emitting truncated tool with SOFT_LIMIT_REACHED: %s (ID: %s)", toolUse.Name, toolUse.ToolUseID)
+
+			markerInput := map[string]interface{}{
+				"_status":  "SOFT_LIMIT_REACHED",
+				"_message": "Tool output was truncated. Split content into smaller chunks (max 300 lines). Due to potential model hallucination, you MUST re-fetch the current working directory and generate the correct file_path.",
+			}
+
+			contentBlocks = append(contentBlocks, map[string]interface{}{
+				"type":  "tool_use",
+				"id":    toolUse.ToolUseID,
+				"name":  toolUse.Name,
+				"input": markerInput,
+			})
+		} else {
+			// Normal tool use
+			contentBlocks = append(contentBlocks, map[string]interface{}{
+				"type":  "tool_use",
+				"id":    toolUse.ToolUseID,
+				"name":  toolUse.Name,
+				"input": toolUse.Input,
+			})
+		}
+	}
+
+	// Log if we used SOFT_LIMIT_REACHED
+	if hasTruncatedTools {
+		log.Infof("kiro: buildClaudeResponse using SOFT_LIMIT_REACHED - keeping stop_reason=tool_use")
 	}

 	// Ensure at least one content block (Claude API requires non-empty content)
@@ -74,6 +99,7 @@ func BuildClaudeResponse(content string, toolUses []KiroToolUse, model string, u
 	}

 	// Use upstream stopReason; apply fallback logic if not provided
+	// SOFT_LIMIT_REACHED: Keep stop_reason = "tool_use" so Claude continues the loop
 	if stopReason == "" {
 		stopReason = "end_turn"
 		if len(toolUses) > 0 {
@@ -201,4 +227,4 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} {
 	}

 	return blocks
-}
+}
--- a/internal/translator/kiro/claude/kiro_claude_tools.go
+++ b/internal/translator/kiro/claude/kiro_claude_tools.go
@@ -14,10 +14,11 @@ import (

 // ToolUseState tracks the state of an in-progress tool use during streaming.
 type ToolUseState struct {
-	ToolUseID   string
-	Name        string
-	InputBuffer strings.Builder
-	IsComplete  bool
+	ToolUseID      string
+	Name           string
+	InputBuffer    strings.Builder
+	IsComplete     bool
+	TruncationInfo *TruncationInfo // Truncation detection result (set when complete)
 }

 // Pre-compiled regex patterns for performance
@@ -395,17 +396,6 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
 		isStop = stop
 	}

-	// Debug: log when stop event arrives
-	if isStop {
-		log.Debugf("kiro: toolUseEvent stop=true received for tool %s (ID: %s), currentToolUse buffer len: %d",
-			toolName, toolUseID, func() int {
-				if currentToolUse != nil {
-					return currentToolUse.InputBuffer.Len()
-				}
-				return -1
-			}())
-	}
-
 	// Get input - can be string (fragment) or object (complete)
 	var inputFragment string
 	var inputMap map[string]interface{}
@@ -477,98 +467,39 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
 	if isStop && currentToolUse != nil {
 		fullInput := currentToolUse.InputBuffer.String()

-		// Check for Write tool with empty or missing input - this happens when Kiro API
-		// completely skips sending input for large file writes
-		if currentToolUse.Name == "Write" && len(strings.TrimSpace(fullInput)) == 0 {
-			log.Warnf("kiro: Write tool received no input from upstream API. The file content may be too large to transmit.")
-			// Return nil to skip this tool use - it will be handled as a truncation error
-			// The caller should emit a text block explaining the error instead
-			if processedIDs != nil {
-				processedIDs[currentToolUse.ToolUseID] = true
-			}
-			log.Infof("kiro: skipping Write tool use %s due to empty input (content too large)", currentToolUse.ToolUseID)
-			// Return a special marker tool use that indicates truncation
-			toolUse := KiroToolUse{
-				ToolUseID: currentToolUse.ToolUseID,
-				Name:      "__truncated_write__", // Special marker name
-				Input: map[string]interface{}{
-					"error": "Write tool input was not transmitted by upstream API. The file content is too large.",
-				},
-			}
-			toolUses = append(toolUses, toolUse)
-			return toolUses, nil
-		}
-
 		// Repair and parse the accumulated JSON
 		repairedJSON := RepairJSON(fullInput)
 		var finalInput map[string]interface{}
 		if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
 			log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput)
 			finalInput = make(map[string]interface{})
-
-			// Check if this is a Write tool with truncated input (missing content field)
-			// This happens when the Kiro API truncates large tool inputs
-			if currentToolUse.Name == "Write" && strings.Contains(fullInput, "file_path") && !strings.Contains(fullInput, "content") {
-				log.Warnf("kiro: Write tool input was truncated by upstream API (content field missing). The file content may be too large.")
-				// Extract file_path if possible for error context
-				filePath := ""
-				if idx := strings.Index(fullInput, "file_path"); idx >= 0 {
-					// Try to extract the file path value
-					rest := fullInput[idx:]
-					if colonIdx := strings.Index(rest, ":"); colonIdx >= 0 {
-						rest = strings.TrimSpace(rest[colonIdx+1:])
-						if len(rest) > 0 && rest[0] == '"' {
-							rest = rest[1:]
-							if endQuote := strings.Index(rest, "\""); endQuote >= 0 {
-								filePath = rest[:endQuote]
-							}
-						}
-					}
-				}
-				if processedIDs != nil {
-					processedIDs[currentToolUse.ToolUseID] = true
-				}
-				// Return a special marker tool use that indicates truncation
-				toolUse := KiroToolUse{
-					ToolUseID: currentToolUse.ToolUseID,
-					Name:      "__truncated_write__", // Special marker name
-					Input: map[string]interface{}{
-						"error":     "Write tool content was truncated by upstream API. The file content is too large.",
-						"file_path": filePath,
-					},
-				}
-				toolUses = append(toolUses, toolUse)
-				return toolUses, nil
-			}
 		}

-		// Additional check: Write tool parsed successfully but missing content field
-		if currentToolUse.Name == "Write" {
-			if _, hasContent := finalInput["content"]; !hasContent {
-				if filePath, hasPath := finalInput["file_path"]; hasPath {
-					log.Warnf("kiro: Write tool input missing 'content' field, likely truncated by upstream API")
-					if processedIDs != nil {
-						processedIDs[currentToolUse.ToolUseID] = true
-					}
-					// Return a special marker tool use that indicates truncation
-					toolUse := KiroToolUse{
-						ToolUseID: currentToolUse.ToolUseID,
-						Name:      "__truncated_write__", // Special marker name
-						Input: map[string]interface{}{
-							"error":     "Write tool content field was missing. The file content is too large.",
-							"file_path": filePath,
-						},
-					}
-					toolUses = append(toolUses, toolUse)
-					return toolUses, nil
-				}
+		// Detect truncation for all tools
+		truncInfo := DetectTruncation(currentToolUse.Name, currentToolUse.ToolUseID, fullInput, finalInput)
+		if truncInfo.IsTruncated {
+			log.Warnf("kiro: TRUNCATION DETECTED for tool %s (ID: %s): type=%s, raw_size=%d bytes",
+				currentToolUse.Name, currentToolUse.ToolUseID, truncInfo.TruncationType, len(fullInput))
+			log.Warnf("kiro: truncation details: %s", truncInfo.ErrorMessage)
+			if len(truncInfo.ParsedFields) > 0 {
+				log.Infof("kiro: partial fields received: %v", truncInfo.ParsedFields)
 			}
+			// Store truncation info in the state for upstream handling
+			currentToolUse.TruncationInfo = &truncInfo
+		} else {
+			log.Infof("kiro: tool use %s input length: %d bytes (no truncation)", currentToolUse.Name, len(fullInput))
 		}

+		// Create the tool use with truncation info if applicable
 		toolUse := KiroToolUse{
-			ToolUseID: currentToolUse.ToolUseID,
-			Name:      currentToolUse.Name,
-			Input:     finalInput,
+			ToolUseID:      currentToolUse.ToolUseID,
+			Name:           currentToolUse.Name,
+			Input:          finalInput,
+			IsTruncated:    truncInfo.IsTruncated,
+			TruncationInfo: nil, // Will be set below if truncated
+		}
+		if truncInfo.IsTruncated {
+			toolUse.TruncationInfo = &truncInfo
 		}
 		toolUses = append(toolUses, toolUse)

@@ -576,7 +507,7 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
 			processedIDs[currentToolUse.ToolUseID] = true
 		}

-		log.Infof("kiro: completed tool use: %s (ID: %s)", currentToolUse.Name, currentToolUse.ToolUseID)
+		log.Infof("kiro: completed tool use: %s (ID: %s, truncated: %v)", currentToolUse.Name, currentToolUse.ToolUseID, truncInfo.IsTruncated)
 		return toolUses, nil
 	}

@@ -610,4 +541,3 @@ func DeduplicateToolUses(toolUses []KiroToolUse) []KiroToolUse {

 	return unique
 }
-
--- a/internal/translator/kiro/claude/truncation_detector.go
+++ b/internal/translator/kiro/claude/truncation_detector.go
@@ -0,0 +1,517 @@
+// Package claude provides truncation detection for Kiro tool call responses.
+// When Kiro API reaches its output token limit, tool call JSON may be truncated,
+// resulting in incomplete or unparseable input parameters.
+package claude
+
+import (
+	"encoding/json"
+	"strings"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// TruncationInfo contains details about detected truncation in a tool use event.
+type TruncationInfo struct {
+	IsTruncated    bool              // Whether truncation was detected
+	TruncationType string            // Type of truncation detected
+	ToolName       string            // Name of the truncated tool
+	ToolUseID      string            // ID of the truncated tool use
+	RawInput       string            // The raw (possibly truncated) input string
+	ParsedFields   map[string]string // Fields that were successfully parsed before truncation
+	ErrorMessage   string            // Human-readable error message
+}
+
+// TruncationType constants for different truncation scenarios
+const (
+	TruncationTypeNone             = ""                  // No truncation detected
+	TruncationTypeEmptyInput       = "empty_input"       // No input data received at all
+	TruncationTypeInvalidJSON      = "invalid_json"      // JSON is syntactically invalid (truncated mid-value)
+	TruncationTypeMissingFields    = "missing_fields"    // JSON parsed but critical fields are missing
+	TruncationTypeIncompleteString = "incomplete_string" // String value was cut off mid-content
+)
+
+// KnownWriteTools lists tool names that typically write content and have a "content" field.
+// These tools are checked for content field truncation specifically.
+var KnownWriteTools = map[string]bool{
+	"Write":              true,
+	"write_to_file":      true,
+	"fsWrite":            true,
+	"create_file":        true,
+	"edit_file":          true,
+	"apply_diff":         true,
+	"str_replace_editor": true,
+	"insert":             true,
+}
+
+// KnownCommandTools lists tool names that execute commands.
+var KnownCommandTools = map[string]bool{
+	"Bash":           true,
+	"execute":        true,
+	"run_command":    true,
+	"shell":          true,
+	"terminal":       true,
+	"execute_python": true,
+}
+
+// RequiredFieldsByTool maps tool names to their required fields.
+// If any of these fields are missing, the tool input is considered truncated.
+var RequiredFieldsByTool = map[string][]string{
+	"Write":              {"file_path", "content"},
+	"write_to_file":      {"path", "content"},
+	"fsWrite":            {"path", "content"},
+	"create_file":        {"path", "content"},
+	"edit_file":          {"path"},
+	"apply_diff":         {"path", "diff"},
+	"str_replace_editor": {"path", "old_str", "new_str"},
+	"Bash":               {"command"},
+	"execute":            {"command"},
+	"run_command":        {"command"},
+}
+
+// DetectTruncation checks if the tool use input appears to be truncated.
+// It returns detailed information about the truncation status and type.
+func DetectTruncation(toolName, toolUseID, rawInput string, parsedInput map[string]interface{}) TruncationInfo {
+	info := TruncationInfo{
+		ToolName:     toolName,
+		ToolUseID:    toolUseID,
+		RawInput:     rawInput,
+		ParsedFields: make(map[string]string),
+	}
+
+	// Scenario 1: Empty input buffer - no data received at all
+	if strings.TrimSpace(rawInput) == "" {
+		info.IsTruncated = true
+		info.TruncationType = TruncationTypeEmptyInput
+		info.ErrorMessage = "Tool input was completely empty - API response may have been truncated before tool parameters were transmitted"
+		log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): empty input buffer",
+			info.TruncationType, toolName, toolUseID)
+		return info
+	}
+
+	// Scenario 2: JSON parse failure - syntactically invalid JSON
+	if parsedInput == nil || len(parsedInput) == 0 {
+		// Check if the raw input looks like truncated JSON
+		if looksLikeTruncatedJSON(rawInput) {
+			info.IsTruncated = true
+			info.TruncationType = TruncationTypeInvalidJSON
+			info.ParsedFields = extractPartialFields(rawInput)
+			info.ErrorMessage = buildTruncationErrorMessage(toolName, info.TruncationType, info.ParsedFields, rawInput)
+			log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): JSON parse failed, raw length=%d bytes",
+				info.TruncationType, toolName, toolUseID, len(rawInput))
+			return info
+		}
+	}
+
+	// Scenario 3: JSON parsed but critical fields are missing
+	if parsedInput != nil {
+		requiredFields, hasRequirements := RequiredFieldsByTool[toolName]
+		if hasRequirements {
+			missingFields := findMissingRequiredFields(parsedInput, requiredFields)
+			if len(missingFields) > 0 {
+				info.IsTruncated = true
+				info.TruncationType = TruncationTypeMissingFields
+				info.ParsedFields = extractParsedFieldNames(parsedInput)
+				info.ErrorMessage = buildMissingFieldsErrorMessage(toolName, missingFields, info.ParsedFields)
+				log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): missing required fields: %v",
+					info.TruncationType, toolName, toolUseID, missingFields)
+				return info
+			}
+		}
+
+		// Scenario 4: Check for incomplete string values (very short content for write tools)
+		if isWriteTool(toolName) {
+			if contentTruncation := detectContentTruncation(parsedInput, rawInput); contentTruncation != "" {
+				info.IsTruncated = true
+				info.TruncationType = TruncationTypeIncompleteString
+				info.ParsedFields = extractParsedFieldNames(parsedInput)
+				info.ErrorMessage = contentTruncation
+				log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): %s",
+					info.TruncationType, toolName, toolUseID, contentTruncation)
+				return info
+			}
+		}
+	}
+
+	// No truncation detected
+	info.IsTruncated = false
+	info.TruncationType = TruncationTypeNone
+	return info
+}
+
+// looksLikeTruncatedJSON checks if the raw string appears to be truncated JSON.
+func looksLikeTruncatedJSON(raw string) bool {
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return false
+	}
+
+	// Must start with { to be considered JSON
+	if !strings.HasPrefix(trimmed, "{") {
+		return false
+	}
+
+	// Count brackets to detect imbalance
+	openBraces := strings.Count(trimmed, "{")
+	closeBraces := strings.Count(trimmed, "}")
+	openBrackets := strings.Count(trimmed, "[")
+	closeBrackets := strings.Count(trimmed, "]")
+
+	// Bracket imbalance suggests truncation
+	if openBraces > closeBraces || openBrackets > closeBrackets {
+		return true
+	}
+
+	// Check for obvious truncation patterns
+	// - Ends with a quote but no closing brace
+	// - Ends with a colon (mid key-value)
+	// - Ends with a comma (mid object/array)
+	lastChar := trimmed[len(trimmed)-1]
+	if lastChar != '}' && lastChar != ']' {
+		// Check if it's not a complete simple value
+		if lastChar == '"' || lastChar == ':' || lastChar == ',' {
+			return true
+		}
+	}
+
+	// Check for unclosed strings (odd number of unescaped quotes)
+	inString := false
+	escaped := false
+	for i := 0; i < len(trimmed); i++ {
+		c := trimmed[i]
+		if escaped {
+			escaped = false
+			continue
+		}
+		if c == '\\' {
+			escaped = true
+			continue
+		}
+		if c == '"' {
+			inString = !inString
+		}
+	}
+	if inString {
+		return true // Unclosed string
+	}
+
+	return false
+}
+
+// extractPartialFields attempts to extract any field names from malformed JSON.
+// This helps provide context about what was received before truncation.
+func extractPartialFields(raw string) map[string]string {
+	fields := make(map[string]string)
+
+	// Simple pattern matching for "key": "value" or "key": value patterns
+	// This works even with truncated JSON
+	trimmed := strings.TrimSpace(raw)
+	if !strings.HasPrefix(trimmed, "{") {
+		return fields
+	}
+
+	// Remove opening brace
+	content := strings.TrimPrefix(trimmed, "{")
+
+	// Split by comma (rough parsing)
+	parts := strings.Split(content, ",")
+	for _, part := range parts {
+		part = strings.TrimSpace(part)
+		if colonIdx := strings.Index(part, ":"); colonIdx > 0 {
+			key := strings.TrimSpace(part[:colonIdx])
+			key = strings.Trim(key, `"`)
+			value := strings.TrimSpace(part[colonIdx+1:])
+
+			// Truncate long values for display
+			if len(value) > 50 {
+				value = value[:50] + "..."
+			}
+			fields[key] = value
+		}
+	}
+
+	return fields
+}
+
+// extractParsedFieldNames returns the field names from a successfully parsed map.
+func extractParsedFieldNames(parsed map[string]interface{}) map[string]string {
+	fields := make(map[string]string)
+	for key, val := range parsed {
+		switch v := val.(type) {
+		case string:
+			if len(v) > 50 {
+				fields[key] = v[:50] + "..."
+			} else {
+				fields[key] = v
+			}
+		case nil:
+			fields[key] = "<null>"
+		default:
+			// For complex types, just indicate presence
+			fields[key] = "<present>"
+		}
+	}
+	return fields
+}
+
+// findMissingRequiredFields checks which required fields are missing from the parsed input.
+func findMissingRequiredFields(parsed map[string]interface{}, required []string) []string {
+	var missing []string
+	for _, field := range required {
+		if _, exists := parsed[field]; !exists {
+			missing = append(missing, field)
+		}
+	}
+	return missing
+}
+
+// isWriteTool checks if the tool is a known write/file operation tool.
+func isWriteTool(toolName string) bool {
+	return KnownWriteTools[toolName]
+}
+
+// detectContentTruncation checks if the content field appears truncated for write tools.
+func detectContentTruncation(parsed map[string]interface{}, rawInput string) string {
+	// Check for content field
+	content, hasContent := parsed["content"]
+	if !hasContent {
+		return ""
+	}
+
+	contentStr, isString := content.(string)
+	if !isString {
+		return ""
+	}
+
+	// Heuristic: if raw input is very large but content is suspiciously short,
+	// it might indicate truncation during JSON repair
+	if len(rawInput) > 1000 && len(contentStr) < 100 {
+		return "content field appears suspiciously short compared to raw input size"
+	}
+
+	// Check for code blocks that appear to be cut off
+	if strings.Contains(contentStr, "```") {
+		openFences := strings.Count(contentStr, "```")
+		if openFences%2 != 0 {
+			return "content contains unclosed code fence (```) suggesting truncation"
+		}
+	}
+
+	return ""
+}
+
+// buildTruncationErrorMessage creates a human-readable error message for truncation.
+func buildTruncationErrorMessage(toolName, truncationType string, parsedFields map[string]string, rawInput string) string {
+	var sb strings.Builder
+	sb.WriteString("Tool input was truncated by the API. ")
+
+	switch truncationType {
+	case TruncationTypeEmptyInput:
+		sb.WriteString("No input data was received.")
+	case TruncationTypeInvalidJSON:
+		sb.WriteString("JSON was cut off mid-transmission. ")
+		if len(parsedFields) > 0 {
+			sb.WriteString("Partial fields received: ")
+			first := true
+			for k := range parsedFields {
+				if !first {
+					sb.WriteString(", ")
+				}
+				sb.WriteString(k)
+				first = false
+			}
+		}
+	case TruncationTypeMissingFields:
+		sb.WriteString("Required fields are missing from the input.")
+	case TruncationTypeIncompleteString:
+		sb.WriteString("Content appears to be shortened or incomplete.")
+	}
+
+	sb.WriteString(" Received ")
+	sb.WriteString(string(rune(len(rawInput))))
+	sb.WriteString(" bytes. Please retry with smaller content chunks.")
+
+	return sb.String()
+}
+
+// buildMissingFieldsErrorMessage creates an error message for missing required fields.
+func buildMissingFieldsErrorMessage(toolName string, missingFields []string, parsedFields map[string]string) string {
+	var sb strings.Builder
+	sb.WriteString("Tool '")
+	sb.WriteString(toolName)
+	sb.WriteString("' is missing required fields: ")
+	sb.WriteString(strings.Join(missingFields, ", "))
+	sb.WriteString(". Fields received: ")
+
+	first := true
+	for k := range parsedFields {
+		if !first {
+			sb.WriteString(", ")
+		}
+		sb.WriteString(k)
+		first = false
+	}
+
+	sb.WriteString(". This usually indicates the API response was truncated.")
+	return sb.String()
+}
+
+// IsTruncated is a convenience function to check if a tool use appears truncated.
+func IsTruncated(toolName, rawInput string, parsedInput map[string]interface{}) bool {
+	info := DetectTruncation(toolName, "", rawInput, parsedInput)
+	return info.IsTruncated
+}
+
+// GetTruncationSummary returns a short summary string for logging.
+func GetTruncationSummary(info TruncationInfo) string {
+	if !info.IsTruncated {
+		return ""
+	}
+
+	result, _ := json.Marshal(map[string]interface{}{
+		"tool":           info.ToolName,
+		"type":           info.TruncationType,
+		"parsed_fields":  info.ParsedFields,
+		"raw_input_size": len(info.RawInput),
+	})
+	return string(result)
+}
+
+// SoftFailureMessage contains the message structure for a truncation soft failure.
+// This is returned to Claude as a tool_result to guide retry behavior.
+type SoftFailureMessage struct {
+	Status      string   // "incomplete" - not an error, just incomplete
+	Reason      string   // Why the tool call was incomplete
+	Guidance    []string // Step-by-step retry instructions
+	Context     string   // Any context about what was received
+	MaxLineHint int      // Suggested maximum lines per chunk
+}
+
+// BuildSoftFailureMessage creates a structured message for Claude when truncation is detected.
+// This follows the "soft failure" pattern:
+// - For Claude: Clear explanation of what happened and how to fix
+// - For User: Hidden or minimized (appears as normal processing)
+//
+// Key principle: "Conclusion First"
+// 1. First state what happened (incomplete)
+// 2. Then explain how to fix (chunked approach)
+// 3. Provide specific guidance (line limits)
+func BuildSoftFailureMessage(info TruncationInfo) SoftFailureMessage {
+	msg := SoftFailureMessage{
+		Status:      "incomplete",
+		MaxLineHint: 300, // Conservative default
+	}
+
+	// Build reason based on truncation type
+	switch info.TruncationType {
+	case TruncationTypeEmptyInput:
+		msg.Reason = "Your tool call was too large and the input was completely lost during transmission."
+		msg.MaxLineHint = 200
+	case TruncationTypeInvalidJSON:
+		msg.Reason = "Your tool call was truncated mid-transmission, resulting in incomplete JSON."
+		msg.MaxLineHint = 250
+	case TruncationTypeMissingFields:
+		msg.Reason = "Your tool call was partially received but critical fields were cut off."
+		msg.MaxLineHint = 300
+	case TruncationTypeIncompleteString:
+		msg.Reason = "Your tool call content was truncated - the full content did not arrive."
+		msg.MaxLineHint = 350
+	default:
+		msg.Reason = "Your tool call was truncated by the API due to output size limits."
+	}
+
+	// Build context from parsed fields
+	if len(info.ParsedFields) > 0 {
+		var parts []string
+		for k, v := range info.ParsedFields {
+			if len(v) > 30 {
+				v = v[:30] + "..."
+			}
+			parts = append(parts, k+"="+v)
+		}
+		msg.Context = "Received partial data: " + strings.Join(parts, ", ")
+	}
+
+	// Build retry guidance - CRITICAL: Conclusion first approach
+	msg.Guidance = []string{
+		"CONCLUSION: Split your output into smaller chunks and retry.",
+		"",
+		"REQUIRED APPROACH:",
+		"1. For file writes: Write in chunks of ~" + formatInt(msg.MaxLineHint) + " lines maximum",
+		"2. For new files: First create with initial chunk, then append remaining sections",
+		"3. For edits: Make surgical, targeted changes - avoid rewriting entire files",
+		"",
+		"EXAMPLE (writing a 600-line file):",
+		"  - Step 1: Write lines 1-300 (create file)",
+		"  - Step 2: Append lines 301-600 (extend file)",
+		"",
+		"DO NOT attempt to write the full content again in a single call.",
+		"The API has a hard output limit that cannot be bypassed.",
+	}
+
+	return msg
+}
+
+// formatInt converts an integer to string (helper to avoid strconv import)
+func formatInt(n int) string {
+	if n == 0 {
+		return "0"
+	}
+	result := ""
+	for n > 0 {
+		result = string(rune('0'+n%10)) + result
+		n /= 10
+	}
+	return result
+}
+
+// BuildSoftFailureToolResult creates a tool_result content for Claude.
+// This is what Claude will see when a tool call is truncated.
+// Returns a string that should be used as the tool_result content.
+func BuildSoftFailureToolResult(info TruncationInfo) string {
+	msg := BuildSoftFailureMessage(info)
+
+	var sb strings.Builder
+	sb.WriteString("TOOL_CALL_INCOMPLETE\n")
+	sb.WriteString("status: ")
+	sb.WriteString(msg.Status)
+	sb.WriteString("\n")
+	sb.WriteString("reason: ")
+	sb.WriteString(msg.Reason)
+	sb.WriteString("\n")
+
+	if msg.Context != "" {
+		sb.WriteString("context: ")
+		sb.WriteString(msg.Context)
+		sb.WriteString("\n")
+	}
+
+	sb.WriteString("\n")
+	for _, line := range msg.Guidance {
+		if line != "" {
+			sb.WriteString(line)
+			sb.WriteString("\n")
+		}
+	}
+
+	return sb.String()
+}
+
+// CreateTruncationToolResult creates a KiroToolUse that represents a soft failure.
+// Instead of returning the truncated tool_use, we return a tool with a special
+// error result that guides Claude to retry with smaller chunks.
+//
+// This is the key mechanism for "soft failure":
+// - stop_reason remains "tool_use" so Claude continues
+// - The tool_result content explains the issue and how to fix it
+// - Claude will read this and adjust its approach
+func CreateTruncationToolResult(info TruncationInfo) KiroToolUse {
+	// We create a pseudo tool_use that represents the failed attempt
+	// The executor will convert this to a tool_result with the guidance message
+	return KiroToolUse{
+		ToolUseID:      info.ToolUseID,
+		Name:           info.ToolName,
+		Input:          nil, // No input since it was truncated
+		IsTruncated:    true,
+		TruncationInfo: &info,
+	}
+}
--- a/internal/translator/kiro/common/constants.go
+++ b/internal/translator/kiro/common/constants.go
@@ -29,6 +29,14 @@ const (
 	// InlineCodeMarker is the markdown inline code marker (backtick).
 	InlineCodeMarker = "`"

+	// DefaultAssistantContentWithTools is the fallback content for assistant messages
+	// that have tool_use but no text content. Kiro API requires non-empty content.
+	DefaultAssistantContentWithTools = "I'll help you with that."
+
+	// DefaultAssistantContent is the fallback content for assistant messages
+	// that have no content at all. Kiro API requires non-empty content.
+	DefaultAssistantContent = "I understand."
+
 	// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
 	// AWS Kiro API has a 2-3 minute timeout for large file write operations.
 	KiroAgenticSystemPrompt = `
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -576,9 +576,23 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir
 		}
 	}

+	// Truncate history if too long to prevent Kiro API errors
+	history = truncateHistoryIfNeeded(history)
+
 	return history, currentUserMsg, currentToolResults
 }

+const kiroMaxHistoryMessages = 50
+
+func truncateHistoryIfNeeded(history []KiroHistoryMessage) []KiroHistoryMessage {
+	if len(history) <= kiroMaxHistoryMessages {
+		return history
+	}
+
+	log.Debugf("kiro-openai: truncating history from %d to %d messages", len(history), kiroMaxHistoryMessages)
+	return history[len(history)-kiroMaxHistoryMessages:]
+}
+
 // buildUserMessageFromOpenAI builds a user message from OpenAI format and extracts tool results
 func buildUserMessageFromOpenAI(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
 	content := msg.Get("content")
@@ -645,13 +659,36 @@ func buildAssistantMessageFromOpenAI(msg gjson.Result) KiroAssistantResponseMess
 		contentBuilder.WriteString(content.String())
 	} else if content.IsArray() {
 		for _, part := range content.Array() {
-			if part.Get("type").String() == "text" {
+			partType := part.Get("type").String()
+			switch partType {
+			case "text":
 				contentBuilder.WriteString(part.Get("text").String())
+			case "tool_use":
+				// Handle tool_use in content array (Anthropic/OpenCode format)
+				// This is different from OpenAI's tool_calls format
+				toolUseID := part.Get("id").String()
+				toolName := part.Get("name").String()
+				inputData := part.Get("input")
+
+				inputMap := make(map[string]interface{})
+				if inputData.Exists() && inputData.IsObject() {
+					inputData.ForEach(func(key, value gjson.Result) bool {
+						inputMap[key.String()] = value.Value()
+						return true
+					})
+				}
+
+				toolUses = append(toolUses, KiroToolUse{
+					ToolUseID: toolUseID,
+					Name:      toolName,
+					Input:     inputMap,
+				})
+				log.Debugf("kiro-openai: extracted tool_use from content array: %s", toolName)
 			}
 		}
 	}

-	// Handle tool_calls
+	// Handle tool_calls (OpenAI format)
 	toolCalls := msg.Get("tool_calls")
 	if toolCalls.IsArray() {
 		for _, tc := range toolCalls.Array() {
@@ -677,8 +714,20 @@ func buildAssistantMessageFromOpenAI(msg gjson.Result) KiroAssistantResponseMess
 		}
 	}

+	// CRITICAL FIX: Kiro API requires non-empty content for assistant messages
+	// This can happen with compaction requests or error recovery scenarios
+	finalContent := contentBuilder.String()
+	if strings.TrimSpace(finalContent) == "" {
+		if len(toolUses) > 0 {
+			finalContent = kirocommon.DefaultAssistantContentWithTools
+		} else {
+			finalContent = kirocommon.DefaultAssistantContent
+		}
+		log.Debugf("kiro-openai: assistant content was empty, using default: %s", finalContent)
+	}
+
 	return KiroAssistantResponseMessage{
-		Content:  contentBuilder.String(),
+		Content:  finalContent,
 		ToolUses: toolUses,
 	}
 }
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -18,7 +17,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`

--- a/internal/translator/openai/gemini-cli/openai_gemini_request.go
+++ b/internal/translator/openai/gemini-cli/openai_gemini_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -17,7 +15,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiCLIRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
 	if gjson.GetBytes(rawJSON, "systemInstruction").Exists() {
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -21,7 +20,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`

@@ -83,16 +82,27 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 		}

 		// Map Gemini thinkingConfig to OpenAI reasoning_effort.
-		// Always perform conversion to support allowCompat models that may not be in registry
+		// Always perform conversion to support allowCompat models that may not be in registry.
+		// Note: Google official Python SDK sends snake_case fields (thinking_level/thinking_budget).
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
+			thinkingLevel := thinkingConfig.Get("thinkingLevel")
+			if !thinkingLevel.Exists() {
+				thinkingLevel = thinkingConfig.Get("thinking_level")
+			}
+			if thinkingLevel.Exists() {
 				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				if effort != "" {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
-			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
-					out, _ = sjson.Set(out, "reasoning_effort", effort)
+			} else {
+				thinkingBudget := thinkingConfig.Get("thinkingBudget")
+				if !thinkingBudget.Exists() {
+					thinkingBudget = thinkingConfig.Get("thinking_budget")
+				}
+				if thinkingBudget.Exists() {
+					if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+						out, _ = sjson.Set(out, "reasoning_effort", effort)
+					}
 				}
 			}
 		}
--- a/internal/translator/openai/openai/chat-completions/openai_openai_request.go
+++ b/internal/translator/openai/openai/chat-completions/openai_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"github.com/tidwall/sjson"
 )

@@ -25,7 +24,7 @@ func ConvertOpenAIRequestToOpenAI(modelName string, inputRawJSON []byte, _ bool)
 		// If there's an error, return the original JSON or handle the error appropriately.
 		// For now, we'll return the original, but in a real scenario, logging or a more robust error
 		// handling mechanism would be needed.
-		return bytes.Clone(inputRawJSON)
+		return inputRawJSON
 	}
 	return updatedJSON
 }
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -28,7 +27,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI chat completions format
 func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI chat completions template with default values
 	out := `{"model":"","messages":[],"stream":false}`

@@ -68,6 +67,9 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			case "message", "":
 				// Handle regular message conversion
 				role := item.Get("role").String()
+				if role == "developer" {
+					role = "user"
+				}
 				message := `{"role":"","content":""}`
 				message, _ = sjson.Set(message, "role", role)

@@ -167,7 +169,8 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			// Only function tools need structural conversion because Chat Completions nests details under "function".
 			toolType := tool.Get("type").String()
 			if toolType != "" && toolType != "function" && tool.IsObject() {
-				chatCompletionsTools = append(chatCompletionsTools, tool.Value())
+				// Almost all providers lack built-in tools, so we just ignore them.
+				// chatCompletionsTools = append(chatCompletionsTools, tool.Value())
 				return true
 			}

--- a/internal/util/claude_model_test.go
+++ b/internal/util/claude_model_test.go
@@ -11,6 +11,7 @@ func TestIsClaudeThinkingModel(t *testing.T) {
 		// Claude thinking models - should return true
 		{"claude-sonnet-4-5-thinking", "claude-sonnet-4-5-thinking", true},
 		{"claude-opus-4-5-thinking", "claude-opus-4-5-thinking", true},
+		{"claude-opus-4-6-thinking", "claude-opus-4-6-thinking", true},
 		{"Claude-Sonnet-Thinking uppercase", "Claude-Sonnet-4-5-Thinking", true},
 		{"claude thinking mixed case", "Claude-THINKING-Model", true},

--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -61,14 +61,20 @@ func cleanJSONSchema(jsonStr string, addPlaceholder bool) string {

 // removeKeywords removes all occurrences of specified keywords from the JSON schema.
 func removeKeywords(jsonStr string, keywords []string) string {
+	deletePaths := make([]string, 0)
+	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			jsonStr, _ = sjson.Delete(jsonStr, p)
+			deletePaths = append(deletePaths, p)
 		}
 	}
+	sortByDepth(deletePaths)
+	for _, p := range deletePaths {
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+	}
 	return jsonStr
 }

@@ -235,8 +241,9 @@ var unsupportedConstraints = []string{
 }

 func moveConstraintsToDescription(jsonStr string) string {
+	pathsByField := findPathsByFields(jsonStr, unsupportedConstraints)
 	for _, key := range unsupportedConstraints {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			val := gjson.Get(jsonStr, p)
 			if !val.Exists() || val.IsObject() || val.IsArray() {
 				continue
@@ -424,14 +431,21 @@ func removeUnsupportedKeywords(jsonStr string) string {
 		"$schema", "$defs", "definitions", "const", "$ref", "additionalProperties",
 		"propertyNames", // Gemini doesn't support property name validation
 	)
+
+	deletePaths := make([]string, 0)
+	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			jsonStr, _ = sjson.Delete(jsonStr, p)
+			deletePaths = append(deletePaths, p)
 		}
 	}
+	sortByDepth(deletePaths)
+	for _, p := range deletePaths {
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+	}
 	// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
 	jsonStr = removeExtensionFields(jsonStr)
 	return jsonStr
@@ -581,6 +595,42 @@ func findPaths(jsonStr, field string) []string {
 	return paths
 }

+func findPathsByFields(jsonStr string, fields []string) map[string][]string {
+	set := make(map[string]struct{}, len(fields))
+	for _, field := range fields {
+		set[field] = struct{}{}
+	}
+	paths := make(map[string][]string, len(set))
+	walkForFields(gjson.Parse(jsonStr), "", set, paths)
+	return paths
+}
+
+func walkForFields(value gjson.Result, path string, fields map[string]struct{}, paths map[string][]string) {
+	switch value.Type {
+	case gjson.JSON:
+		value.ForEach(func(key, val gjson.Result) bool {
+			keyStr := key.String()
+			safeKey := escapeGJSONPathKey(keyStr)
+
+			var childPath string
+			if path == "" {
+				childPath = safeKey
+			} else {
+				childPath = path + "." + safeKey
+			}
+
+			if _, ok := fields[keyStr]; ok {
+				paths[keyStr] = append(paths[keyStr], childPath)
+			}
+
+			walkForFields(val, childPath, fields, paths)
+			return true
+		})
+	case gjson.String, gjson.Number, gjson.True, gjson.False, gjson.Null:
+		// Terminal types - no further traversal needed
+	}
+}
+
 func sortByDepth(paths []string) {
 	sort.Slice(paths, func(i, j int) bool { return len(paths[i]) > len(paths[j]) })
 }
@@ -667,6 +717,9 @@ func orDefault(val, def string) string {
 }

 func escapeGJSONPathKey(key string) string {
+	if strings.IndexAny(key, ".*?") == -1 {
+		return key
+	}
 	return gjsonPathKeyReplacer.Replace(key)
 }

--- a/internal/util/translator.go
+++ b/internal/util/translator.go
@@ -6,7 +6,6 @@ package util
 import (
 	"bytes"
 	"fmt"
-	"strings"

 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -33,15 +32,15 @@ func Walk(value gjson.Result, path, field string, paths *[]string) {
 			// . -> \.
 			// * -> \*
 			// ? -> \?
-			var keyReplacer = strings.NewReplacer(".", "\\.", "*", "\\*", "?", "\\?")
-			safeKey := keyReplacer.Replace(key.String())
+			keyStr := key.String()
+			safeKey := escapeGJSONPathKey(keyStr)

 			if path == "" {
 				childPath = safeKey
 			} else {
 				childPath = path + "." + safeKey
 			}
-			if key.String() == field {
+			if keyStr == field {
 				*paths = append(*paths, childPath)
 			}
 			Walk(val, childPath, field, paths)
@@ -87,15 +86,6 @@ func RenameKey(jsonStr, oldKeyPath, newKeyPath string) (string, error) {
 	return finalJson, nil
 }

-func DeleteKey(jsonStr, keyName string) string {
-	paths := make([]string, 0)
-	Walk(gjson.Parse(jsonStr), "", keyName, &paths)
-	for _, p := range paths {
-		jsonStr, _ = sjson.Delete(jsonStr, p)
-	}
-	return jsonStr
-}
-
 // FixJSON converts non-standard JSON that uses single quotes for strings into
 // RFC 8259-compliant JSON by converting those single-quoted strings to
 // double-quoted strings with proper escaping.
--- a/internal/watcher/clients.go
+++ b/internal/watcher/clients.go
@@ -6,6 +6,7 @@ import (
 	"context"
 	"crypto/sha256"
 	"encoding/hex"
+	"encoding/json"
 	"fmt"
 	"io/fs"
 	"os"
@@ -15,6 +16,7 @@ import (

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 )
@@ -72,6 +74,7 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
 		w.clientsMutex.Lock()

 		w.lastAuthHashes = make(map[string]string)
+		w.lastAuthContents = make(map[string]*coreauth.Auth)
 		if resolvedAuthDir, errResolveAuthDir := util.ResolveAuthDir(cfg.AuthDir); errResolveAuthDir != nil {
 			log.Errorf("failed to resolve auth directory for hash cache: %v", errResolveAuthDir)
 		} else if resolvedAuthDir != "" {
@@ -84,6 +87,11 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
 						sum := sha256.Sum256(data)
 						normalizedPath := w.normalizeAuthPath(path)
 						w.lastAuthHashes[normalizedPath] = hex.EncodeToString(sum[:])
+						// Parse and cache auth content for future diff comparisons
+						var auth coreauth.Auth
+						if errParse := json.Unmarshal(data, &auth); errParse == nil {
+							w.lastAuthContents[normalizedPath] = &auth
+						}
 					}
 				}
 				return nil
@@ -127,6 +135,13 @@ func (w *Watcher) addOrUpdateClient(path string) {
 	curHash := hex.EncodeToString(sum[:])
 	normalized := w.normalizeAuthPath(path)

+	// Parse new auth content for diff comparison
+	var newAuth coreauth.Auth
+	if errParse := json.Unmarshal(data, &newAuth); errParse != nil {
+		log.Errorf("failed to parse auth file %s: %v", filepath.Base(path), errParse)
+		return
+	}
+
 	w.clientsMutex.Lock()

 	cfg := w.config
@@ -141,7 +156,26 @@ func (w *Watcher) addOrUpdateClient(path string) {
 		return
 	}

+	// Get old auth for diff comparison
+	var oldAuth *coreauth.Auth
+	if w.lastAuthContents != nil {
+		oldAuth = w.lastAuthContents[normalized]
+	}
+
+	// Compute and log field changes
+	if changes := diff.BuildAuthChangeDetails(oldAuth, &newAuth); len(changes) > 0 {
+		log.Debugf("auth field changes for %s:", filepath.Base(path))
+		for _, c := range changes {
+			log.Debugf("  %s", c)
+		}
+	}
+
+	// Update caches
 	w.lastAuthHashes[normalized] = curHash
+	if w.lastAuthContents == nil {
+		w.lastAuthContents = make(map[string]*coreauth.Auth)
+	}
+	w.lastAuthContents[normalized] = &newAuth

 	w.clientsMutex.Unlock() // Unlock before the callback

@@ -160,6 +194,7 @@ func (w *Watcher) removeClient(path string) {

 	cfg := w.config
 	delete(w.lastAuthHashes, normalized)
+	delete(w.lastAuthContents, normalized)

 	w.clientsMutex.Unlock() // Release the lock before the callback

--- a/internal/watcher/diff/auth_diff.go
+++ b/internal/watcher/diff/auth_diff.go
@@ -0,0 +1,44 @@
+// auth_diff.go computes human-readable diffs for auth file field changes.
+package diff
+
+import (
+	"fmt"
+	"strings"
+
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+// BuildAuthChangeDetails computes a redacted, human-readable list of auth field changes.
+// Only prefix, proxy_url, and disabled fields are tracked; sensitive data is never printed.
+func BuildAuthChangeDetails(oldAuth, newAuth *coreauth.Auth) []string {
+	changes := make([]string, 0, 3)
+
+	// Handle nil cases by using empty Auth as default
+	if oldAuth == nil {
+		oldAuth = &coreauth.Auth{}
+	}
+	if newAuth == nil {
+		return changes
+	}
+
+	// Compare prefix
+	oldPrefix := strings.TrimSpace(oldAuth.Prefix)
+	newPrefix := strings.TrimSpace(newAuth.Prefix)
+	if oldPrefix != newPrefix {
+		changes = append(changes, fmt.Sprintf("prefix: %s -> %s", oldPrefix, newPrefix))
+	}
+
+	// Compare proxy_url (redacted)
+	oldProxy := strings.TrimSpace(oldAuth.ProxyURL)
+	newProxy := strings.TrimSpace(newAuth.ProxyURL)
+	if oldProxy != newProxy {
+		changes = append(changes, fmt.Sprintf("proxy_url: %s -> %s", formatProxyURL(oldProxy), formatProxyURL(newProxy)))
+	}
+
+	// Compare disabled
+	if oldAuth.Disabled != newAuth.Disabled {
+		changes = append(changes, fmt.Sprintf("disabled: %t -> %t", oldAuth.Disabled, newAuth.Disabled))
+	}
+
+	return changes
+}
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -27,6 +27,12 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.Debug != newCfg.Debug {
 		changes = append(changes, fmt.Sprintf("debug: %t -> %t", oldCfg.Debug, newCfg.Debug))
 	}
+	if oldCfg.Pprof.Enable != newCfg.Pprof.Enable {
+		changes = append(changes, fmt.Sprintf("pprof.enable: %t -> %t", oldCfg.Pprof.Enable, newCfg.Pprof.Enable))
+	}
+	if strings.TrimSpace(oldCfg.Pprof.Addr) != strings.TrimSpace(newCfg.Pprof.Addr) {
+		changes = append(changes, fmt.Sprintf("pprof.addr: %s -> %s", strings.TrimSpace(oldCfg.Pprof.Addr), strings.TrimSpace(newCfg.Pprof.Addr)))
+	}
 	if oldCfg.LoggingToFile != newCfg.LoggingToFile {
 		changes = append(changes, fmt.Sprintf("logging-to-file: %t -> %t", oldCfg.LoggingToFile, newCfg.LoggingToFile))
 	}
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -38,6 +38,7 @@ type Watcher struct {
 	reloadCallback    func(*config.Config)
 	watcher           *fsnotify.Watcher
 	lastAuthHashes    map[string]string
+	lastAuthContents  map[string]*coreauth.Auth
 	lastRemoveTimes   map[string]time.Time
 	lastConfigHash    string
 	authQueue         chan<- AuthUpdate
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -155,20 +155,6 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
 	return map[string]any{idempotencyKeyMetadataKey: key}
 }

-func mergeMetadata(base, overlay map[string]any) map[string]any {
-	if len(base) == 0 && len(overlay) == 0 {
-		return nil
-	}
-	out := make(map[string]any, len(base)+len(overlay))
-	for k, v := range base {
-		out[k] = v
-	}
-	for k, v := range overlay {
-		out[k] = v
-	}
-	return out
-}
-
 // BaseAPIHandler contains the handlers for API endpoints.
 // It holds a pool of clients to interact with the backend service and manages
 // load balancing, client selection, and configuration.
@@ -392,14 +378,18 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
-		OriginalRequest: cloneBytes(rawJSON),
+		OriginalRequest: rawJSON,
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
@@ -419,7 +409,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	return cloneBytes(resp.Payload), nil
+	return resp.Payload, nil
 }

 // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
@@ -431,14 +421,18 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
-		OriginalRequest: cloneBytes(rawJSON),
+		OriginalRequest: rawJSON,
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
@@ -458,7 +452,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	return cloneBytes(resp.Payload), nil
+	return resp.Payload, nil
 }

 // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
@@ -473,14 +467,18 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          true,
 		Alt:             alt,
-		OriginalRequest: cloneBytes(rawJSON),
+		OriginalRequest: rawJSON,
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
@@ -669,17 +667,6 @@ func cloneBytes(src []byte) []byte {
 	return dst
 }

-func cloneMetadata(src map[string]any) map[string]any {
-	if len(src) == 0 {
-		return nil
-	}
-	dst := make(map[string]any, len(src))
-	for k, v := range src {
-		dst[k] = v
-	}
-	return dst
-}
-
 // WriteErrorResponse writes an error message to the response writer using the HTTP status embedded in the message.
 func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.ErrorMessage) {
 	status := http.StatusInternalServerError
@@ -710,7 +697,7 @@ func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.Erro
 	var previous []byte
 	if existing, exists := c.Get("API_RESPONSE"); exists {
 		if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
-			previous = bytes.Clone(existingBytes)
+			previous = existingBytes
 		}
 	}
 	appendAPIResponse(c, body)
--- a/sdk/auth/kiro.go
+++ b/sdk/auth/kiro.go
@@ -70,14 +70,25 @@ func (a *KiroAuthenticator) createAuthRecord(tokenData *kiroauth.KiroTokenData,
 	}

 	// Determine label and identifier based on auth method
+	// Generate sequence number for uniqueness
+	seq := time.Now().UnixNano() % 100000
+
 	var label, idPart string
 	if tokenData.AuthMethod == "idc" {
 		label = "kiro-idc"
-		// For IDC auth, always use clientID as identifier
-		if tokenData.ClientID != "" {
-			idPart = kiroauth.SanitizeEmailForFilename(tokenData.ClientID)
+		// Priority: email > startUrl identifier > sequence only
+		// Email is unique, so no sequence needed when email is available
+		if tokenData.Email != "" {
+			idPart = kiroauth.SanitizeEmailForFilename(tokenData.Email)
+		} else if tokenData.StartURL != "" {
+			identifier := kiroauth.ExtractIDCIdentifier(tokenData.StartURL)
+			if identifier != "" {
+				idPart = fmt.Sprintf("%s-%05d", identifier, seq)
+			} else {
+				idPart = fmt.Sprintf("%05d", seq)
+			}
 		} else {
-			idPart = fmt.Sprintf("%d", time.Now().UnixNano()%100000)
+			idPart = fmt.Sprintf("%05d", seq)
 		}
 	} else {
 		label = fmt.Sprintf("kiro-%s", source)
@@ -126,14 +137,14 @@ func (a *KiroAuthenticator) createAuthRecord(tokenData *kiroauth.KiroTokenData,
 	}

 	record := &coreauth.Auth{
-		ID:        fileName,
-		Provider:  "kiro",
-		FileName:  fileName,
-		Label:     label,
-		Status:    coreauth.StatusActive,
-		CreatedAt: now,
-		UpdatedAt: now,
-		Metadata:  metadata,
+		ID:         fileName,
+		Provider:   "kiro",
+		FileName:   fileName,
+		Label:      label,
+		Status:     coreauth.StatusActive,
+		CreatedAt:  now,
+		UpdatedAt:  now,
+		Metadata:   metadata,
 		Attributes: attributes,
 		// NextRefreshAfter: 20 minutes before expiry
 		NextRefreshAfter: expiresAt.Add(-20 * time.Minute),
--- a/sdk/cliproxy/pprof_server.go
+++ b/sdk/cliproxy/pprof_server.go
@@ -0,0 +1,163 @@
+package cliproxy
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/pprof"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	log "github.com/sirupsen/logrus"
+)
+
+type pprofServer struct {
+	mu      sync.Mutex
+	server  *http.Server
+	addr    string
+	enabled bool
+}
+
+func newPprofServer() *pprofServer {
+	return &pprofServer{}
+}
+
+func (s *Service) applyPprofConfig(cfg *config.Config) {
+	if s == nil || cfg == nil {
+		return
+	}
+	if s.pprofServer == nil {
+		s.pprofServer = newPprofServer()
+	}
+	s.pprofServer.Apply(cfg)
+}
+
+func (s *Service) shutdownPprof(ctx context.Context) error {
+	if s == nil || s.pprofServer == nil {
+		return nil
+	}
+	return s.pprofServer.Shutdown(ctx)
+}
+
+func (p *pprofServer) Apply(cfg *config.Config) {
+	if p == nil || cfg == nil {
+		return
+	}
+	addr := strings.TrimSpace(cfg.Pprof.Addr)
+	if addr == "" {
+		addr = config.DefaultPprofAddr
+	}
+	enabled := cfg.Pprof.Enable
+
+	p.mu.Lock()
+	currentServer := p.server
+	currentAddr := p.addr
+	p.addr = addr
+	p.enabled = enabled
+	if !enabled {
+		p.server = nil
+		p.mu.Unlock()
+		if currentServer != nil {
+			p.stopServer(currentServer, currentAddr, "disabled")
+		}
+		return
+	}
+	if currentServer != nil && currentAddr == addr {
+		p.mu.Unlock()
+		return
+	}
+	p.server = nil
+	p.mu.Unlock()
+
+	if currentServer != nil {
+		p.stopServer(currentServer, currentAddr, "restarted")
+	}
+
+	p.startServer(addr)
+}
+
+func (p *pprofServer) Shutdown(ctx context.Context) error {
+	if p == nil {
+		return nil
+	}
+	p.mu.Lock()
+	currentServer := p.server
+	currentAddr := p.addr
+	p.server = nil
+	p.enabled = false
+	p.mu.Unlock()
+
+	if currentServer == nil {
+		return nil
+	}
+	return p.stopServerWithContext(ctx, currentServer, currentAddr, "shutdown")
+}
+
+func (p *pprofServer) startServer(addr string) {
+	mux := newPprofMux()
+	server := &http.Server{
+		Addr:              addr,
+		Handler:           mux,
+		ReadHeaderTimeout: 5 * time.Second,
+	}
+
+	p.mu.Lock()
+	if !p.enabled || p.addr != addr || p.server != nil {
+		p.mu.Unlock()
+		return
+	}
+	p.server = server
+	p.mu.Unlock()
+
+	log.Infof("pprof server starting on %s", addr)
+	go func() {
+		if errServe := server.ListenAndServe(); errServe != nil && !errors.Is(errServe, http.ErrServerClosed) {
+			log.Errorf("pprof server failed on %s: %v", addr, errServe)
+			p.mu.Lock()
+			if p.server == server {
+				p.server = nil
+			}
+			p.mu.Unlock()
+		}
+	}()
+}
+
+func (p *pprofServer) stopServer(server *http.Server, addr string, reason string) {
+	_ = p.stopServerWithContext(context.Background(), server, addr, reason)
+}
+
+func (p *pprofServer) stopServerWithContext(ctx context.Context, server *http.Server, addr string, reason string) error {
+	if server == nil {
+		return nil
+	}
+	stopCtx := ctx
+	if stopCtx == nil {
+		stopCtx = context.Background()
+	}
+	stopCtx, cancel := context.WithTimeout(stopCtx, 5*time.Second)
+	defer cancel()
+	if errStop := server.Shutdown(stopCtx); errStop != nil {
+		log.Errorf("pprof server stop failed on %s: %v", addr, errStop)
+		return errStop
+	}
+	log.Infof("pprof server stopped on %s (%s)", addr, reason)
+	return nil
+}
+
+func newPprofMux() *http.ServeMux {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/debug/pprof/", pprof.Index)
+	mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+	mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+	mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+	mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+	mux.Handle("/debug/pprof/allocs", pprof.Handler("allocs"))
+	mux.Handle("/debug/pprof/block", pprof.Handler("block"))
+	mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
+	mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
+	mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
+	mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
+	return mux
+}
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -58,6 +58,9 @@ type Service struct {
 	// server is the HTTP API server instance.
 	server *api.Server

+	// pprofServer manages the optional pprof HTTP debug server.
+	pprofServer *pprofServer
+
 	// serverErr channel for server startup/shutdown errors.
 	serverErr chan error

@@ -281,27 +284,42 @@ func (s *Service) wsOnDisconnected(channelID string, reason error) {
 }

 func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.Auth) {
-	if s == nil || auth == nil || auth.ID == "" {
-		return
-	}
-	if s.coreManager == nil {
+	if s == nil || s.coreManager == nil || auth == nil || auth.ID == "" {
 		return
 	}
 	auth = auth.Clone()
 	s.ensureExecutorsForAuth(auth)
-	s.registerModelsForAuth(auth)
-	if existing, ok := s.coreManager.GetByID(auth.ID); ok && existing != nil {
+
+	// IMPORTANT: Update coreManager FIRST, before model registration.
+	// This ensures that configuration changes (proxy_url, prefix, etc.) take effect
+	// immediately for API calls, rather than waiting for model registration to complete.
+	// Model registration may involve network calls (e.g., FetchAntigravityModels) that
+	// could timeout if the new proxy_url is unreachable.
+	op := "register"
+	var err error
+	if existing, ok := s.coreManager.GetByID(auth.ID); ok {
 		auth.CreatedAt = existing.CreatedAt
 		auth.LastRefreshedAt = existing.LastRefreshedAt
 		auth.NextRefreshAfter = existing.NextRefreshAfter
-		if _, err := s.coreManager.Update(ctx, auth); err != nil {
-			log.Errorf("failed to update auth %s: %v", auth.ID, err)
+		op = "update"
+		_, err = s.coreManager.Update(ctx, auth)
+	} else {
+		_, err = s.coreManager.Register(ctx, auth)
+	}
+	if err != nil {
+		log.Errorf("failed to %s auth %s: %v", op, auth.ID, err)
+		current, ok := s.coreManager.GetByID(auth.ID)
+		if !ok || current.Disabled {
+			GlobalModelRegistry().UnregisterClient(auth.ID)
+			return
 		}
-		return
-	}
-	if _, err := s.coreManager.Register(ctx, auth); err != nil {
-		log.Errorf("failed to register auth %s: %v", auth.ID, err)
+		auth = current
 	}
+
+	// Register models after auth is updated in coreManager.
+	// This operation may block on network calls, but the auth configuration
+	// is already effective at this point.
+	s.registerModelsForAuth(auth)
 }

 func (s *Service) applyCoreAuthRemoval(ctx context.Context, id string) {
@@ -516,6 +534,8 @@ func (s *Service) Run(ctx context.Context) error {
 	time.Sleep(100 * time.Millisecond)
 	fmt.Printf("API server started successfully on: %s:%d\n", s.cfg.Host, s.cfg.Port)

+	s.applyPprofConfig(s.cfg)
+
 	if s.hooks.OnAfterStart != nil {
 		s.hooks.OnAfterStart(s)
 	}
@@ -561,6 +581,7 @@ func (s *Service) Run(ctx context.Context) error {
 		}

 		s.applyRetryConfig(newCfg)
+		s.applyPprofConfig(newCfg)
 		if s.server != nil {
 			s.server.UpdateClients(newCfg)
 		}
@@ -666,6 +687,13 @@ func (s *Service) Shutdown(ctx context.Context) error {
 			s.authQueueStop = nil
 		}

+		if errShutdownPprof := s.shutdownPprof(ctx); errShutdownPprof != nil {
+			log.Errorf("failed to stop pprof server: %v", errShutdownPprof)
+			if shutdownErr == nil {
+				shutdownErr = errShutdownPprof
+			}
+		}
+
 		// no legacy clients to persist

 		if s.server != nil {
--- a/test/builtin_tools_translation_test.go
+++ b/test/builtin_tools_translation_test.go
@@ -33,7 +33,7 @@ func TestOpenAIToCodex_PreservesBuiltinTools(t *testing.T) {
 	}
 }

-func TestOpenAIResponsesToOpenAI_PreservesBuiltinTools(t *testing.T) {
+func TestOpenAIResponsesToOpenAI_IgnoresBuiltinTools(t *testing.T) {
 	in := []byte(`{
 		"model":"gpt-5",
 		"input":[{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
@@ -42,13 +42,7 @@ func TestOpenAIResponsesToOpenAI_PreservesBuiltinTools(t *testing.T) {

 	out := sdktranslator.TranslateRequest(sdktranslator.FormatOpenAIResponse, sdktranslator.FormatOpenAI, "gpt-5", in, false)

-	if got := gjson.GetBytes(out, "tools.#").Int(); got != 1 {
-		t.Fatalf("expected 1 tool, got %d: %s", got, string(out))
-	}
-	if got := gjson.GetBytes(out, "tools.0.type").String(); got != "web_search" {
-		t.Fatalf("expected tools[0].type=web_search, got %q: %s", got, string(out))
-	}
-	if got := gjson.GetBytes(out, "tools.0.search_context_size").String(); got != "low" {
-		t.Fatalf("expected tools[0].search_context_size=low, got %q: %s", got, string(out))
+	if got := gjson.GetBytes(out, "tools.#").Int(); got != 0 {
+		t.Fatalf("expected 0 tools (builtin tools not supported in Chat Completions), got %d: %s", got, string(out))
 	}
 }
Author	SHA1	Message	Date
Luis Pater	e35ffaa925	Merge pull request #186 from taetaetae/fix/kiro-claude-compaction-empty-content fix(kiro): handle empty content in Claude format assistant messages	2026-02-06 05:39:41 +08:00
Luis Pater	165e03f3a7	Merge branch 'router-for-me:main' into main	2026-02-06 05:32:10 +08:00
Luis Pater	86bdb7808c	Merge pull request #189 from PancakeZik/main feat: add Claude Opus 4.6 support for Kiro	2026-02-06 05:31:54 +08:00
Luis Pater	b4e034be1c	refactor(executor): centralize Codex client version and user agent constants - Introduced `codexClientVersion` and `codexUserAgent` constants for better maintainability. - Updated `EnsureHeader` calls to use the new constants.	2026-02-06 05:30:28 +08:00
Joao	84fcebf538	feat: add Claude Opus 4.6 support for Kiro - Add kiro-claude-opus-4-6 and kiro-claude-opus-4-6-agentic to model registry - Add model ID mappings for claude-opus-4.6 variants - Support both kiro- prefix and native format (claude-opus-4.6) - Tested and working with Kiro API	2026-02-05 21:26:29 +00:00
Luis Pater	74d9a1ffed	Merge branch 'router-for-me:main' into main	2026-02-06 03:27:27 +08:00
Luis Pater	a5a25dec57	refactor(translator, executor): remove redundant `bytes.Clone` calls for improved performance - Replaced all instances of `bytes.Clone` with direct references to enhance efficiency. - Simplified payload handling across executors and translators by eliminating unnecessary data duplication.	2026-02-06 03:26:29 +08:00
Luis Pater	c71905e5e8	Merge pull request #1440 from kvokka/add-cc-opus-4-6 feat(registry): register Claude 4.6 static data	2026-02-06 03:23:59 +08:00
kvokka	bc78d668ac	feat(registry): register Claude 4.6 static data Add model definition for Claude 4.6 Opus with 200k context length and thinking support capabilities.	2026-02-05 23:13:36 +04:00
Luis Pater	e93eebc2e9	Merge branch 'router-for-me:main' into main	2026-02-06 01:53:55 +08:00
Luis Pater	5bd0896ad7	feat(registry): add GPT 5.3 Codex model to static data	2026-02-06 01:52:41 +08:00
Luis Pater	09ecfbcaed	refactor(executor): optimize payload cloning and streamline SDK translator usage - Replaced unnecessary `bytes.Clone` calls for `opts.OriginalRequest` throughout executors. - Introduced intermediate variable `originalPayloadSource` to simplify payload processing. - Ensured better clarity and structure in request translation logic.	2026-02-06 01:44:20 +08:00
Luis Pater	f0bd14b64f	refactor(util): optimize JSON schema processing and keyword removal logic - Consolidated path-finding logic into a new `findPathsByFields` helper function. - Refactored repetitive loop structures to improve readability and performance. - Added depth-based sorting for deletion paths to ensure proper removal order.	2026-02-06 00:19:56 +08:00
taetaetae	14f044ce4f	refactor: extract default assistant content to shared constants Apply code review feedback from gemini-code-assist: - Move fallback strings to kirocommon package as exported constants - Update kiro_claude_request.go to use shared constants - Update kiro_openai_request.go to use shared constants - Improves maintainability and avoids duplication	2026-02-05 23:36:57 +09:00
taetaetae	88872baffc	fix(kiro): handle empty content in Claude format assistant messages Problem: - PR #181 fixed empty content for OpenAI format (kiro_openai_request.go) - But Claude format (kiro_claude_request.go) was not fixed - OpenCode uses Claude format (/v1/messages endpoint) - When assistant messages have only tool_use (no text), content becomes empty - This causes 'Improperly formed request' errors from Kiro API Example of problematic message format: { "role": "assistant", "content": [ {"type": "tool_use", "id": "...", "name": "todowrite", "input": {...}} ] } Solution: - Add empty content fallback in BuildAssistantMessageStruct (Claude format) - Same fix as PR #181 applied to kiro_openai_request.go Fixes compaction failures for OpenCode + Quotio + CLIProxyAPIPlus + Kiro stack	2026-02-05 23:27:35 +09:00
Luis Pater	dbecf5330e	Merge pull request #181 from taetaetae/fix/kiro-compaction-tool-use-content fix(kiro): handle tool_use in content array for compaction requests	2026-02-05 20:17:32 +08:00
Luis Pater	1c0e102637	Merge pull request #185 from router-for-me/plus v6.7.48	2026-02-05 19:53:42 +08:00
Luis Pater	6b6b343922	Merge branch 'main' into plus	2026-02-05 19:51:56 +08:00
Luis Pater	f7d82fda3f	feat(registry): add Kimi-K2.5 model to static data	2026-02-05 19:48:04 +08:00
Luis Pater	25c6b479c7	refactor(util, executor): optimize payload handling and schema processing - Replaced repetitive string operations with a centralized `escapeGJSONPathKey` function. - Streamlined handling of JSON schema cleaning for Gemini and Antigravity requests. - Improved payload management by transitioning from byte slices to strings for processing. - Removed unnecessary cloning of byte slices in several places.	2026-02-05 19:00:30 +08:00
Chén Mù	7cf9ff0345	Merge pull request #1429 from neavo/fix/gemini-python-sdk-thinking-fields fix(gemini): support snake_case thinking config fields from Python SDK	2026-02-05 14:32:58 +08:00
hkfires	209d74062a	fix(thinking): ensure includeThoughts is false for ModeNone in budget processing	2026-02-05 10:24:42 +08:00
hkfires	d86b13c9cb	fix(thinking): support user-defined includeThoughts setting with camelCase and snake_case variants Fixes #1378	2026-02-05 10:07:41 +08:00
hkfires	075e3ab69e	fix(test): rename test function to reflect behavior change for builtin tools	2026-02-05 09:25:34 +08:00
taetaetae	49ef22ab78	refactor: simplify inputMap initialization logic Apply code review feedback from gemini-code-assist: - Initialize inputMap upfront instead of using nested if blocks - Combine Exists() and IsObject() checks into single condition - Remove redundant nil check	2026-02-05 07:12:42 +09:00
taetaetae	ae4638712e	fix(kiro): handle tool_use in content array for compaction requests Problem: - PR #162 fixed empty string content but missed array content with tool_use - OpenCode's compaction requests send assistant messages with content as array - When content array contains only tool_use (no text), content becomes empty - This causes 'Improperly formed request' errors from Kiro API Example of problematic message format: { "role": "assistant", "content": [ {"type": "tool_use", "id": "...", "name": "todowrite", "input": {...}} ] } Solution: - Extract tool_use from content array (Anthropic/OpenCode format) - This is in addition to existing tool_calls handling (OpenAI format) - The empty content fallback from PR #162 will then work correctly Fixes compaction failures that persisted after PR #162 merge.	2026-02-05 07:08:14 +09:00
Luis Pater	c1c9483752	Merge pull request #1422 from dannycreations/feat-gemini-cli-claude-mime feat(gemini-cli): support image content in Claude request conversion	2026-02-05 01:21:09 +08:00
neavo	6c65fdf54b	fix(gemini): support snake_case thinking config fields from Python SDK Google official Gemini Python SDK sends thinking_level, thinking_budget, and include_thoughts (snake_case) instead of thinkingLevel, thinkingBudget, and includeThoughts (camelCase). This caused thinking configuration to be ignored when using Python SDK. Changes: - Extract layer: extractGeminiConfig now reads snake_case as fallback - Apply layer: Gemini/CLI/Antigravity appliers clean up snake_case fields - Translator layer: Gemini->OpenAI/Claude/Codex translators support fallback - Tests: Added 4 test cases for snake_case field coverage Fixes #1426	2026-02-04 21:12:47 +08:00
Luis Pater	4874253d1e	Merge pull request #1425 from router-for-me/auth fix(cliproxy): update auth before model registration	2026-02-04 15:01:01 +08:00
Luis Pater	b72250349f	Merge pull request #1423 from router-for-me/watcher feat(watcher): log auth field changes on reload	2026-02-04 15:00:38 +08:00
hkfires	116573311f	fix(cliproxy): update auth before model registration	2026-02-04 14:03:15 +08:00
hkfires	4af712544d	feat(watcher): log auth field changes on reload Cache parsed auth contents and compute redacted diffs for prefix, proxy_url, and disabled when auth files are added or updated.	2026-02-04 12:29:56 +08:00
dannycreations	3f9c9591bd	feat(gemini-cli): support image content in Claude request conversion - Add logic to handle `image` content type during request translation. - Map Claude base64 image data to Gemini's `inlineData` structure. - Support automatic extraction of `media_type` and `data` for image parts.	2026-02-04 11:00:37 +07:00
Luis Pater	1548c567ab	feat(pprof): add support for configurable pprof HTTP debug server - Introduced a new `pprof` server to enable/debug HTTP profiling. - Added configuration options for enabling/disabling and specifying the server address. - Integrated pprof server lifecycle management with `Service`. #1287	2026-02-04 02:39:26 +08:00
Luis Pater	5b23fc570c	Merge pull request #1396 from Xm798/fix/log-dir-tilde-expansion fix(logging): expand tilde in auth-dir path for log directory	2026-02-04 02:00:13 +08:00
Luis Pater	04e1c7a05a	docs: reorganize and update README entries for CLIProxyAPI projects	2026-02-04 01:49:27 +08:00
Luis Pater	9181e72204	Merge pull request #1409 from wangdabaoqq/main docs: Add a new client application - Lin Jun	2026-02-04 01:47:31 +08:00
Luis Pater	b854ee4680	fix(registry): remove redundant kiro model definition entry	2026-02-04 01:28:12 +08:00
Luis Pater	533a6bd15c	Merge pull request #176 from router-for-me/plus v6.7.45	2026-02-04 01:25:54 +08:00
Luis Pater	45546c1cf7	Merge branch 'main' into plus	2026-02-04 01:25:45 +08:00
Luis Pater	e2169e3987	Merge pull request #175 from Skyuno/fix/json-truncation-rework fix(kiro): Rework JSON Truncation Handling with SOFT_LIMIT_REACHED	2026-02-04 01:24:35 +08:00
Luis Pater	e85305c815	Merge pull request #174 from gogoing1024/main feat(registry): add kiro channel support for model definitions	2026-02-04 01:08:43 +08:00
Luis Pater	8d4554bf17	Merge pull request #173 from starsdream666/main 修复：docker镜像上传时用户名使用变量并增加手动构建，修复OAuth 排除列表与OAuth 模型别名中kiro无法获取模型问题	2026-02-04 01:06:49 +08:00
Luis Pater	f628e4dcbb	Merge pull request #172 from Skyuno/fix/idc-filename-collision fix(kiro): prioritize email for filename to prevent collisions	2026-02-04 01:04:33 +08:00
Luis Pater	7accae4b6a	Merge pull request #171 from cielhaidir/main feat(copilot): Add copilot usage monitoring in endpoint /api-call	2026-02-04 01:02:57 +08:00
Luis Pater	3354fae391	Merge pull request #162 from taetaetae/fix/kiro-compaction-empty-content fix(kiro): handle empty content in messages to prevent Bad Request errors	2026-02-04 01:01:48 +08:00
宝宝宝	4939865f6d	Add a new client application - Lin Jun	2026-02-03 23:55:24 +08:00
宝宝宝	3da7f7482e	Add a new client application - Lin Jun	2026-02-03 23:36:34 +08:00
宝宝宝	9072b029b2	Add a new client application - Lin Jun	2026-02-03 23:35:53 +08:00
宝宝宝	c296cfb8c0	docs: Add a new client application - Lin Jun	2026-02-03 23:32:50 +08:00
Luis Pater	2707377fcb	docs: add AICodeMirror sponsorship details to README files	2026-02-03 22:34:50 +08:00
Luis Pater	259f586ff7	Fixed: #1398 fix(translator): use model group caching for client signature validation	2026-02-03 22:04:52 +08:00
Luis Pater	d885b81f23	Fixed: #1403 fix(translator): handle "input" field transformation for OpenAI responses	2026-02-03 21:49:30 +08:00
Luis Pater	fe6bffd080	fixed: #1407 fix(translator): adjust "developer" role to "user" and ignore unsupported tool types	2026-02-03 21:41:17 +08:00
starsdream666	1a81e8a98a	一致性问题修复	2026-02-03 21:11:20 +08:00
yuechenglong.5	0b889c6028	feat(registry): add kiro channel support for model definitions Add kiro as a new supported channel in GetStaticModelDefinitionsByChannel function, enabling retrieval of Kiro model definitions alongside existing providers like qwen, iflow, and github-copilot.	2026-02-03 20:55:10 +08:00
starsdream666	f6bb0011f9	修复kiro模型列表缺失	2026-02-03 20:33:13 +08:00
Skyuno	fcdd91895e	Merge remote-tracking branch 'upstream/main' into fix/json-truncation-rework	2026-02-03 20:28:32 +08:00
Skyuno	8dc4fc4ff5	fix(idc): prioritize email for filename to prevent collisions - Use email as primary identifier for IDC tokens (unique, no sequence needed) - Add sequence number only when email is unavailable - Use startUrl identifier as secondary fallback with sequence - Update GenerateTokenFileName in aws.go with consistent logic	2026-02-03 20:04:36 +08:00
starsdream666	9e9a860bda	Merge branch 'router-for-me:main' into main	2026-02-03 16:50:42 +08:00
“cielhaidir”	6cd32028c3	refactor: clean up whitespace in enrichCopilotTokenResponse function	2026-02-03 13:14:21 +08:00
“cielhaidir”	ebd58ef33a	feat(copilot): enhance quota response with reset dates for enterprise and non-enterprise accounts	2026-02-03 13:13:17 +08:00
“cielhaidir”	92791194e5	feat(copilot): add GitHub Copilot quota management endpoints and response enrichment	2026-02-03 13:02:51 +08:00
taetaetae	1f7c58f7ce	refactor: use constants for default assistant messages Apply code review feedback from gemini-code-assist: - Define default messages as local constants to improve maintainability - Avoid magic strings in the empty content handling logic	2026-02-03 07:10:38 +09:00
Luis Pater	b9cdc2f54c	chore: remove `.air.toml` configuration file and update `.gitignore`	2026-02-03 01:52:35 +08:00
Luis Pater	5e23975d6e	Merge branch 'router-for-me:main' into main	2026-02-03 01:50:45 +08:00
Luis Pater	420937c848	Merge pull request #166 from cielhaidir/main feat: add .air.toml configuration file and update .gitignore for build artifacts	2026-02-03 01:46:02 +08:00
Luis Pater	e1a353ca20	Merge pull request #159 from Skyuno/fix/filter-web-search-tool fix(kiro): filter web search tool	2026-02-03 01:45:23 +08:00
Luis Pater	250f212fa3	fix(executor): handle "global" location in AI platform URL generation	2026-02-03 01:39:57 +08:00
Cyrus	a275db3fdb	fix(logging): expand tilde in auth-dir and log resolution errors - Use util.ResolveAuthDir to properly expand ~ to user home directory - Fixes issue where logs were created in literal "~/.cli-proxy-api" folder - Add warning log when auth-dir resolution fails for debugging Bug introduced in `62e2b67` (refactor(logging): centralize log directory resolution logic), where strings.TrimSpace was used instead of util.ResolveAuthDir to process auth-dir path.	2026-02-03 00:02:54 +08:00
“cielhaidir”	95a3e32a12	feat: add .air.toml configuration file and update .gitignore for build artifacts fix: improve PatchOAuthModelAlias logic for handling channel aliases feat: add support for GitHub Copilot in model definitions	2026-02-02 17:53:58 +08:00
Skyuno	3c7a5afdcc	feat: inject web_search alternative hint instead of silently filtering	2026-02-02 05:19:06 +08:00
Skyuno	5dc936a9a4	fix: filter out web_search/websearch tools unsupported by Kiro API	2026-02-02 05:19:06 +08:00
Skyuno	ba168ec003	fix(kiro): skip _partial field (may contain hallucinated paths), add pwd hint for retry	2026-02-02 05:17:39 +08:00
Skyuno	a12e22c66f	Revert "Merge pull request #150 from PancakeZik/fix/write-tool-truncation-handling" This reverts commit `fd5b669c87`, reversing changes made to `30d832c9b1`.	2026-02-02 05:17:39 +08:00
starsdream666	4c50a7281a	Update docker-image.yml	2026-02-02 00:01:00 +08:00
starsdream666	80d3fa384e	Update docker-image.yml	2026-02-01 23:58:06 +08:00
taetaetae	b45ede0b71	fix(kiro): handle empty content in messages to prevent Bad Request errors Problem: - OpenCode's /compaction command and auto-compaction (at 80%+ context) sends requests that can result in empty assistant message content - Kiro API strictly requires non-empty content for all messages - This causes 'Bad Request: Improperly formed request' errors - After compaction failure, the malformed message stays in history, breaking all subsequent requests in the session Solution: - Add fallback content for empty assistant messages in buildAssistantMessageFromOpenAI() - Add history truncation (max 50 messages) to prevent oversized requests - This ensures all messages have valid content before sending to Kiro API Fixes issues with: - /compaction command returning Bad Request - Auto-compaction breaking sessions - Conversations becoming unresponsive after compaction failure	2026-02-01 15:47:18 +09:00