Merge pull request #197 from router-for-me/plus

v6.8.4
Merge branch 'main' into plus
2026-03-09 15:25:17 +00:00 · 2026-02-07 09:37:04 +08:00 · 2026-02-07 09:36:52 +08:00 · 2026-02-07 09:35:04 +08:00 · 2026-02-07 08:55:14 +08:00 · 2026-02-07 08:52:51 +08:00
80 changed files with 2649 additions and 453 deletions
--- a/assets/cubence.png
+++ b/assets/cubence.png
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -77,6 +77,7 @@ func main() {
 	var noBrowser bool
 	var oauthCallbackPort int
 	var antigravityLogin bool
+	var kimiLogin bool
 	var kiroLogin bool
 	var kiroGoogleLogin bool
 	var kiroAWSLogin bool
@@ -102,6 +103,7 @@ func main() {
 	flag.BoolVar(&useIncognito, "incognito", false, "Open browser in incognito/private mode for OAuth (useful for multiple accounts)")
 	flag.BoolVar(&noIncognito, "no-incognito", false, "Force disable incognito mode (uses existing browser session)")
 	flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
+	flag.BoolVar(&kimiLogin, "kimi-login", false, "Login to Kimi using OAuth")
 	flag.BoolVar(&kiroLogin, "kiro-login", false, "Login to Kiro using Google OAuth")
 	flag.BoolVar(&kiroGoogleLogin, "kiro-google-login", false, "Login to Kiro using Google OAuth (same as --kiro-login)")
 	flag.BoolVar(&kiroAWSLogin, "kiro-aws-login", false, "Login to Kiro using AWS Builder ID (device code flow)")
@@ -501,6 +503,8 @@ func main() {
 		cmd.DoIFlowLogin(cfg, options)
 	} else if iflowCookie {
 		cmd.DoIFlowCookieAuth(cfg, options)
+	} else if kimiLogin {
+		cmd.DoKimiLogin(cfg, options)
 	} else if kiroLogin {
 		// For Kiro auth, default to incognito mode for multi-account support
 		// Users can explicitly override with --no-incognito
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -236,7 +236,7 @@ nonstream-keepalive-interval: 0

 # Global OAuth model name aliases (per channel)
 # These aliases rename model IDs for both model listing and request routing.
-# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
+# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
 # NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
 # You can repeat the same name with different aliases to expose multiple client model names.
 #oauth-model-alias:
@@ -280,6 +280,9 @@ nonstream-keepalive-interval: 0
 #   iflow:
 #     - name: "glm-4.7"
 #       alias: "glm-god"
+#   kimi:
+#     - name: "kimi-k2.5"
+#       alias: "k2.5"
 #   kiro:
 #     - name: "kiro-claude-opus-4-5"
 #       alias: "op45"
@@ -309,6 +312,8 @@ nonstream-keepalive-interval: 0
 #     - "vision-model"
 #   iflow:
 #     - "tstars2.0"
+#   kimi:
+#     - "kimi-k2-thinking"
 #   kiro:
 #     - "kiro-claude-haiku-4-5"
 #   github-copilot:
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -29,6 +29,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kimi"
 	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
@@ -1613,6 +1614,82 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

+func (h *Handler) RequestKimiToken(c *gin.Context) {
+	ctx := context.Background()
+
+	fmt.Println("Initializing Kimi authentication...")
+
+	state := fmt.Sprintf("kmi-%d", time.Now().UnixNano())
+	// Initialize Kimi auth service
+	kimiAuth := kimi.NewKimiAuth(h.cfg)
+
+	// Generate authorization URL
+	deviceFlow, errStartDeviceFlow := kimiAuth.StartDeviceFlow(ctx)
+	if errStartDeviceFlow != nil {
+		log.Errorf("Failed to generate authorization URL: %v", errStartDeviceFlow)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
+		return
+	}
+	authURL := deviceFlow.VerificationURIComplete
+	if authURL == "" {
+		authURL = deviceFlow.VerificationURI
+	}
+
+	RegisterOAuthSession(state, "kimi")
+
+	go func() {
+		fmt.Println("Waiting for authentication...")
+		authBundle, errWaitForAuthorization := kimiAuth.WaitForAuthorization(ctx, deviceFlow)
+		if errWaitForAuthorization != nil {
+			SetOAuthSessionError(state, "Authentication failed")
+			fmt.Printf("Authentication failed: %v\n", errWaitForAuthorization)
+			return
+		}
+
+		// Create token storage
+		tokenStorage := kimiAuth.CreateTokenStorage(authBundle)
+
+		metadata := map[string]any{
+			"type":          "kimi",
+			"access_token":  authBundle.TokenData.AccessToken,
+			"refresh_token": authBundle.TokenData.RefreshToken,
+			"token_type":    authBundle.TokenData.TokenType,
+			"scope":         authBundle.TokenData.Scope,
+			"timestamp":     time.Now().UnixMilli(),
+		}
+		if authBundle.TokenData.ExpiresAt > 0 {
+			expired := time.Unix(authBundle.TokenData.ExpiresAt, 0).UTC().Format(time.RFC3339)
+			metadata["expired"] = expired
+		}
+		if strings.TrimSpace(authBundle.DeviceID) != "" {
+			metadata["device_id"] = strings.TrimSpace(authBundle.DeviceID)
+		}
+
+		fileName := fmt.Sprintf("kimi-%d.json", time.Now().UnixMilli())
+		record := &coreauth.Auth{
+			ID:       fileName,
+			Provider: "kimi",
+			FileName: fileName,
+			Label:    "Kimi User",
+			Storage:  tokenStorage,
+			Metadata: metadata,
+		}
+		savedPath, errSave := h.saveTokenRecord(ctx, record)
+		if errSave != nil {
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
+			return
+		}
+
+		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
+		fmt.Println("You can now use Kimi services through this CLI")
+		CompleteOAuthSession(state)
+		CompleteOAuthSessionsByProvider("kimi")
+	}()
+
+	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
+}
+
 func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	ctx := context.Background()

--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -649,6 +649,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/antigravity-auth-url", s.mgmt.RequestAntigravityToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
+		mgmt.GET("/kimi-auth-url", s.mgmt.RequestKimiToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
 		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
 		mgmt.GET("/kiro-auth-url", s.mgmt.RequestKiroToken)
--- a/internal/auth/kimi/kimi.go
+++ b/internal/auth/kimi/kimi.go
@@ -0,0 +1,396 @@
+// Package kimi provides authentication and token management for Kimi (Moonshot AI) API.
+// It handles the RFC 8628 OAuth2 Device Authorization Grant flow for secure authentication.
+package kimi
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// kimiClientID is Kimi Code's OAuth client ID.
+	kimiClientID = "17e5f671-d194-4dfb-9706-5516cb48c098"
+	// kimiOAuthHost is the OAuth server endpoint.
+	kimiOAuthHost = "https://auth.kimi.com"
+	// kimiDeviceCodeURL is the endpoint for requesting device codes.
+	kimiDeviceCodeURL = kimiOAuthHost + "/api/oauth/device_authorization"
+	// kimiTokenURL is the endpoint for exchanging device codes for tokens.
+	kimiTokenURL = kimiOAuthHost + "/api/oauth/token"
+	// KimiAPIBaseURL is the base URL for Kimi API requests.
+	KimiAPIBaseURL = "https://api.kimi.com/coding"
+	// defaultPollInterval is the default interval for polling token endpoint.
+	defaultPollInterval = 5 * time.Second
+	// maxPollDuration is the maximum time to wait for user authorization.
+	maxPollDuration = 15 * time.Minute
+	// refreshThresholdSeconds is when to refresh token before expiry (5 minutes).
+	refreshThresholdSeconds = 300
+)
+
+// KimiAuth handles Kimi authentication flow.
+type KimiAuth struct {
+	deviceClient *DeviceFlowClient
+	cfg          *config.Config
+}
+
+// NewKimiAuth creates a new KimiAuth service instance.
+func NewKimiAuth(cfg *config.Config) *KimiAuth {
+	return &KimiAuth{
+		deviceClient: NewDeviceFlowClient(cfg),
+		cfg:          cfg,
+	}
+}
+
+// StartDeviceFlow initiates the device flow authentication.
+func (k *KimiAuth) StartDeviceFlow(ctx context.Context) (*DeviceCodeResponse, error) {
+	return k.deviceClient.RequestDeviceCode(ctx)
+}
+
+// WaitForAuthorization polls for user authorization and returns the auth bundle.
+func (k *KimiAuth) WaitForAuthorization(ctx context.Context, deviceCode *DeviceCodeResponse) (*KimiAuthBundle, error) {
+	tokenData, err := k.deviceClient.PollForToken(ctx, deviceCode)
+	if err != nil {
+		return nil, err
+	}
+
+	return &KimiAuthBundle{
+		TokenData: tokenData,
+		DeviceID:  k.deviceClient.deviceID,
+	}, nil
+}
+
+// CreateTokenStorage creates a new KimiTokenStorage from auth bundle.
+func (k *KimiAuth) CreateTokenStorage(bundle *KimiAuthBundle) *KimiTokenStorage {
+	expired := ""
+	if bundle.TokenData.ExpiresAt > 0 {
+		expired = time.Unix(bundle.TokenData.ExpiresAt, 0).UTC().Format(time.RFC3339)
+	}
+	return &KimiTokenStorage{
+		AccessToken:  bundle.TokenData.AccessToken,
+		RefreshToken: bundle.TokenData.RefreshToken,
+		TokenType:    bundle.TokenData.TokenType,
+		Scope:        bundle.TokenData.Scope,
+		DeviceID:     strings.TrimSpace(bundle.DeviceID),
+		Expired:      expired,
+		Type:         "kimi",
+	}
+}
+
+// DeviceFlowClient handles the OAuth2 device flow for Kimi.
+type DeviceFlowClient struct {
+	httpClient *http.Client
+	cfg        *config.Config
+	deviceID   string
+}
+
+// NewDeviceFlowClient creates a new device flow client.
+func NewDeviceFlowClient(cfg *config.Config) *DeviceFlowClient {
+	return NewDeviceFlowClientWithDeviceID(cfg, "")
+}
+
+// NewDeviceFlowClientWithDeviceID creates a new device flow client with the specified device ID.
+func NewDeviceFlowClientWithDeviceID(cfg *config.Config, deviceID string) *DeviceFlowClient {
+	client := &http.Client{Timeout: 30 * time.Second}
+	if cfg != nil {
+		client = util.SetProxy(&cfg.SDKConfig, client)
+	}
+	resolvedDeviceID := strings.TrimSpace(deviceID)
+	if resolvedDeviceID == "" {
+		resolvedDeviceID = getOrCreateDeviceID()
+	}
+	return &DeviceFlowClient{
+		httpClient: client,
+		cfg:        cfg,
+		deviceID:   resolvedDeviceID,
+	}
+}
+
+// getOrCreateDeviceID returns an in-memory device ID for the current authentication flow.
+func getOrCreateDeviceID() string {
+	return uuid.New().String()
+}
+
+// getDeviceModel returns a device model string.
+func getDeviceModel() string {
+	osName := runtime.GOOS
+	arch := runtime.GOARCH
+
+	switch osName {
+	case "darwin":
+		return fmt.Sprintf("macOS %s", arch)
+	case "windows":
+		return fmt.Sprintf("Windows %s", arch)
+	case "linux":
+		return fmt.Sprintf("Linux %s", arch)
+	default:
+		return fmt.Sprintf("%s %s", osName, arch)
+	}
+}
+
+// getHostname returns the machine hostname.
+func getHostname() string {
+	hostname, err := os.Hostname()
+	if err != nil {
+		return "unknown"
+	}
+	return hostname
+}
+
+// commonHeaders returns headers required for Kimi API requests.
+func (c *DeviceFlowClient) commonHeaders() map[string]string {
+	return map[string]string{
+		"X-Msh-Platform":     "cli-proxy-api",
+		"X-Msh-Version":      "1.0.0",
+		"X-Msh-Device-Name":  getHostname(),
+		"X-Msh-Device-Model": getDeviceModel(),
+		"X-Msh-Device-Id":    c.deviceID,
+	}
+}
+
+// RequestDeviceCode initiates the device flow by requesting a device code from Kimi.
+func (c *DeviceFlowClient) RequestDeviceCode(ctx context.Context) (*DeviceCodeResponse, error) {
+	data := url.Values{}
+	data.Set("client_id", kimiClientID)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, kimiDeviceCodeURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to create device code request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+	for k, v := range c.commonHeaders() {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: device code request failed: %w", err)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("kimi device code: close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to read device code response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("kimi: device code request failed with status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var deviceCode DeviceCodeResponse
+	if err = json.Unmarshal(bodyBytes, &deviceCode); err != nil {
+		return nil, fmt.Errorf("kimi: failed to parse device code response: %w", err)
+	}
+
+	return &deviceCode, nil
+}
+
+// PollForToken polls the token endpoint until the user authorizes or the device code expires.
+func (c *DeviceFlowClient) PollForToken(ctx context.Context, deviceCode *DeviceCodeResponse) (*KimiTokenData, error) {
+	if deviceCode == nil {
+		return nil, fmt.Errorf("kimi: device code is nil")
+	}
+
+	interval := time.Duration(deviceCode.Interval) * time.Second
+	if interval < defaultPollInterval {
+		interval = defaultPollInterval
+	}
+
+	deadline := time.Now().Add(maxPollDuration)
+	if deviceCode.ExpiresIn > 0 {
+		codeDeadline := time.Now().Add(time.Duration(deviceCode.ExpiresIn) * time.Second)
+		if codeDeadline.Before(deadline) {
+			deadline = codeDeadline
+		}
+	}
+
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil, fmt.Errorf("kimi: context cancelled: %w", ctx.Err())
+		case <-ticker.C:
+			if time.Now().After(deadline) {
+				return nil, fmt.Errorf("kimi: device code expired")
+			}
+
+			token, pollErr, shouldContinue := c.exchangeDeviceCode(ctx, deviceCode.DeviceCode)
+			if token != nil {
+				return token, nil
+			}
+			if !shouldContinue {
+				return nil, pollErr
+			}
+			// Continue polling
+		}
+	}
+}
+
+// exchangeDeviceCode attempts to exchange the device code for an access token.
+// Returns (token, error, shouldContinue).
+func (c *DeviceFlowClient) exchangeDeviceCode(ctx context.Context, deviceCode string) (*KimiTokenData, error, bool) {
+	data := url.Values{}
+	data.Set("client_id", kimiClientID)
+	data.Set("device_code", deviceCode)
+	data.Set("grant_type", "urn:ietf:params:oauth:grant-type:device_code")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, kimiTokenURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to create token request: %w", err), false
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+	for k, v := range c.commonHeaders() {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: token request failed: %w", err), false
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("kimi token exchange: close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to read token response: %w", err), false
+	}
+
+	// Parse response - Kimi returns 200 for both success and pending states
+	var oauthResp struct {
+		Error            string  `json:"error"`
+		ErrorDescription string  `json:"error_description"`
+		AccessToken      string  `json:"access_token"`
+		RefreshToken     string  `json:"refresh_token"`
+		TokenType        string  `json:"token_type"`
+		ExpiresIn        float64 `json:"expires_in"`
+		Scope            string  `json:"scope"`
+	}
+
+	if err = json.Unmarshal(bodyBytes, &oauthResp); err != nil {
+		return nil, fmt.Errorf("kimi: failed to parse token response: %w", err), false
+	}
+
+	if oauthResp.Error != "" {
+		switch oauthResp.Error {
+		case "authorization_pending":
+			return nil, nil, true // Continue polling
+		case "slow_down":
+			return nil, nil, true // Continue polling (with increased interval handled by caller)
+		case "expired_token":
+			return nil, fmt.Errorf("kimi: device code expired"), false
+		case "access_denied":
+			return nil, fmt.Errorf("kimi: access denied by user"), false
+		default:
+			return nil, fmt.Errorf("kimi: OAuth error: %s - %s", oauthResp.Error, oauthResp.ErrorDescription), false
+		}
+	}
+
+	if oauthResp.AccessToken == "" {
+		return nil, fmt.Errorf("kimi: empty access token in response"), false
+	}
+
+	var expiresAt int64
+	if oauthResp.ExpiresIn > 0 {
+		expiresAt = time.Now().Unix() + int64(oauthResp.ExpiresIn)
+	}
+
+	return &KimiTokenData{
+		AccessToken:  oauthResp.AccessToken,
+		RefreshToken: oauthResp.RefreshToken,
+		TokenType:    oauthResp.TokenType,
+		ExpiresAt:    expiresAt,
+		Scope:        oauthResp.Scope,
+	}, nil, false
+}
+
+// RefreshToken exchanges a refresh token for a new access token.
+func (c *DeviceFlowClient) RefreshToken(ctx context.Context, refreshToken string) (*KimiTokenData, error) {
+	data := url.Values{}
+	data.Set("client_id", kimiClientID)
+	data.Set("grant_type", "refresh_token")
+	data.Set("refresh_token", refreshToken)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, kimiTokenURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to create refresh request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+	for k, v := range c.commonHeaders() {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: refresh request failed: %w", err)
+	}
+	defer func() {
+		if errClose := resp.Body.Close(); errClose != nil {
+			log.Errorf("kimi refresh token: close body error: %v", errClose)
+		}
+	}()
+
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to read refresh response: %w", err)
+	}
+
+	if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
+		return nil, fmt.Errorf("kimi: refresh token rejected (status %d)", resp.StatusCode)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("kimi: refresh failed with status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var tokenResp struct {
+		AccessToken  string  `json:"access_token"`
+		RefreshToken string  `json:"refresh_token"`
+		TokenType    string  `json:"token_type"`
+		ExpiresIn    float64 `json:"expires_in"`
+		Scope        string  `json:"scope"`
+	}
+
+	if err = json.Unmarshal(bodyBytes, &tokenResp); err != nil {
+		return nil, fmt.Errorf("kimi: failed to parse refresh response: %w", err)
+	}
+
+	if tokenResp.AccessToken == "" {
+		return nil, fmt.Errorf("kimi: empty access token in refresh response")
+	}
+
+	var expiresAt int64
+	if tokenResp.ExpiresIn > 0 {
+		expiresAt = time.Now().Unix() + int64(tokenResp.ExpiresIn)
+	}
+
+	return &KimiTokenData{
+		AccessToken:  tokenResp.AccessToken,
+		RefreshToken: tokenResp.RefreshToken,
+		TokenType:    tokenResp.TokenType,
+		ExpiresAt:    expiresAt,
+		Scope:        tokenResp.Scope,
+	}, nil
+}
--- a/internal/auth/kimi/token.go
+++ b/internal/auth/kimi/token.go
@@ -0,0 +1,116 @@
+// Package kimi provides authentication and token management functionality
+// for Kimi (Moonshot AI) services. It handles OAuth2 device flow token storage,
+// serialization, and retrieval for maintaining authenticated sessions with the Kimi API.
+package kimi
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+)
+
+// KimiTokenStorage stores OAuth2 token information for Kimi API authentication.
+type KimiTokenStorage struct {
+	// AccessToken is the OAuth2 access token used for authenticating API requests.
+	AccessToken string `json:"access_token"`
+	// RefreshToken is the OAuth2 refresh token used to obtain new access tokens.
+	RefreshToken string `json:"refresh_token"`
+	// TokenType is the type of token, typically "Bearer".
+	TokenType string `json:"token_type"`
+	// Scope is the OAuth2 scope granted to the token.
+	Scope string `json:"scope,omitempty"`
+	// DeviceID is the OAuth device flow identifier used for Kimi requests.
+	DeviceID string `json:"device_id,omitempty"`
+	// Expired is the RFC3339 timestamp when the access token expires.
+	Expired string `json:"expired,omitempty"`
+	// Type indicates the authentication provider type, always "kimi" for this storage.
+	Type string `json:"type"`
+}
+
+// KimiTokenData holds the raw OAuth token response from Kimi.
+type KimiTokenData struct {
+	// AccessToken is the OAuth2 access token.
+	AccessToken string `json:"access_token"`
+	// RefreshToken is the OAuth2 refresh token.
+	RefreshToken string `json:"refresh_token"`
+	// TokenType is the type of token, typically "Bearer".
+	TokenType string `json:"token_type"`
+	// ExpiresAt is the Unix timestamp when the token expires.
+	ExpiresAt int64 `json:"expires_at"`
+	// Scope is the OAuth2 scope granted to the token.
+	Scope string `json:"scope"`
+}
+
+// KimiAuthBundle bundles authentication data for storage.
+type KimiAuthBundle struct {
+	// TokenData contains the OAuth token information.
+	TokenData *KimiTokenData
+	// DeviceID is the device identifier used during OAuth device flow.
+	DeviceID string
+}
+
+// DeviceCodeResponse represents Kimi's device code response.
+type DeviceCodeResponse struct {
+	// DeviceCode is the device verification code.
+	DeviceCode string `json:"device_code"`
+	// UserCode is the code the user must enter at the verification URI.
+	UserCode string `json:"user_code"`
+	// VerificationURI is the URL where the user should enter the code.
+	VerificationURI string `json:"verification_uri,omitempty"`
+	// VerificationURIComplete is the URL with the code pre-filled.
+	VerificationURIComplete string `json:"verification_uri_complete"`
+	// ExpiresIn is the number of seconds until the device code expires.
+	ExpiresIn int `json:"expires_in"`
+	// Interval is the minimum number of seconds to wait between polling requests.
+	Interval int `json:"interval"`
+}
+
+// SaveTokenToFile serializes the Kimi token storage to a JSON file.
+func (ts *KimiTokenStorage) SaveTokenToFile(authFilePath string) error {
+	misc.LogSavingCredentials(authFilePath)
+	ts.Type = "kimi"
+
+	if err := os.MkdirAll(filepath.Dir(authFilePath), 0700); err != nil {
+		return fmt.Errorf("failed to create directory: %v", err)
+	}
+
+	f, err := os.Create(authFilePath)
+	if err != nil {
+		return fmt.Errorf("failed to create token file: %w", err)
+	}
+	defer func() {
+		_ = f.Close()
+	}()
+
+	encoder := json.NewEncoder(f)
+	encoder.SetIndent("", "  ")
+	if err = encoder.Encode(ts); err != nil {
+		return fmt.Errorf("failed to write token to file: %w", err)
+	}
+	return nil
+}
+
+// IsExpired checks if the token has expired.
+func (ts *KimiTokenStorage) IsExpired() bool {
+	if ts.Expired == "" {
+		return false // No expiry set, assume valid
+	}
+	t, err := time.Parse(time.RFC3339, ts.Expired)
+	if err != nil {
+		return true // Has expiry string but can't parse
+	}
+	// Consider expired if within refresh threshold
+	return time.Now().Add(time.Duration(refreshThresholdSeconds) * time.Second).After(t)
+}
+
+// NeedsRefresh checks if the token should be refreshed.
+func (ts *KimiTokenStorage) NeedsRefresh() bool {
+	if ts.RefreshToken == "" {
+		return false // Can't refresh without refresh token
+	}
+	return ts.IsExpired()
+}
--- a/internal/auth/kiro/aws_auth.go
+++ b/internal/auth/kiro/aws_auth.go
@@ -238,7 +238,7 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken
 			Description    string  `json:"description"`
 			RateMultiplier float64 `json:"rateMultiplier"`
 			RateUnit       string  `json:"rateUnit"`
-			TokenLimits    struct {
+			TokenLimits    *struct {
 				MaxInputTokens int `json:"maxInputTokens"`
 			} `json:"tokenLimits"`
 		} `json:"models"`
@@ -250,13 +250,17 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken

 	models := make([]*KiroModel, 0, len(result.Models))
 	for _, m := range result.Models {
+		maxInputTokens := 0
+		if m.TokenLimits != nil {
+			maxInputTokens = m.TokenLimits.MaxInputTokens
+		}
 		models = append(models, &KiroModel{
 			ModelID:        m.ModelID,
 			ModelName:      m.ModelName,
 			Description:    m.Description,
 			RateMultiplier: m.RateMultiplier,
 			RateUnit:       m.RateUnit,
-			MaxInputTokens: m.TokenLimits.MaxInputTokens,
+			MaxInputTokens: maxInputTokens,
 		})
 	}

--- a/internal/cmd/auth_manager.go
+++ b/internal/cmd/auth_manager.go
@@ -19,6 +19,7 @@ func newAuthManager() *sdkAuth.Manager {
 		sdkAuth.NewQwenAuthenticator(),
 		sdkAuth.NewIFlowAuthenticator(),
 		sdkAuth.NewAntigravityAuthenticator(),
+		sdkAuth.NewKimiAuthenticator(),
 		sdkAuth.NewKiroAuthenticator(),
 		sdkAuth.NewGitHubCopilotAuthenticator(),
 	)
--- a/internal/cmd/kimi_login.go
+++ b/internal/cmd/kimi_login.go
@@ -0,0 +1,44 @@
+package cmd
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
+	log "github.com/sirupsen/logrus"
+)
+
+// DoKimiLogin triggers the OAuth device flow for Kimi (Moonshot AI) and saves tokens.
+// It initiates the device flow authentication, displays the verification URL for the user,
+// and waits for authorization before saving the tokens.
+//
+// Parameters:
+//   - cfg: The application configuration containing proxy and auth directory settings
+//   - options: Login options including browser behavior settings
+func DoKimiLogin(cfg *config.Config, options *LoginOptions) {
+	if options == nil {
+		options = &LoginOptions{}
+	}
+
+	manager := newAuthManager()
+	authOpts := &sdkAuth.LoginOptions{
+		NoBrowser: options.NoBrowser,
+		Metadata:  map[string]string{},
+		Prompt:    options.Prompt,
+	}
+
+	record, savedPath, err := manager.Login(context.Background(), "kimi", cfg, authOpts)
+	if err != nil {
+		log.Errorf("Kimi authentication failed: %v", err)
+		return
+	}
+
+	if savedPath != "" {
+		fmt.Printf("Authentication saved to %s\n", savedPath)
+	}
+	if record != nil && record.Label != "" {
+		fmt.Printf("Authenticated as %s\n", record.Label)
+	}
+	fmt.Println("Kimi authentication successful!")
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -535,14 +535,15 @@ func LoadConfig(configFile string) (*Config, error) {
 // If optional is true and the file is missing, it returns an empty Config.
 // If optional is true and the file is empty or invalid, it returns an empty Config.
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
-	// Perform oauth-model-alias migration before loading config.
-	// This migrates oauth-model-mappings to oauth-model-alias if needed.
-	if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
-		// Log warning but don't fail - config loading should still work
-		fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
-	} else if migrated {
-		fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
-	}
+	// NOTE: Startup oauth-model-alias migration is intentionally disabled.
+	// Reason: avoid mutating config.yaml during server startup.
+	// Re-enable the block below if automatic startup migration is needed again.
+	// if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
+	// 	// Log warning but don't fail - config loading should still work
+	// 	fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
+	// } else if migrated {
+	// 	fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
+	// }

 	// Read the entire configuration file into memory.
 	data, err := os.ReadFile(configFile)
@@ -583,18 +584,21 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		return nil, fmt.Errorf("failed to parse config file: %w", err)
 	}

-	var legacy legacyConfigData
-	if errLegacy := yaml.Unmarshal(data, &legacy); errLegacy == nil {
-		if cfg.migrateLegacyGeminiKeys(legacy.LegacyGeminiKeys) {
-			cfg.legacyMigrationPending = true
-		}
-		if cfg.migrateLegacyOpenAICompatibilityKeys(legacy.OpenAICompat) {
-			cfg.legacyMigrationPending = true
-		}
-		if cfg.migrateLegacyAmpConfig(&legacy) {
-			cfg.legacyMigrationPending = true
-		}
-	}
+	// NOTE: Startup legacy key migration is intentionally disabled.
+	// Reason: avoid mutating config.yaml during server startup.
+	// Re-enable the block below if automatic startup migration is needed again.
+	// var legacy legacyConfigData
+	// if errLegacy := yaml.Unmarshal(data, &legacy); errLegacy == nil {
+	// 	if cfg.migrateLegacyGeminiKeys(legacy.LegacyGeminiKeys) {
+	// 		cfg.legacyMigrationPending = true
+	// 	}
+	// 	if cfg.migrateLegacyOpenAICompatibilityKeys(legacy.OpenAICompat) {
+	// 		cfg.legacyMigrationPending = true
+	// 	}
+	// 	if cfg.migrateLegacyAmpConfig(&legacy) {
+	// 		cfg.legacyMigrationPending = true
+	// 	}
+	// }

 	// Hash remote management key if plaintext is detected (nested)
 	// We consider a value to be already hashed if it looks like a bcrypt hash ($2a$, $2b$, or $2y$ prefix).
@@ -658,17 +662,20 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Validate raw payload rules and drop invalid entries.
 	cfg.SanitizePayloadRules()

-	if cfg.legacyMigrationPending {
-		fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
-		if !optional && configFile != "" {
-			if err := SaveConfigPreserveComments(configFile, &cfg); err != nil {
-				return nil, fmt.Errorf("failed to persist migrated legacy config: %w", err)
-			}
-			fmt.Println("Legacy configuration normalized and persisted.")
-		} else {
-			fmt.Println("Legacy configuration normalized in memory; persistence skipped.")
-		}
-	}
+	// NOTE: Legacy migration persistence is intentionally disabled together with
+	// startup legacy migration to keep startup read-only for config.yaml.
+	// Re-enable the block below if automatic startup migration is needed again.
+	// if cfg.legacyMigrationPending {
+	// 	fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
+	// 	if !optional && configFile != "" {
+	// 		if err := SaveConfigPreserveComments(configFile, &cfg); err != nil {
+	// 			return nil, fmt.Errorf("failed to persist migrated legacy config: %w", err)
+	// 		}
+	// 		fmt.Println("Legacy configuration normalized and persisted.")
+	// 	} else {
+	// 		fmt.Println("Legacy configuration normalized in memory; persistence skipped.")
+	// 	}
+	// }

 	// Return the populated configuration struct.
 	return &cfg, nil
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -17,6 +17,7 @@ var antigravityModelConversionTable = map[string]string{
 	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
 	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
 	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
+	"gemini-claude-opus-4-6-thinking":         "claude-opus-4-6-thinking",
 }

 // defaultAntigravityAliases returns the default oauth-model-alias configuration
@@ -30,6 +31,7 @@ func defaultAntigravityAliases() []OAuthModelAlias {
 		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
 		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
 		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
+		{Name: "claude-opus-4-6-thinking", Alias: "gemini-claude-opus-4-6-thinking"},
 	}
 }

--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -131,6 +131,9 @@ func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
 	if !strings.Contains(content, "claude-opus-4-5-thinking") {
 		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
 	}
+	if !strings.Contains(content, "claude-opus-4-6-thinking") {
+		t.Fatal("expected missing default alias claude-opus-4-6-thinking to be added")
+	}
 }

 func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -376,6 +376,18 @@ func GetKiroModels() []*ModelInfo {
 			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
+		{
+			ID:                  "kiro-claude-opus-4-6",
+			Object:              "model",
+			Created:             1736899200, // 2025-01-15
+			OwnedBy:             "aws",
+			Type:                "kiro",
+			DisplayName:         "Kiro Claude Opus 4.6",
+			Description:         "Claude Opus 4.6 via Kiro (2.2x credit)",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                  "kiro-claude-opus-4-5",
 			Object:              "model",
@@ -425,6 +437,18 @@ func GetKiroModels() []*ModelInfo {
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
+		{
+			ID:                  "kiro-claude-opus-4-6-agentic",
+			Object:              "model",
+			Created:             1736899200, // 2025-01-15
+			OwnedBy:             "aws",
+			Type:                "kiro",
+			DisplayName:         "Kiro Claude Opus 4.6 (Agentic)",
+			Description:         "Claude Opus 4.6 optimized for coding agents (chunked writes)",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                  "kiro-claude-opus-4-5-agentic",
 			Object:              "model",
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -15,7 +15,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Haiku",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			// Thinking: not supported for Haiku models
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
 		{
 			ID:                  "claude-sonnet-4-5-20250929",
@@ -28,6 +28,18 @@ func GetClaudeModels() []*ModelInfo {
 			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
+		{
+			ID:                  "claude-opus-4-6",
+			Object:              "model",
+			Created:             1770318000, // 2026-02-05
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.6 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       1000000,
+			MaxCompletionTokens: 128000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+		},
 		{
 			ID:                  "claude-opus-4-5-20251101",
 			Object:              "model",
@@ -716,6 +728,20 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
 		},
+		{
+			ID:                  "gpt-5.3-codex",
+			Object:              "model",
+			Created:             1770307200,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.3",
+			DisplayName:         "GPT 5.3 Codex",
+			Description:         "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
 	}
 }

@@ -840,8 +866,50 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
 		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"claude-opus-4-6-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 128000},
 		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
 		"gpt-oss-120b-medium":        {},
 		"tab_flash_lite_preview":     {},
 	}
 }
+
+// GetKimiModels returns the standard Kimi (Moonshot AI) model definitions
+func GetKimiModels() []*ModelInfo {
+	return []*ModelInfo{
+		{
+			ID:                  "kimi-k2",
+			Object:              "model",
+			Created:             1752192000, // 2025-07-11
+			OwnedBy:             "moonshot",
+			Type:                "kimi",
+			DisplayName:         "Kimi K2",
+			Description:         "Kimi K2 - Moonshot AI's flagship coding model",
+			ContextLength:       131072,
+			MaxCompletionTokens: 32768,
+		},
+		{
+			ID:                  "kimi-k2-thinking",
+			Object:              "model",
+			Created:             1762387200, // 2025-11-06
+			OwnedBy:             "moonshot",
+			Type:                "kimi",
+			DisplayName:         "Kimi K2 Thinking",
+			Description:         "Kimi K2 Thinking - Extended reasoning model",
+			ContextLength:       131072,
+			MaxCompletionTokens: 32768,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
+		{
+			ID:                  "kimi-k2.5",
+			Object:              "model",
+			Created:             1769472000, // 2026-01-26
+			OwnedBy:             "moonshot",
+			Type:                "kimi",
+			DisplayName:         "Kimi K2.5",
+			Description:         "Kimi K2.5 - Latest Moonshot AI coding model with improved capabilities",
+			ContextLength:       131072,
+			MaxCompletionTokens: 32768,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
+	}
+}
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -141,7 +141,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -156,14 +156,14 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
 	if len(wsResp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(wsResp.Body))
+		appendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
 	}
 	if wsResp.Status < 200 || wsResp.Status >= 300 {
 		return resp, statusErr{code: wsResp.Status, msg: string(wsResp.Body)}
 	}
 	reporter.publish(ctx, parseGeminiUsage(wsResp.Body))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), bytes.Clone(translatedReq), bytes.Clone(wsResp.Body), &param)
+	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, wsResp.Body, &param)
 	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out))}
 	return resp, nil
 }
@@ -199,7 +199,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -225,7 +225,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		}
 		var body bytes.Buffer
 		if len(firstEvent.Payload) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(firstEvent.Payload))
+			appendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
 			body.Write(firstEvent.Payload)
 		}
 		if firstEvent.Type == wsrelay.MessageTypeStreamEnd {
@@ -244,7 +244,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				metadataLogged = true
 			}
 			if len(event.Payload) > 0 {
-				appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+				appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				body.Write(event.Payload)
 			}
 			if event.Type == wsrelay.MessageTypeStreamEnd {
@@ -274,12 +274,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				}
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 					filtered := FilterSSEUsageMetadata(event.Payload)
 					if detail, ok := parseGeminiStreamUsage(filtered); ok {
 						reporter.publish(ctx, detail)
 					}
-					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(filtered), &param)
+					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, filtered, &param)
 					for i := range lines {
 						out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 					}
@@ -293,9 +293,9 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 					metadataLogged = true
 				}
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
+					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
 				}
-				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(event.Payload), &param)
+				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, &param)
 				for i := range lines {
 					out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 				}
@@ -350,7 +350,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      bytes.Clone(body.payload),
+		Body:      body.payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -364,7 +364,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone())
 	if len(resp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(resp.Body))
+		appendAPIResponseChunk(ctx, e.cfg, resp.Body)
 	}
 	if resp.Status < 200 || resp.Status >= 300 {
 		return cliproxyexecutor.Response{}, statusErr{code: resp.Status, msg: string(resp.Body)}
@@ -373,7 +373,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	if totalTokens <= 0 {
 		return cliproxyexecutor.Response{}, fmt.Errorf("wsrelay: totalTokens missing in response")
 	}
-	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, bytes.Clone(resp.Body))
+	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, resp.Body)
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

@@ -393,12 +393,13 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, translatedPayload{}, err
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -133,12 +133,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -230,7 +231,7 @@ attemptLoop:

 			reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bodyBytes, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)
 			return resp, nil
@@ -274,12 +275,13 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -433,7 +435,7 @@ attemptLoop:

 			reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, resp.Payload, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)

@@ -665,12 +667,13 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -800,12 +803,12 @@ attemptLoop:
 						reporter.publish(ctx, detail)
 					}

-					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
+					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(payload), &param)
 					for i := range chunks {
 						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 					}
 				}
-				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
+				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("[DONE]"), &param)
 				for i := range tail {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
 				}
@@ -872,7 +875,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)

 	// Prepare payload once (doesn't depend on baseURL)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -100,12 +100,13 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -216,7 +217,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		to,
 		from,
 		req.Model,
-		bytes.Clone(opts.OriginalRequest),
+		opts.OriginalRequest,
 		bodyForTranslation,
 		data,
 		&param,
@@ -240,12 +241,13 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -381,7 +383,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 				to,
 				from,
 				req.Model,
-				bytes.Clone(opts.OriginalRequest),
+				opts.OriginalRequest,
 				bodyForTranslation,
 				bytes.Clone(line),
 				&param,
@@ -411,7 +413,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -27,6 +27,11 @@ import (
 	"github.com/google/uuid"
 )

+const (
+	codexClientVersion = "0.98.0"
+	codexUserAgent     = "codex_cli_rs/0.98.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464"
+)
+
 var dataTag = []byte("data:")

 // CodexExecutor is a stateless executor for Codex (OpenAI Responses API entrypoint).
@@ -88,12 +93,13 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -176,7 +182,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		}

 		var param any
-		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, line, &param)
+		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, line, &param)
 		resp = cliproxyexecutor.Response{Payload: []byte(out)}
 		return resp, nil
 	}
@@ -197,12 +203,13 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai-response")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -265,7 +272,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 	reporter.publish(ctx, parseOpenAIUsage(data))
 	reporter.ensurePublished(ctx)
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -286,12 +293,13 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -378,7 +386,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 				}
 			}

-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, originalPayload, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -397,7 +405,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -634,10 +642,10 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 		ginHeaders = ginCtx.Request.Header
 	}

-	misc.EnsureHeader(r.Header, ginHeaders, "Version", "0.21.0")
+	misc.EnsureHeader(r.Header, ginHeaders, "Version", codexClientVersion)
 	misc.EnsureHeader(r.Header, ginHeaders, "Openai-Beta", "responses=experimental")
 	misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
+	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", codexUserAgent)

 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -119,12 +119,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -223,7 +224,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
+			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, opts.OriginalRequest, payload, data, &param)
 			resp = cliproxyexecutor.Response{Payload: []byte(out)}
 			return resp, nil
 		}
@@ -272,12 +273,13 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -399,14 +401,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 						reporter.publish(ctx, detail)
 					}
 					if bytes.HasPrefix(line, dataTag) {
-						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), &param)
+						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, bytes.Clone(line), &param)
 						for i := range segments {
 							out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 						}
 					}
 				}

-				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
+				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 				for i := range segments {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 				}
@@ -428,12 +430,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, &param)
+			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, data, &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}

-			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
+			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}
@@ -485,7 +487,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
 	// Gemini CLI endpoint when iterating fallback variants.
 	for range models {
-		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+		payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -116,12 +116,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -203,7 +204,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -222,12 +223,13 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -318,12 +320,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			if detail, ok := parseGeminiStreamUsage(payload); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(payload), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -344,7 +346,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -318,12 +318,13 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		from := opts.SourceFormat
 		to := sdktranslator.FromString("gemini")

-		originalPayload := bytes.Clone(req.Payload)
+		originalPayloadSource := req.Payload
 		if len(opts.OriginalRequest) > 0 {
-			originalPayload = bytes.Clone(opts.OriginalRequest)
+			originalPayloadSource = opts.OriginalRequest
 		}
+		originalPayload := originalPayloadSource
 		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
@@ -417,7 +418,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -432,12 +433,13 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -521,7 +523,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -536,12 +538,13 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -632,12 +635,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -660,12 +663,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -756,12 +760,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -781,7 +785,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -865,7 +869,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")

-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -87,12 +87,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -163,7 +164,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -189,12 +190,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -274,7 +276,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -296,7 +298,7 @@ func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	enc, err := tokenizerForModel(baseModel)
 	if err != nil {
--- a/internal/runtime/executor/kimi_executor.go
+++ b/internal/runtime/executor/kimi_executor.go
@@ -0,0 +1,618 @@
+package executor
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"time"
+
+	kimiauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kimi"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// KimiExecutor is a stateless executor for Kimi API using OpenAI-compatible chat completions.
+type KimiExecutor struct {
+	ClaudeExecutor
+	cfg *config.Config
+}
+
+// NewKimiExecutor creates a new Kimi executor.
+func NewKimiExecutor(cfg *config.Config) *KimiExecutor { return &KimiExecutor{cfg: cfg} }
+
+// Identifier returns the executor identifier.
+func (e *KimiExecutor) Identifier() string { return "kimi" }
+
+// PrepareRequest injects Kimi credentials into the outgoing HTTP request.
+func (e *KimiExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
+	if req == nil {
+		return nil
+	}
+	token := kimiCreds(auth)
+	if strings.TrimSpace(token) != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	return nil
+}
+
+// HttpRequest injects Kimi credentials into the request and executes it.
+func (e *KimiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, req *http.Request) (*http.Response, error) {
+	if req == nil {
+		return nil, fmt.Errorf("kimi executor: request is nil")
+	}
+	if ctx == nil {
+		ctx = req.Context()
+	}
+	httpReq := req.WithContext(ctx)
+	if err := e.PrepareRequest(httpReq, auth); err != nil {
+		return nil, err
+	}
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	return httpClient.Do(httpReq)
+}
+
+// Execute performs a non-streaming chat completion request to Kimi.
+func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
+	from := opts.SourceFormat
+	if from.String() == "claude" {
+		auth.Attributes["base_url"] = kimiauth.KimiAPIBaseURL
+		return e.ClaudeExecutor.Execute(ctx, auth, req, opts)
+	}
+
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	token := kimiCreds(auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	to := sdktranslator.FromString("openai")
+	originalPayloadSource := req.Payload
+	if len(opts.OriginalRequest) > 0 {
+		originalPayloadSource = opts.OriginalRequest
+	}
+	originalPayload := bytes.Clone(originalPayloadSource)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	// Strip kimi- prefix for upstream API
+	upstreamModel := stripKimiPrefix(baseModel)
+	body, err = sjson.SetBytes(body, "model", upstreamModel)
+	if err != nil {
+		return resp, fmt.Errorf("kimi executor: failed to set model in payload: %w", err)
+	}
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "kimi", e.Identifier())
+	if err != nil {
+		return resp, err
+	}
+
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		return resp, err
+	}
+
+	url := kimiauth.KimiAPIBaseURL + "/v1/chat/completions"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return resp, err
+	}
+	applyKimiHeadersWithAuth(httpReq, token, false, auth)
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      body,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, err := httpClient.Do(httpReq)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return resp, err
+	}
+	defer func() {
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("kimi executor: close response body error: %v", errClose)
+		}
+	}()
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		return resp, err
+	}
+	data, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return resp, err
+	}
+	appendAPIResponseChunk(ctx, e.cfg, data)
+	reporter.publish(ctx, parseOpenAIUsage(data))
+	var param any
+	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
+	// the original model name in the response for client compatibility.
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
+	return resp, nil
+}
+
+// ExecuteStream performs a streaming chat completion request to Kimi.
+func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
+	from := opts.SourceFormat
+	if from.String() == "claude" {
+		auth.Attributes["base_url"] = kimiauth.KimiAPIBaseURL
+		return e.ClaudeExecutor.ExecuteStream(ctx, auth, req, opts)
+	}
+
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+	token := kimiCreds(auth)
+
+	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
+	defer reporter.trackFailure(ctx, &err)
+
+	to := sdktranslator.FromString("openai")
+	originalPayloadSource := req.Payload
+	if len(opts.OriginalRequest) > 0 {
+		originalPayloadSource = opts.OriginalRequest
+	}
+	originalPayload := bytes.Clone(originalPayloadSource)
+	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+
+	// Strip kimi- prefix for upstream API
+	upstreamModel := stripKimiPrefix(baseModel)
+	body, err = sjson.SetBytes(body, "model", upstreamModel)
+	if err != nil {
+		return nil, fmt.Errorf("kimi executor: failed to set model in payload: %w", err)
+	}
+
+	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "kimi", e.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
+	if err != nil {
+		return nil, fmt.Errorf("kimi executor: failed to set stream_options in payload: %w", err)
+	}
+	requestedModel := payloadRequestedModel(opts, req.Model)
+	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		return nil, err
+	}
+
+	url := kimiauth.KimiAPIBaseURL + "/v1/chat/completions"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	applyKimiHeadersWithAuth(httpReq, token, true, auth)
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+		URL:       url,
+		Method:    http.MethodPost,
+		Headers:   httpReq.Header.Clone(),
+		Body:      body,
+		Provider:  e.Identifier(),
+		AuthID:    authID,
+		AuthLabel: authLabel,
+		AuthType:  authType,
+		AuthValue: authValue,
+	})
+
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+	httpResp, err := httpClient.Do(httpReq)
+	if err != nil {
+		recordAPIResponseError(ctx, e.cfg, err)
+		return nil, err
+	}
+	recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		b, _ := io.ReadAll(httpResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, b)
+		logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("kimi executor: close response body error: %v", errClose)
+		}
+		err = statusErr{code: httpResp.StatusCode, msg: string(b)}
+		return nil, err
+	}
+	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
+	go func() {
+		defer close(out)
+		defer func() {
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("kimi executor: close response body error: %v", errClose)
+			}
+		}()
+		scanner := bufio.NewScanner(httpResp.Body)
+		scanner.Buffer(nil, 1_048_576) // 1MB
+		var param any
+		for scanner.Scan() {
+			line := scanner.Bytes()
+			appendAPIResponseChunk(ctx, e.cfg, line)
+			if detail, ok := parseOpenAIStreamUsage(line); ok {
+				reporter.publish(ctx, detail)
+			}
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
+			for i := range chunks {
+				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
+			}
+		}
+		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
+		for i := range doneChunks {
+			out <- cliproxyexecutor.StreamChunk{Payload: []byte(doneChunks[i])}
+		}
+		if errScan := scanner.Err(); errScan != nil {
+			recordAPIResponseError(ctx, e.cfg, errScan)
+			reporter.publishFailure(ctx)
+			out <- cliproxyexecutor.StreamChunk{Err: errScan}
+		}
+	}()
+	return stream, nil
+}
+
+// CountTokens estimates token count for Kimi requests.
+func (e *KimiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	auth.Attributes["base_url"] = kimiauth.KimiAPIBaseURL
+	return e.ClaudeExecutor.CountTokens(ctx, auth, req, opts)
+}
+
+func normalizeKimiToolMessageLinks(body []byte) ([]byte, error) {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		return body, nil
+	}
+
+	messages := gjson.GetBytes(body, "messages")
+	if !messages.Exists() || !messages.IsArray() {
+		return body, nil
+	}
+
+	out := body
+	pending := make([]string, 0)
+	patched := 0
+	patchedReasoning := 0
+	ambiguous := 0
+	latestReasoning := ""
+	hasLatestReasoning := false
+
+	removePending := func(id string) {
+		for idx := range pending {
+			if pending[idx] != id {
+				continue
+			}
+			pending = append(pending[:idx], pending[idx+1:]...)
+			return
+		}
+	}
+
+	msgs := messages.Array()
+	for msgIdx := range msgs {
+		msg := msgs[msgIdx]
+		role := strings.TrimSpace(msg.Get("role").String())
+		switch role {
+		case "assistant":
+			reasoning := msg.Get("reasoning_content")
+			if reasoning.Exists() {
+				reasoningText := reasoning.String()
+				if strings.TrimSpace(reasoningText) != "" {
+					latestReasoning = reasoningText
+					hasLatestReasoning = true
+				}
+			}
+
+			toolCalls := msg.Get("tool_calls")
+			if !toolCalls.Exists() || !toolCalls.IsArray() || len(toolCalls.Array()) == 0 {
+				continue
+			}
+
+			if !reasoning.Exists() || strings.TrimSpace(reasoning.String()) == "" {
+				reasoningText := fallbackAssistantReasoning(msg, hasLatestReasoning, latestReasoning)
+				path := fmt.Sprintf("messages.%d.reasoning_content", msgIdx)
+				next, err := sjson.SetBytes(out, path, reasoningText)
+				if err != nil {
+					return body, fmt.Errorf("kimi executor: failed to set assistant reasoning_content: %w", err)
+				}
+				out = next
+				patchedReasoning++
+			}
+
+			for _, tc := range toolCalls.Array() {
+				id := strings.TrimSpace(tc.Get("id").String())
+				if id == "" {
+					continue
+				}
+				pending = append(pending, id)
+			}
+		case "tool":
+			toolCallID := strings.TrimSpace(msg.Get("tool_call_id").String())
+			if toolCallID == "" {
+				toolCallID = strings.TrimSpace(msg.Get("call_id").String())
+				if toolCallID != "" {
+					path := fmt.Sprintf("messages.%d.tool_call_id", msgIdx)
+					next, err := sjson.SetBytes(out, path, toolCallID)
+					if err != nil {
+						return body, fmt.Errorf("kimi executor: failed to set tool_call_id from call_id: %w", err)
+					}
+					out = next
+					patched++
+				}
+			}
+			if toolCallID == "" {
+				if len(pending) == 1 {
+					toolCallID = pending[0]
+					path := fmt.Sprintf("messages.%d.tool_call_id", msgIdx)
+					next, err := sjson.SetBytes(out, path, toolCallID)
+					if err != nil {
+						return body, fmt.Errorf("kimi executor: failed to infer tool_call_id: %w", err)
+					}
+					out = next
+					patched++
+				} else if len(pending) > 1 {
+					ambiguous++
+				}
+			}
+			if toolCallID != "" {
+				removePending(toolCallID)
+			}
+		}
+	}
+
+	if patched > 0 || patchedReasoning > 0 {
+		log.WithFields(log.Fields{
+			"patched_tool_messages":      patched,
+			"patched_reasoning_messages": patchedReasoning,
+		}).Debug("kimi executor: normalized tool message fields")
+	}
+	if ambiguous > 0 {
+		log.WithFields(log.Fields{
+			"ambiguous_tool_messages": ambiguous,
+			"pending_tool_calls":      len(pending),
+		}).Warn("kimi executor: tool messages missing tool_call_id with ambiguous candidates")
+	}
+
+	return out, nil
+}
+
+func fallbackAssistantReasoning(msg gjson.Result, hasLatest bool, latest string) string {
+	if hasLatest && strings.TrimSpace(latest) != "" {
+		return latest
+	}
+
+	content := msg.Get("content")
+	if content.Type == gjson.String {
+		if text := strings.TrimSpace(content.String()); text != "" {
+			return text
+		}
+	}
+	if content.IsArray() {
+		parts := make([]string, 0, len(content.Array()))
+		for _, item := range content.Array() {
+			text := strings.TrimSpace(item.Get("text").String())
+			if text == "" {
+				continue
+			}
+			parts = append(parts, text)
+		}
+		if len(parts) > 0 {
+			return strings.Join(parts, "\n")
+		}
+	}
+
+	return "[reasoning unavailable]"
+}
+
+// Refresh refreshes the Kimi token using the refresh token.
+func (e *KimiExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
+	log.Debugf("kimi executor: refresh called")
+	if auth == nil {
+		return nil, fmt.Errorf("kimi executor: auth is nil")
+	}
+	// Expect refresh_token in metadata for OAuth-based accounts
+	var refreshToken string
+	if auth.Metadata != nil {
+		if v, ok := auth.Metadata["refresh_token"].(string); ok && strings.TrimSpace(v) != "" {
+			refreshToken = v
+		}
+	}
+	if strings.TrimSpace(refreshToken) == "" {
+		// Nothing to refresh
+		return auth, nil
+	}
+
+	client := kimiauth.NewDeviceFlowClientWithDeviceID(e.cfg, resolveKimiDeviceID(auth))
+	td, err := client.RefreshToken(ctx, refreshToken)
+	if err != nil {
+		return nil, err
+	}
+	if auth.Metadata == nil {
+		auth.Metadata = make(map[string]any)
+	}
+	auth.Metadata["access_token"] = td.AccessToken
+	if td.RefreshToken != "" {
+		auth.Metadata["refresh_token"] = td.RefreshToken
+	}
+	if td.ExpiresAt > 0 {
+		exp := time.Unix(td.ExpiresAt, 0).UTC().Format(time.RFC3339)
+		auth.Metadata["expired"] = exp
+	}
+	auth.Metadata["type"] = "kimi"
+	now := time.Now().Format(time.RFC3339)
+	auth.Metadata["last_refresh"] = now
+	return auth, nil
+}
+
+// applyKimiHeaders sets required headers for Kimi API requests.
+// Headers match kimi-cli client for compatibility.
+func applyKimiHeaders(r *http.Request, token string, stream bool) {
+	r.Header.Set("Content-Type", "application/json")
+	r.Header.Set("Authorization", "Bearer "+token)
+	// Match kimi-cli headers exactly
+	r.Header.Set("User-Agent", "KimiCLI/1.10.6")
+	r.Header.Set("X-Msh-Platform", "kimi_cli")
+	r.Header.Set("X-Msh-Version", "1.10.6")
+	r.Header.Set("X-Msh-Device-Name", getKimiHostname())
+	r.Header.Set("X-Msh-Device-Model", getKimiDeviceModel())
+	r.Header.Set("X-Msh-Device-Id", getKimiDeviceID())
+	if stream {
+		r.Header.Set("Accept", "text/event-stream")
+		return
+	}
+	r.Header.Set("Accept", "application/json")
+}
+
+func resolveKimiDeviceIDFromAuth(auth *cliproxyauth.Auth) string {
+	if auth == nil || auth.Metadata == nil {
+		return ""
+	}
+
+	deviceIDRaw, ok := auth.Metadata["device_id"]
+	if !ok {
+		return ""
+	}
+
+	deviceID, ok := deviceIDRaw.(string)
+	if !ok {
+		return ""
+	}
+
+	return strings.TrimSpace(deviceID)
+}
+
+func resolveKimiDeviceIDFromStorage(auth *cliproxyauth.Auth) string {
+	if auth == nil {
+		return ""
+	}
+
+	storage, ok := auth.Storage.(*kimiauth.KimiTokenStorage)
+	if !ok || storage == nil {
+		return ""
+	}
+
+	return strings.TrimSpace(storage.DeviceID)
+}
+
+func resolveKimiDeviceID(auth *cliproxyauth.Auth) string {
+	deviceID := resolveKimiDeviceIDFromAuth(auth)
+	if deviceID != "" {
+		return deviceID
+	}
+	return resolveKimiDeviceIDFromStorage(auth)
+}
+
+func applyKimiHeadersWithAuth(r *http.Request, token string, stream bool, auth *cliproxyauth.Auth) {
+	applyKimiHeaders(r, token, stream)
+
+	if deviceID := resolveKimiDeviceID(auth); deviceID != "" {
+		r.Header.Set("X-Msh-Device-Id", deviceID)
+	}
+}
+
+// getKimiHostname returns the machine hostname.
+func getKimiHostname() string {
+	hostname, err := os.Hostname()
+	if err != nil {
+		return "unknown"
+	}
+	return hostname
+}
+
+// getKimiDeviceModel returns a device model string matching kimi-cli format.
+func getKimiDeviceModel() string {
+	return fmt.Sprintf("%s %s", runtime.GOOS, runtime.GOARCH)
+}
+
+// getKimiDeviceID returns a stable device ID, matching kimi-cli storage location.
+func getKimiDeviceID() string {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return "cli-proxy-api-device"
+	}
+	// Check kimi-cli's device_id location first (platform-specific)
+	var kimiShareDir string
+	switch runtime.GOOS {
+	case "darwin":
+		kimiShareDir = filepath.Join(homeDir, "Library", "Application Support", "kimi")
+	case "windows":
+		appData := os.Getenv("APPDATA")
+		if appData == "" {
+			appData = filepath.Join(homeDir, "AppData", "Roaming")
+		}
+		kimiShareDir = filepath.Join(appData, "kimi")
+	default: // linux and other unix-like
+		kimiShareDir = filepath.Join(homeDir, ".local", "share", "kimi")
+	}
+	deviceIDPath := filepath.Join(kimiShareDir, "device_id")
+	if data, err := os.ReadFile(deviceIDPath); err == nil {
+		return strings.TrimSpace(string(data))
+	}
+	return "cli-proxy-api-device"
+}
+
+// kimiCreds extracts the access token from auth.
+func kimiCreds(a *cliproxyauth.Auth) (token string) {
+	if a == nil {
+		return ""
+	}
+	// Check metadata first (OAuth flow stores tokens here)
+	if a.Metadata != nil {
+		if v, ok := a.Metadata["access_token"].(string); ok && strings.TrimSpace(v) != "" {
+			return v
+		}
+	}
+	// Fallback to attributes (API key style)
+	if a.Attributes != nil {
+		if v := a.Attributes["access_token"]; v != "" {
+			return v
+		}
+		if v := a.Attributes["api_key"]; v != "" {
+			return v
+		}
+	}
+	return ""
+}
+
+// stripKimiPrefix removes the "kimi-" prefix from model names for the upstream API.
+func stripKimiPrefix(model string) string {
+	model = strings.TrimSpace(model)
+	if strings.HasPrefix(strings.ToLower(model), "kimi-") {
+		return model[5:]
+	}
+	return model
+}
--- a/internal/runtime/executor/kimi_executor_test.go
+++ b/internal/runtime/executor/kimi_executor_test.go
@@ -0,0 +1,205 @@
+package executor
+
+import (
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+func TestNormalizeKimiToolMessageLinks_UsesCallIDFallback(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"list_directory:1","type":"function","function":{"name":"list_directory","arguments":"{}"}}]},
+			{"role":"tool","call_id":"list_directory:1","content":"[]"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.1.tool_call_id").String()
+	if got != "list_directory:1" {
+		t.Fatalf("messages.1.tool_call_id = %q, want %q", got, "list_directory:1")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_InferSinglePendingID(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"call_123","type":"function","function":{"name":"read_file","arguments":"{}"}}]},
+			{"role":"tool","content":"file-content"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.1.tool_call_id").String()
+	if got != "call_123" {
+		t.Fatalf("messages.1.tool_call_id = %q, want %q", got, "call_123")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_AmbiguousMissingIDIsNotInferred(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[
+				{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}},
+				{"id":"call_2","type":"function","function":{"name":"read_file","arguments":"{}"}}
+			]},
+			{"role":"tool","content":"result-without-id"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	if gjson.GetBytes(out, "messages.1.tool_call_id").Exists() {
+		t.Fatalf("messages.1.tool_call_id should be absent for ambiguous case, got %q", gjson.GetBytes(out, "messages.1.tool_call_id").String())
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_PreservesExistingToolCallID(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}]},
+			{"role":"tool","tool_call_id":"call_1","call_id":"different-id","content":"result"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.1.tool_call_id").String()
+	if got != "call_1" {
+		t.Fatalf("messages.1.tool_call_id = %q, want %q", got, "call_1")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_InheritsPreviousReasoningForAssistantToolCalls(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","content":"plan","reasoning_content":"previous reasoning"},
+			{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}]}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.1.reasoning_content").String()
+	if got != "previous reasoning" {
+		t.Fatalf("messages.1.reasoning_content = %q, want %q", got, "previous reasoning")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_InsertsFallbackReasoningWhenMissing(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}]}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	reasoning := gjson.GetBytes(out, "messages.0.reasoning_content")
+	if !reasoning.Exists() {
+		t.Fatalf("messages.0.reasoning_content should exist")
+	}
+	if reasoning.String() != "[reasoning unavailable]" {
+		t.Fatalf("messages.0.reasoning_content = %q, want %q", reasoning.String(), "[reasoning unavailable]")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_UsesContentAsReasoningFallback(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","content":[{"type":"text","text":"first line"},{"type":"text","text":"second line"}],"tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}]}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.0.reasoning_content").String()
+	if got != "first line\nsecond line" {
+		t.Fatalf("messages.0.reasoning_content = %q, want %q", got, "first line\nsecond line")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_ReplacesEmptyReasoningContent(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","content":"assistant summary","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}],"reasoning_content":""}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.0.reasoning_content").String()
+	if got != "assistant summary" {
+		t.Fatalf("messages.0.reasoning_content = %q, want %q", got, "assistant summary")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_PreservesExistingAssistantReasoning(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}],"reasoning_content":"keep me"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	got := gjson.GetBytes(out, "messages.0.reasoning_content").String()
+	if got != "keep me" {
+		t.Fatalf("messages.0.reasoning_content = %q, want %q", got, "keep me")
+	}
+}
+
+func TestNormalizeKimiToolMessageLinks_RepairsIDsAndReasoningTogether(t *testing.T) {
+	body := []byte(`{
+		"messages":[
+			{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"list_directory","arguments":"{}"}}],"reasoning_content":"r1"},
+			{"role":"tool","call_id":"call_1","content":"[]"},
+			{"role":"assistant","tool_calls":[{"id":"call_2","type":"function","function":{"name":"read_file","arguments":"{}"}}]},
+			{"role":"tool","call_id":"call_2","content":"file"}
+		]
+	}`)
+
+	out, err := normalizeKimiToolMessageLinks(body)
+	if err != nil {
+		t.Fatalf("normalizeKimiToolMessageLinks() error = %v", err)
+	}
+
+	if got := gjson.GetBytes(out, "messages.1.tool_call_id").String(); got != "call_1" {
+		t.Fatalf("messages.1.tool_call_id = %q, want %q", got, "call_1")
+	}
+	if got := gjson.GetBytes(out, "messages.3.tool_call_id").String(); got != "call_2" {
+		t.Fatalf("messages.3.tool_call_id = %q, want %q", got, "call_2")
+	}
+	if got := gjson.GetBytes(out, "messages.2.reasoning_content").String(); got != "r1" {
+		t.Fatalf("messages.2.reasoning_content = %q, want %q", got, "r1")
+	}
+}
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -1681,6 +1681,7 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 	modelMap := map[string]string{
 		// Amazon Q format (amazonq- prefix) - same API as Kiro
 		"amazonq-auto":                       "auto",
+		"amazonq-claude-opus-4-6":            "claude-opus-4.6",
 		"amazonq-claude-opus-4-5":            "claude-opus-4.5",
 		"amazonq-claude-sonnet-4-5":          "claude-sonnet-4.5",
 		"amazonq-claude-sonnet-4-5-20250929": "claude-sonnet-4.5",
@@ -1688,6 +1689,7 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"amazonq-claude-sonnet-4-20250514":   "claude-sonnet-4",
 		"amazonq-claude-haiku-4-5":           "claude-haiku-4.5",
 		// Kiro format (kiro- prefix) - valid model names that should be preserved
+		"kiro-claude-opus-4-6":            "claude-opus-4.6",
 		"kiro-claude-opus-4-5":            "claude-opus-4.5",
 		"kiro-claude-sonnet-4-5":          "claude-sonnet-4.5",
 		"kiro-claude-sonnet-4-5-20250929": "claude-sonnet-4.5",
@@ -1696,6 +1698,8 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"kiro-claude-haiku-4-5":           "claude-haiku-4.5",
 		"kiro-auto":                       "auto",
 		// Native format (no prefix) - used by Kiro IDE directly
+		"claude-opus-4-6":            "claude-opus-4.6",
+		"claude-opus-4.6":            "claude-opus-4.6",
 		"claude-opus-4-5":            "claude-opus-4.5",
 		"claude-opus-4.5":            "claude-opus-4.5",
 		"claude-haiku-4-5":           "claude-haiku-4.5",
@@ -1707,10 +1711,12 @@ func (e *KiroExecutor) mapModelToKiro(model string) string {
 		"claude-sonnet-4-20250514":   "claude-sonnet-4",
 		"auto":                       "auto",
 		// Agentic variants (same backend model IDs, but with special system prompt)
+		"claude-opus-4.6-agentic":        "claude-opus-4.6",
 		"claude-opus-4.5-agentic":        "claude-opus-4.5",
 		"claude-sonnet-4.5-agentic":      "claude-sonnet-4.5",
 		"claude-sonnet-4-agentic":        "claude-sonnet-4",
 		"claude-haiku-4.5-agentic":       "claude-haiku-4.5",
+		"kiro-claude-opus-4-6-agentic":   "claude-opus-4.6",
 		"kiro-claude-opus-4-5-agentic":   "claude-opus-4.5",
 		"kiro-claude-sonnet-4-5-agentic": "claude-sonnet-4.5",
 		"kiro-claude-sonnet-4-agentic":   "claude-sonnet-4",
@@ -2096,6 +2102,22 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
 				}
 			}

+		case "contextUsageEvent":
+			// Handle context usage events from Kiro API
+			// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
+			if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
+				if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: parseEventStream received contextUsageEvent: %.2f%%", ctxPct*100)
+				}
+			} else {
+				// Try direct field (fallback)
+				if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: parseEventStream received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
+				}
+			}
+
 		case "error", "exception", "internalServerException", "invalidStateEvent":
 			// Handle error events from Kiro API stream
 			errMsg := ""
@@ -2699,6 +2721,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				}
 			}

+		case "contextUsageEvent":
+			// Handle context usage events from Kiro API
+			// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
+			if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
+				if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: streamToChannel received contextUsageEvent: %.2f%%", ctxPct*100)
+				}
+			} else {
+				// Try direct field (fallback)
+				if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: streamToChannel received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
+				}
+			}
+
 		case "error", "exception", "internalServerException":
 			// Handle error events from Kiro API stream
 			errMsg := ""
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -80,7 +80,7 @@ func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequ
 	writeHeaders(builder, info.Headers)
 	builder.WriteString("\nBody:\n")
 	if len(info.Body) > 0 {
-		builder.WriteString(string(bytes.Clone(info.Body)))
+		builder.WriteString(string(info.Body))
 	} else {
 		builder.WriteString("<empty>")
 	}
@@ -152,7 +152,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	if cfg == nil || !cfg.RequestLog {
 		return
 	}
-	data := bytes.TrimSpace(bytes.Clone(chunk))
+	data := bytes.TrimSpace(chunk)
 	if len(data) == 0 {
 		return
 	}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -88,12 +88,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		to = sdktranslator.FromString("openai-response")
 		endpoint = "/responses/compact"
 	}
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, opts.Stream)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
 	if opts.Alt == "responses/compact" {
@@ -170,7 +171,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	reporter.ensurePublished(ctx)
 	// Translate response back to source format when needed
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, body, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, body, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -189,12 +190,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)

@@ -283,7 +285,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy

 			// OpenAI-compatible streams are SSE: lines typically prefixed with "data: ".
 			// Pass through translator; it yields one or more chunks for the target schema.
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -304,7 +306,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	modelForCounting := baseModel

--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -81,12 +81,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -150,7 +151,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
 	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
@@ -171,12 +172,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayload := bytes.Clone(req.Payload)
+	originalPayloadSource := req.Payload
 	if len(opts.OriginalRequest) > 0 {
-		originalPayload = bytes.Clone(opts.OriginalRequest)
+		originalPayloadSource = opts.OriginalRequest
 	}
+	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)

 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -253,12 +255,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
 		}
-		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
+		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
 		for i := range doneChunks {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(doneChunks[i])}
 		}
@@ -276,7 +278,7 @@ func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth,

 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)

 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
--- a/internal/runtime/executor/thinking_providers.go
+++ b/internal/runtime/executor/thinking_providers.go
@@ -7,5 +7,6 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/kimi"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
 )
--- a/internal/store/gitstore.go
+++ b/internal/store/gitstore.go
@@ -21,6 +21,9 @@ import (
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 )

+// gcInterval defines minimum time between garbage collection runs.
+const gcInterval = 5 * time.Minute
+
 // GitTokenStore persists token records and auth metadata using git as the backing storage.
 type GitTokenStore struct {
 	mu        sync.Mutex
@@ -31,6 +34,7 @@ type GitTokenStore struct {
 	remote    string
 	username  string
 	password  string
+	lastGC    time.Time
 }

 // NewGitTokenStore creates a token store that saves credentials to disk through the
@@ -613,6 +617,7 @@ func (s *GitTokenStore) commitAndPushLocked(message string, relPaths ...string)
 	} else if errRewrite := s.rewriteHeadAsSingleCommit(repo, headRef.Name(), commitHash, message, signature); errRewrite != nil {
 		return errRewrite
 	}
+	s.maybeRunGC(repo)
 	if err = repo.Push(&git.PushOptions{Auth: s.gitAuth(), Force: true}); err != nil {
 		if errors.Is(err, git.NoErrAlreadyUpToDate) {
 			return nil
@@ -652,6 +657,23 @@ func (s *GitTokenStore) rewriteHeadAsSingleCommit(repo *git.Repository, branch p
 	return nil
 }

+func (s *GitTokenStore) maybeRunGC(repo *git.Repository) {
+	now := time.Now()
+	if now.Sub(s.lastGC) < gcInterval {
+		return
+	}
+	s.lastGC = now
+
+	pruneOpts := git.PruneOptions{
+		OnlyObjectsOlderThan: now,
+		Handler:              repo.DeleteObject,
+	}
+	if err := repo.Prune(pruneOpts); err != nil && !errors.Is(err, git.ErrLooseObjectsNotSupported) {
+		return
+	}
+	_ = repo.RepackObjects(&git.RepackConfig{})
+}
+
 // PersistConfig commits and pushes configuration changes to git.
 func (s *GitTokenStore) PersistConfig(_ context.Context) error {
 	if err := s.EnsureRepository(); err != nil {
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -18,6 +18,7 @@ var providerAppliers = map[string]ProviderApplier{
 	"codex":       nil,
 	"iflow":       nil,
 	"antigravity": nil,
+	"kimi":        nil,
 }

 // GetProviderApplier returns the ProviderApplier for the given provider name.
@@ -326,6 +327,9 @@ func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
 			return config
 		}
 		return extractOpenAIConfig(body)
+	case "kimi":
+		// Kimi uses OpenAI-compatible reasoning_effort format
+		return extractOpenAIConfig(body)
 	default:
 		return ThinkingConfig{}
 	}
--- a/internal/thinking/provider/kimi/apply.go
+++ b/internal/thinking/provider/kimi/apply.go
@@ -0,0 +1,126 @@
+// Package kimi implements thinking configuration for Kimi (Moonshot AI) models.
+//
+// Kimi models use the OpenAI-compatible reasoning_effort format with discrete levels
+// (low/medium/high). The provider strips any existing thinking config and applies
+// the unified ThinkingConfig in OpenAI format.
+package kimi
+
+import (
+	"fmt"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for Kimi models.
+//
+// Kimi-specific behavior:
+//   - Output format: reasoning_effort (string: low/medium/high)
+//   - Uses OpenAI-compatible format
+//   - Supports budget-to-level conversion
+type Applier struct{}
+
+var _ thinking.ProviderApplier = (*Applier)(nil)
+
+// NewApplier creates a new Kimi thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("kimi", NewApplier())
+}
+
+// Apply applies thinking configuration to Kimi request body.
+//
+// Expected output format:
+//
+//	{
+//	  "reasoning_effort": "high"
+//	}
+func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
+	if thinking.IsUserDefinedModel(modelInfo) {
+		return applyCompatibleKimi(body, config)
+	}
+	if modelInfo.Thinking == nil {
+		return body, nil
+	}
+
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	var effort string
+	switch config.Mode {
+	case thinking.ModeLevel:
+		if config.Level == "" {
+			return body, nil
+		}
+		effort = string(config.Level)
+	case thinking.ModeNone:
+		// Kimi uses "none" to disable thinking
+		effort = string(thinking.LevelNone)
+	case thinking.ModeBudget:
+		// Convert budget to level using threshold mapping
+		level, ok := thinking.ConvertBudgetToLevel(config.Budget)
+		if !ok {
+			return body, nil
+		}
+		effort = level
+	case thinking.ModeAuto:
+		// Auto mode maps to "auto" effort
+		effort = string(thinking.LevelAuto)
+	default:
+		return body, nil
+	}
+
+	if effort == "" {
+		return body, nil
+	}
+
+	result, err := sjson.SetBytes(body, "reasoning_effort", effort)
+	if err != nil {
+		return body, fmt.Errorf("kimi thinking: failed to set reasoning_effort: %w", err)
+	}
+	return result, nil
+}
+
+// applyCompatibleKimi applies thinking config for user-defined Kimi models.
+func applyCompatibleKimi(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
+	if len(body) == 0 || !gjson.ValidBytes(body) {
+		body = []byte(`{}`)
+	}
+
+	var effort string
+	switch config.Mode {
+	case thinking.ModeLevel:
+		if config.Level == "" {
+			return body, nil
+		}
+		effort = string(config.Level)
+	case thinking.ModeNone:
+		effort = string(thinking.LevelNone)
+		if config.Level != "" {
+			effort = string(config.Level)
+		}
+	case thinking.ModeAuto:
+		effort = string(thinking.LevelAuto)
+	case thinking.ModeBudget:
+		// Convert budget to level
+		level, ok := thinking.ConvertBudgetToLevel(config.Budget)
+		if !ok {
+			return body, nil
+		}
+		effort = level
+	default:
+		return body, nil
+	}
+
+	result, err := sjson.SetBytes(body, "reasoning_effort", effort)
+	if err != nil {
+		return body, fmt.Errorf("kimi thinking: failed to set reasoning_effort: %w", err)
+	}
+	return result, nil
+}
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
@@ -37,7 +36,7 @@ import (
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
 	enableThoughtTranslate := true
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	// system instruction
 	systemInstructionJSON := ""
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -34,7 +33,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
--- a/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
+++ b/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
@@ -1,14 +1,12 @@
 package responses

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )

 func ConvertOpenAIResponsesRequestToAntigravity(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToAntigravity(modelName, rawJSON, stream)
 }
--- a/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
+++ b/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/claude/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -30,7 +28,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiCLIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	modelResult := gjson.GetBytes(rawJSON, "model")
 	// Extract the inner request object and promote it to the top level
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -46,7 +45,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -6,7 +6,6 @@
 package chat_completions

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -44,7 +43,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -32,7 +31,7 @@ var (
 // - max_output_tokens -> max_tokens
 // - stream passthrough via parameter
 func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	if account == "" {
 		u, _ := uuid.NewRandom()
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"fmt"
 	"strconv"
 	"strings"
@@ -35,7 +34,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in internal client format
 func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	template := `{"model":"","instructions":"","input":[]}`

--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -113,10 +113,10 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
 		p := (*param).(*ConvertCodexResponseToClaudeParams).HasToolCall
 		stopReason := rootResult.Get("response.stop_reason").String()
-		if stopReason != "" {
-			template, _ = sjson.Set(template, "delta.stop_reason", stopReason)
-		} else if p {
+		if p {
 			template, _ = sjson.Set(template, "delta.stop_reason", "tool_use")
+		} else if stopReason == "max_tokens" || stopReason == "stop" {
+			template, _ = sjson.Set(template, "delta.stop_reason", stopReason)
 		} else {
 			template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
 		}
--- a/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
+++ b/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -30,7 +28,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiCLIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -37,7 +36,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base template
 	out := `{"model":"","instructions":"","input":[]}`

--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -7,8 +7,6 @@
 package chat_completions

 import (
-	"bytes"
-
 	"strconv"
 	"strings"

@@ -29,7 +27,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI Responses API format
 func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Start with empty JSON object
 	out := `{"instructions":""}`

--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"fmt"

 	"github.com/tidwall/gjson"
@@ -9,7 +8,7 @@ import (
 )

 func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	inputResult := gjson.GetBytes(rawJSON, "input")
 	if inputResult.Type == gjson.String {
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -35,7 +35,7 @@ const geminiCLIClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)

 	// Build output Gemini CLI request JSON
--- a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
+++ b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -33,7 +32,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToGeminiCLI(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -28,7 +27,7 @@ const geminiCLIFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -14,6 +14,7 @@ import (
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -77,14 +78,20 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		template, _ = sjson.Set(template, "id", responseIDResult.String())
 	}

-	// Extract and set the finish reason.
-	if finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason"); finishReasonResult.Exists() {
-		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
+	finishReason := ""
+	if stopReasonResult := gjson.GetBytes(rawJSON, "response.stop_reason"); stopReasonResult.Exists() {
+		finishReason = stopReasonResult.String()
 	}
+	if finishReason == "" {
+		if finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason"); finishReasonResult.Exists() {
+			finishReason = finishReasonResult.String()
+		}
+	}
+	finishReason = strings.ToLower(finishReason)

 	// Extract and set usage metadata (token counts).
 	if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
@@ -97,6 +104,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini-cli openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
@@ -187,6 +202,12 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 	if hasFunctionCall {
 		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
 		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
+	} else if finishReason != "" && (*param).(*convertCliResponseToOpenAIChatParams).FunctionIndex == 0 {
+		// Only pass through specific finish reasons
+		if finishReason == "max_tokens" || finishReason == "stop" {
+			template, _ = sjson.Set(template, "choices.0.finish_reason", finishReason)
+			template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReason)
+		}
 	}

 	return []string{template}
--- a/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
+++ b/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
@@ -1,14 +1,12 @@
 package responses

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini-cli/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )

 func ConvertOpenAIResponsesRequestToGeminiCLI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToGeminiCLI(modelName, rawJSON, stream)
 }
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -28,7 +28,7 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request in Gemini CLI format.
 func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)

 	// Build output Gemini CLI request JSON
--- a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
+++ b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
@@ -6,7 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -19,7 +18,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the internal client.
 func ConvertGeminiCLIRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
--- a/internal/translator/gemini/gemini/gemini_gemini_request.go
+++ b/internal/translator/gemini/gemini/gemini_gemini_request.go
@@ -4,7 +4,6 @@
 package gemini

 import (
-	"bytes"
 	"fmt"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -19,7 +18,7 @@ import (
 //
 // It keeps the payload otherwise unchanged.
 func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Fast path: if no contents field, only attach safety settings
 	contents := gjson.GetBytes(rawJSON, "contents")
 	if !contents.Exists() {
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"fmt"
 	"strings"

@@ -28,7 +27,7 @@ const geminiFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"contents":[]}`)

--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -129,11 +129,16 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 			candidateIndex := int(candidate.Get("index").Int())
 			template, _ = sjson.Set(template, "choices.0.index", candidateIndex)

-			// Extract and set the finish reason.
-			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
-				template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
-				template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
+			finishReason := ""
+			if stopReasonResult := gjson.GetBytes(rawJSON, "stop_reason"); stopReasonResult.Exists() {
+				finishReason = stopReasonResult.String()
 			}
+			if finishReason == "" {
+				if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
+					finishReason = finishReasonResult.String()
+				}
+			}
+			finishReason = strings.ToLower(finishReason)

 			partsResult := candidate.Get("content.parts")
 			hasFunctionCall := false
@@ -225,6 +230,12 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 			if hasFunctionCall {
 				template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
 				template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
+			} else if finishReason != "" {
+				// Only pass through specific finish reasons
+				if finishReason == "max_tokens" || finishReason == "stop" {
+					template, _ = sjson.Set(template, "choices.0.finish_reason", finishReason)
+					template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReason)
+				}
 			}

 			responseStrings = append(responseStrings, template)
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -12,7 +11,7 @@ import (
 const geminiResponsesThoughtSignature = "skip_thought_signature_validator"

 func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON

 	// Note: modelName and stream parameters are part of the fixed method signature
 	_ = modelName // Unused but required by interface
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -608,18 +608,22 @@ func processMessages(messages gjson.Result, modelID, origin string) ([]KiroHisto

 		if role == "user" {
 			userMsg, toolResults := BuildUserMessageStruct(msg, modelID, origin)
+			// CRITICAL: Kiro API requires content to be non-empty for ALL user messages
+			// This includes both history messages and the current message.
+			// When user message contains only tool_result (no text), content will be empty.
+			// This commonly happens in compaction requests from OpenCode.
+			if strings.TrimSpace(userMsg.Content) == "" {
+				if len(toolResults) > 0 {
+					userMsg.Content = kirocommon.DefaultUserContentWithToolResults
+				} else {
+					userMsg.Content = kirocommon.DefaultUserContent
+				}
+				log.Debugf("kiro: user content was empty, using default: %s", userMsg.Content)
+			}
 			if isLastMessage {
 				currentUserMsg = &userMsg
 				currentToolResults = toolResults
 			} else {
-				// CRITICAL: Kiro API requires content to be non-empty for history messages too
-				if strings.TrimSpace(userMsg.Content) == "" {
-					if len(toolResults) > 0 {
-						userMsg.Content = "Tool results provided."
-					} else {
-						userMsg.Content = "Continue"
-					}
-				}
 				// For history messages, embed tool results in context
 				if len(toolResults) > 0 {
 					userMsg.UserInputMessageContext = &KiroUserInputMessageContext{
@@ -883,8 +887,21 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage
 		contentBuilder.WriteString(content.String())
 	}

+	// CRITICAL FIX: Kiro API requires non-empty content for assistant messages
+	// This can happen with compaction requests where assistant messages have only tool_use
+	// (no text content). Without this fix, Kiro API returns "Improperly formed request" error.
+	finalContent := contentBuilder.String()
+	if strings.TrimSpace(finalContent) == "" {
+		if len(toolUses) > 0 {
+			finalContent = kirocommon.DefaultAssistantContentWithTools
+		} else {
+			finalContent = kirocommon.DefaultAssistantContent
+		}
+		log.Debugf("kiro: assistant content was empty, using default: %s", finalContent)
+	}
+
 	return KiroAssistantResponseMessage{
-		Content:  contentBuilder.String(),
+		Content:  finalContent,
 		ToolUses: toolUses,
 	}
 }
--- a/internal/translator/kiro/common/constants.go
+++ b/internal/translator/kiro/common/constants.go
@@ -29,6 +29,26 @@ const (
 	// InlineCodeMarker is the markdown inline code marker (backtick).
 	InlineCodeMarker = "`"

+	// DefaultAssistantContentWithTools is the fallback content for assistant messages
+	// that have tool_use but no text content. Kiro API requires non-empty content.
+	// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
+	// Previously "I'll help you with that." which caused the model to parrot it back.
+	DefaultAssistantContentWithTools = "."
+
+	// DefaultAssistantContent is the fallback content for assistant messages
+	// that have no content at all. Kiro API requires non-empty content.
+	// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
+	// Previously "I understand." which could leak into model behavior.
+	DefaultAssistantContent = "."
+
+	// DefaultUserContentWithToolResults is the fallback content for user messages
+	// that have only tool_result (no text). Kiro API requires non-empty content.
+	DefaultUserContentWithToolResults = "Tool results provided."
+
+	// DefaultUserContent is the fallback content for user messages
+	// that have no content at all. Kiro API requires non-empty content.
+	DefaultUserContent = "Continue"
+
 	// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
 	// AWS Kiro API has a 2-3 minute timeout for large file write operations.
 	KiroAgenticSystemPrompt = `
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -718,13 +718,10 @@ func buildAssistantMessageFromOpenAI(msg gjson.Result) KiroAssistantResponseMess
 	// This can happen with compaction requests or error recovery scenarios
 	finalContent := contentBuilder.String()
 	if strings.TrimSpace(finalContent) == "" {
-		const defaultAssistantContentWithTools = "I'll help you with that."
-		const defaultAssistantContent = "I understand."
-
 		if len(toolUses) > 0 {
-			finalContent = defaultAssistantContentWithTools
+			finalContent = kirocommon.DefaultAssistantContentWithTools
 		} else {
-			finalContent = defaultAssistantContent
+			finalContent = kirocommon.DefaultAssistantContent
 		}
 		log.Debugf("kiro-openai: assistant content was empty, using default: %s", finalContent)
 	}
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -6,7 +6,6 @@
 package claude

 import (
-	"bytes"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -18,7 +17,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`

--- a/internal/translator/openai/gemini-cli/openai_gemini_request.go
+++ b/internal/translator/openai/gemini-cli/openai_gemini_request.go
@@ -6,8 +6,6 @@
 package geminiCLI

 import (
-	"bytes"
-
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -17,7 +15,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiCLIRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
 	if gjson.GetBytes(rawJSON, "systemInstruction").Exists() {
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -6,7 +6,6 @@
 package gemini

 import (
-	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -21,7 +20,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`

--- a/internal/translator/openai/openai/chat-completions/openai_openai_request.go
+++ b/internal/translator/openai/openai/chat-completions/openai_openai_request.go
@@ -3,7 +3,6 @@
 package chat_completions

 import (
-	"bytes"
 	"github.com/tidwall/sjson"
 )

@@ -25,7 +24,7 @@ func ConvertOpenAIRequestToOpenAI(modelName string, inputRawJSON []byte, _ bool)
 		// If there's an error, return the original JSON or handle the error appropriately.
 		// For now, we'll return the original, but in a real scenario, logging or a more robust error
 		// handling mechanism would be needed.
-		return bytes.Clone(inputRawJSON)
+		return inputRawJSON
 	}
 	return updatedJSON
 }
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -1,7 +1,6 @@
 package responses

 import (
-	"bytes"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -28,7 +27,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI chat completions format
 func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := bytes.Clone(inputRawJSON)
+	rawJSON := inputRawJSON
 	// Base OpenAI chat completions template with default values
 	out := `{"model":"","messages":[],"stream":false}`

--- a/internal/util/claude_model_test.go
+++ b/internal/util/claude_model_test.go
@@ -11,6 +11,7 @@ func TestIsClaudeThinkingModel(t *testing.T) {
 		// Claude thinking models - should return true
 		{"claude-sonnet-4-5-thinking", "claude-sonnet-4-5-thinking", true},
 		{"claude-opus-4-5-thinking", "claude-opus-4-5-thinking", true},
+		{"claude-opus-4-6-thinking", "claude-opus-4-6-thinking", true},
 		{"Claude-Sonnet-Thinking uppercase", "Claude-Sonnet-4-5-Thinking", true},
 		{"claude thinking mixed case", "Claude-THINKING-Model", true},

--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -61,14 +61,20 @@ func cleanJSONSchema(jsonStr string, addPlaceholder bool) string {

 // removeKeywords removes all occurrences of specified keywords from the JSON schema.
 func removeKeywords(jsonStr string, keywords []string) string {
+	deletePaths := make([]string, 0)
+	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			jsonStr, _ = sjson.Delete(jsonStr, p)
+			deletePaths = append(deletePaths, p)
 		}
 	}
+	sortByDepth(deletePaths)
+	for _, p := range deletePaths {
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+	}
 	return jsonStr
 }

@@ -235,8 +241,9 @@ var unsupportedConstraints = []string{
 }

 func moveConstraintsToDescription(jsonStr string) string {
+	pathsByField := findPathsByFields(jsonStr, unsupportedConstraints)
 	for _, key := range unsupportedConstraints {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			val := gjson.Get(jsonStr, p)
 			if !val.Exists() || val.IsObject() || val.IsArray() {
 				continue
@@ -424,14 +431,21 @@ func removeUnsupportedKeywords(jsonStr string) string {
 		"$schema", "$defs", "definitions", "const", "$ref", "additionalProperties",
 		"propertyNames", // Gemini doesn't support property name validation
 	)
+
+	deletePaths := make([]string, 0)
+	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range findPaths(jsonStr, key) {
+		for _, p := range pathsByField[key] {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			jsonStr, _ = sjson.Delete(jsonStr, p)
+			deletePaths = append(deletePaths, p)
 		}
 	}
+	sortByDepth(deletePaths)
+	for _, p := range deletePaths {
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+	}
 	// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
 	jsonStr = removeExtensionFields(jsonStr)
 	return jsonStr
@@ -581,6 +595,42 @@ func findPaths(jsonStr, field string) []string {
 	return paths
 }

+func findPathsByFields(jsonStr string, fields []string) map[string][]string {
+	set := make(map[string]struct{}, len(fields))
+	for _, field := range fields {
+		set[field] = struct{}{}
+	}
+	paths := make(map[string][]string, len(set))
+	walkForFields(gjson.Parse(jsonStr), "", set, paths)
+	return paths
+}
+
+func walkForFields(value gjson.Result, path string, fields map[string]struct{}, paths map[string][]string) {
+	switch value.Type {
+	case gjson.JSON:
+		value.ForEach(func(key, val gjson.Result) bool {
+			keyStr := key.String()
+			safeKey := escapeGJSONPathKey(keyStr)
+
+			var childPath string
+			if path == "" {
+				childPath = safeKey
+			} else {
+				childPath = path + "." + safeKey
+			}
+
+			if _, ok := fields[keyStr]; ok {
+				paths[keyStr] = append(paths[keyStr], childPath)
+			}
+
+			walkForFields(val, childPath, fields, paths)
+			return true
+		})
+	case gjson.String, gjson.Number, gjson.True, gjson.False, gjson.Null:
+		// Terminal types - no further traversal needed
+	}
+}
+
 func sortByDepth(paths []string) {
 	sort.Slice(paths, func(i, j int) bool { return len(paths[i]) > len(paths[j]) })
 }
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -378,9 +378,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
@@ -405,7 +409,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	return cloneBytes(resp.Payload), nil
+	return resp.Payload, nil
 }

 // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
@@ -417,9 +421,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
@@ -444,7 +452,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		}
 		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	return cloneBytes(resp.Payload), nil
+	return resp.Payload, nil
 }

 // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
@@ -459,9 +467,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
+	payload := rawJSON
+	if len(payload) == 0 {
+		payload = nil
+	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: cloneBytes(rawJSON),
+		Payload: payload,
 	}
 	opts := coreexecutor.Options{
 		Stream:          true,
@@ -685,7 +697,7 @@ func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.Erro
 	var previous []byte
 	if existing, exists := c.Get("API_RESPONSE"); exists {
 		if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
-			previous = bytes.Clone(existingBytes)
+			previous = existingBytes
 		}
 	}
 	appendAPIResponse(c, body)
--- a/sdk/api/management.go
+++ b/sdk/api/management.go
@@ -18,6 +18,7 @@ type ManagementTokenRequester interface {
 	RequestCodexToken(*gin.Context)
 	RequestAntigravityToken(*gin.Context)
 	RequestQwenToken(*gin.Context)
+	RequestKimiToken(*gin.Context)
 	RequestIFlowToken(*gin.Context)
 	RequestIFlowCookieToken(*gin.Context)
 	GetAuthStatus(c *gin.Context)
@@ -55,6 +56,10 @@ func (m *managementTokenRequester) RequestQwenToken(c *gin.Context) {
 	m.handler.RequestQwenToken(c)
 }

+func (m *managementTokenRequester) RequestKimiToken(c *gin.Context) {
+	m.handler.RequestKimiToken(c)
+}
+
 func (m *managementTokenRequester) RequestIFlowToken(c *gin.Context) {
 	m.handler.RequestIFlowToken(c)
 }
--- a/sdk/auth/kimi.go
+++ b/sdk/auth/kimi.go
@@ -0,0 +1,123 @@
+package auth
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kimi"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/browser"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	log "github.com/sirupsen/logrus"
+)
+
+// kimiRefreshLead is the duration before token expiry when refresh should occur.
+var kimiRefreshLead = 5 * time.Minute
+
+// KimiAuthenticator implements the OAuth device flow login for Kimi (Moonshot AI).
+type KimiAuthenticator struct{}
+
+// NewKimiAuthenticator constructs a new Kimi authenticator.
+func NewKimiAuthenticator() Authenticator {
+	return &KimiAuthenticator{}
+}
+
+// Provider returns the provider key for kimi.
+func (KimiAuthenticator) Provider() string {
+	return "kimi"
+}
+
+// RefreshLead returns the duration before token expiry when refresh should occur.
+// Kimi tokens expire and need to be refreshed before expiry.
+func (KimiAuthenticator) RefreshLead() *time.Duration {
+	return &kimiRefreshLead
+}
+
+// Login initiates the Kimi device flow authentication.
+func (a KimiAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
+	if cfg == nil {
+		return nil, fmt.Errorf("cliproxy auth: configuration is required")
+	}
+	if opts == nil {
+		opts = &LoginOptions{}
+	}
+
+	authSvc := kimi.NewKimiAuth(cfg)
+
+	// Start the device flow
+	fmt.Println("Starting Kimi authentication...")
+	deviceCode, err := authSvc.StartDeviceFlow(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: failed to start device flow: %w", err)
+	}
+
+	// Display the verification URL
+	verificationURL := deviceCode.VerificationURIComplete
+	if verificationURL == "" {
+		verificationURL = deviceCode.VerificationURI
+	}
+
+	fmt.Printf("\nTo authenticate, please visit:\n%s\n\n", verificationURL)
+	if deviceCode.UserCode != "" {
+		fmt.Printf("User code: %s\n\n", deviceCode.UserCode)
+	}
+
+	// Try to open the browser automatically
+	if !opts.NoBrowser {
+		if browser.IsAvailable() {
+			if errOpen := browser.OpenURL(verificationURL); errOpen != nil {
+				log.Warnf("Failed to open browser automatically: %v", errOpen)
+			} else {
+				fmt.Println("Browser opened automatically.")
+			}
+		}
+	}
+
+	fmt.Println("Waiting for authorization...")
+	if deviceCode.ExpiresIn > 0 {
+		fmt.Printf("(This will timeout in %d seconds if not authorized)\n", deviceCode.ExpiresIn)
+	}
+
+	// Wait for user authorization
+	authBundle, err := authSvc.WaitForAuthorization(ctx, deviceCode)
+	if err != nil {
+		return nil, fmt.Errorf("kimi: %w", err)
+	}
+
+	// Create the token storage
+	tokenStorage := authSvc.CreateTokenStorage(authBundle)
+
+	// Build metadata with token information
+	metadata := map[string]any{
+		"type":          "kimi",
+		"access_token":  authBundle.TokenData.AccessToken,
+		"refresh_token": authBundle.TokenData.RefreshToken,
+		"token_type":    authBundle.TokenData.TokenType,
+		"scope":         authBundle.TokenData.Scope,
+		"timestamp":     time.Now().UnixMilli(),
+	}
+
+	if authBundle.TokenData.ExpiresAt > 0 {
+		exp := time.Unix(authBundle.TokenData.ExpiresAt, 0).UTC().Format(time.RFC3339)
+		metadata["expired"] = exp
+	}
+	if strings.TrimSpace(authBundle.DeviceID) != "" {
+		metadata["device_id"] = strings.TrimSpace(authBundle.DeviceID)
+	}
+
+	// Generate a unique filename
+	fileName := fmt.Sprintf("kimi-%d.json", time.Now().UnixMilli())
+
+	fmt.Println("\nKimi authentication successful!")
+
+	return &coreauth.Auth{
+		ID:       fileName,
+		Provider: a.Provider(),
+		FileName: fileName,
+		Label:    "Kimi User",
+		Storage:  tokenStorage,
+		Metadata: metadata,
+	}, nil
+}
--- a/sdk/auth/refresh_registry.go
+++ b/sdk/auth/refresh_registry.go
@@ -14,6 +14,7 @@ func init() {
 	registerRefreshLead("gemini", func() Authenticator { return NewGeminiAuthenticator() })
 	registerRefreshLead("gemini-cli", func() Authenticator { return NewGeminiAuthenticator() })
 	registerRefreshLead("antigravity", func() Authenticator { return NewAntigravityAuthenticator() })
+	registerRefreshLead("kimi", func() Authenticator { return NewKimiAuthenticator() })
 	registerRefreshLead("kiro", func() Authenticator { return NewKiroAuthenticator() })
 	registerRefreshLead("github-copilot", func() Authenticator { return NewGitHubCopilotAuthenticator() })
 }
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -607,6 +607,9 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req
 				result.RetryAfter = ra
 			}
 			m.MarkResult(execCtx, result)
+			if isRequestInvalidError(errExec) {
+				return cliproxyexecutor.Response{}, errExec
+			}
 			lastErr = errExec
 			continue
 		}
@@ -660,6 +663,9 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string,
 				result.RetryAfter = ra
 			}
 			m.MarkResult(execCtx, result)
+			if isRequestInvalidError(errExec) {
+				return cliproxyexecutor.Response{}, errExec
+			}
 			lastErr = errExec
 			continue
 		}
@@ -711,6 +717,9 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
 			result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr}
 			result.RetryAfter = retryAfterFromError(errStream)
 			m.MarkResult(execCtx, result)
+			if isRequestInvalidError(errStream) {
+				return nil, errStream
+			}
 			lastErr = errStream
 			continue
 		}
@@ -1110,6 +1119,9 @@ func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []stri
 	if status := statusCodeFromError(err); status == http.StatusOK {
 		return 0, false
 	}
+	if isRequestInvalidError(err) {
+		return 0, false
+	}
 	wait, found := m.closestCooldownWait(providers, model, attempt)
 	if !found || wait > maxWait {
 		return 0, false
@@ -1299,7 +1311,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
 			stateUnavailable = true
 		} else if state.Unavailable {
 			if state.NextRetryAfter.IsZero() {
-				stateUnavailable = true
+				stateUnavailable = false
 			} else if state.NextRetryAfter.After(now) {
 				stateUnavailable = true
 				if earliestRetry.IsZero() || state.NextRetryAfter.Before(earliestRetry) {
@@ -1430,6 +1442,21 @@ func statusCodeFromResult(err *Error) int {
 	return err.StatusCode()
 }

+// isRequestInvalidError returns true if the error represents a client request
+// error that should not be retried. Specifically, it checks for 400 Bad Request
+// with "invalid_request_error" in the message, indicating the request itself is
+// malformed and switching to a different auth will not help.
+func isRequestInvalidError(err error) bool {
+	if err == nil {
+		return false
+	}
+	status := statusCodeFromError(err)
+	if status != http.StatusBadRequest {
+		return false
+	}
+	return strings.Contains(err.Error(), "invalid_request_error")
+}
+
 func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
 	if auth == nil {
 		return
--- a/sdk/cliproxy/auth/conductor_availability_test.go
+++ b/sdk/cliproxy/auth/conductor_availability_test.go
@@ -0,0 +1,61 @@
+package auth
+
+import (
+	"testing"
+	"time"
+)
+
+func TestUpdateAggregatedAvailability_UnavailableWithoutNextRetryDoesNotBlockAuth(t *testing.T) {
+	t.Parallel()
+
+	now := time.Now()
+	model := "test-model"
+	auth := &Auth{
+		ID: "a",
+		ModelStates: map[string]*ModelState{
+			model: {
+				Status:      StatusError,
+				Unavailable: true,
+			},
+		},
+	}
+
+	updateAggregatedAvailability(auth, now)
+
+	if auth.Unavailable {
+		t.Fatalf("auth.Unavailable = true, want false")
+	}
+	if !auth.NextRetryAfter.IsZero() {
+		t.Fatalf("auth.NextRetryAfter = %v, want zero", auth.NextRetryAfter)
+	}
+}
+
+func TestUpdateAggregatedAvailability_FutureNextRetryBlocksAuth(t *testing.T) {
+	t.Parallel()
+
+	now := time.Now()
+	model := "test-model"
+	next := now.Add(5 * time.Minute)
+	auth := &Auth{
+		ID: "a",
+		ModelStates: map[string]*ModelState{
+			model: {
+				Status:         StatusError,
+				Unavailable:    true,
+				NextRetryAfter: next,
+			},
+		},
+	}
+
+	updateAggregatedAvailability(auth, now)
+
+	if !auth.Unavailable {
+		t.Fatalf("auth.Unavailable = false, want true")
+	}
+	if auth.NextRetryAfter.IsZero() {
+		t.Fatalf("auth.NextRetryAfter = zero, want %v", next)
+	}
+	if auth.NextRetryAfter.Sub(next) > time.Second || next.Sub(auth.NextRetryAfter) > time.Second {
+		t.Fatalf("auth.NextRetryAfter = %v, want %v", auth.NextRetryAfter, next)
+	}
+}
--- a/sdk/cliproxy/auth/oauth_model_alias.go
+++ b/sdk/cliproxy/auth/oauth_model_alias.go
@@ -221,7 +221,7 @@ func modelAliasChannel(auth *Auth) string {
 // and auth kind. Returns empty string if the provider/authKind combination doesn't support
 // OAuth model alias (e.g., API key authentication).
 //
-// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
+// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
 func OAuthModelAliasChannel(provider, authKind string) string {
 	provider = strings.ToLower(strings.TrimSpace(provider))
 	authKind = strings.ToLower(strings.TrimSpace(authKind))
@@ -245,7 +245,7 @@ func OAuthModelAliasChannel(provider, authKind string) string {
 			return ""
 		}
 		return "codex"
-	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot":
+	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot", "kimi":
 		return provider
 	default:
 		return ""
--- a/sdk/cliproxy/auth/oauth_model_alias_test.go
+++ b/sdk/cliproxy/auth/oauth_model_alias_test.go
@@ -79,6 +79,15 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
 			input:   "gemini-2.5-pro(none)",
 			want:    "gemini-2.5-pro-exp-03-25(none)",
 		},
+		{
+			name: "kimi suffix preserved",
+			aliases: map[string][]internalconfig.OAuthModelAlias{
+				"kimi": {{Name: "kimi-k2.5", Alias: "k2.5"}},
+			},
+			channel: "kimi",
+			input:   "k2.5(high)",
+			want:    "kimi-k2.5(high)",
+		},
 		{
 			name: "case insensitive alias lookup with suffix",
 			aliases: map[string][]internalconfig.OAuthModelAlias{
@@ -161,6 +170,8 @@ func createAuthForChannel(channel string) *Auth {
 		return &Auth{Provider: "qwen"}
 	case "iflow":
 		return &Auth{Provider: "iflow"}
+	case "kimi":
+		return &Auth{Provider: "kimi"}
 	case "kiro":
 		return &Auth{Provider: "kiro"}
 	default:
@@ -168,6 +179,14 @@ func createAuthForChannel(channel string) *Auth {
 	}
 }

+func TestOAuthModelAliasChannel_Kimi(t *testing.T) {
+	t.Parallel()
+
+	if got := OAuthModelAliasChannel("kimi", "oauth"); got != "kimi" {
+		t.Fatalf("OAuthModelAliasChannel() = %q, want %q", got, "kimi")
+	}
+}
+
 func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) {
 	t.Parallel()

--- a/sdk/cliproxy/auth/selector.go
+++ b/sdk/cliproxy/auth/selector.go
@@ -12,6 +12,7 @@ import (
 	"sync"
 	"time"

+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 )

@@ -19,6 +20,7 @@ import (
 type RoundRobinSelector struct {
 	mu      sync.Mutex
 	cursors map[string]int
+	maxKeys int
 }

 // FillFirstSelector selects the first available credential (deterministic ordering).
@@ -119,6 +121,19 @@ func authPriority(auth *Auth) int {
 	return parsed
 }

+func canonicalModelKey(model string) string {
+	model = strings.TrimSpace(model)
+	if model == "" {
+		return ""
+	}
+	parsed := thinking.ParseSuffix(model)
+	modelName := strings.TrimSpace(parsed.ModelName)
+	if modelName == "" {
+		return model
+	}
+	return modelName
+}
+
 func collectAvailableByPriority(auths []*Auth, model string, now time.Time) (available map[int][]*Auth, cooldownCount int, earliest time.Time) {
 	available = make(map[int][]*Auth)
 	for i := 0; i < len(auths); i++ {
@@ -185,11 +200,18 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
 	if err != nil {
 		return nil, err
 	}
-	key := provider + ":" + model
+	key := provider + ":" + canonicalModelKey(model)
 	s.mu.Lock()
 	if s.cursors == nil {
 		s.cursors = make(map[string]int)
 	}
+	limit := s.maxKeys
+	if limit <= 0 {
+		limit = 4096
+	}
+	if _, ok := s.cursors[key]; !ok && len(s.cursors) >= limit {
+		s.cursors = make(map[string]int)
+	}
 	index := s.cursors[key]

 	if index >= 2_147_483_640 {
@@ -223,7 +245,14 @@ func isAuthBlockedForModel(auth *Auth, model string, now time.Time) (bool, block
 	}
 	if model != "" {
 		if len(auth.ModelStates) > 0 {
-			if state, ok := auth.ModelStates[model]; ok && state != nil {
+			state, ok := auth.ModelStates[model]
+			if (!ok || state == nil) && model != "" {
+				baseModel := canonicalModelKey(model)
+				if baseModel != "" && baseModel != model {
+					state, ok = auth.ModelStates[baseModel]
+				}
+			}
+			if ok && state != nil {
 				if state.Status == StatusDisabled {
 					return true, blockReasonDisabled, time.Time{}
 				}
--- a/sdk/cliproxy/auth/selector_test.go
+++ b/sdk/cliproxy/auth/selector_test.go
@@ -2,7 +2,9 @@ package auth

 import (
 	"context"
+	"encoding/json"
 	"errors"
+	"net/http"
 	"sync"
 	"testing"
 	"time"
@@ -175,3 +177,228 @@ func TestRoundRobinSelectorPick_Concurrent(t *testing.T) {
 	default:
 	}
 }
+
+func TestSelectorPick_AllCooldownReturnsModelCooldownError(t *testing.T) {
+	t.Parallel()
+
+	model := "test-model"
+	now := time.Now()
+	next := now.Add(60 * time.Second)
+	auths := []*Auth{
+		{
+			ID: "a",
+			ModelStates: map[string]*ModelState{
+				model: {
+					Status:         StatusActive,
+					Unavailable:    true,
+					NextRetryAfter: next,
+					Quota: QuotaState{
+						Exceeded:      true,
+						NextRecoverAt: next,
+					},
+				},
+			},
+		},
+		{
+			ID: "b",
+			ModelStates: map[string]*ModelState{
+				model: {
+					Status:         StatusActive,
+					Unavailable:    true,
+					NextRetryAfter: next,
+					Quota: QuotaState{
+						Exceeded:      true,
+						NextRecoverAt: next,
+					},
+				},
+			},
+		},
+	}
+
+	t.Run("mixed provider redacts provider field", func(t *testing.T) {
+		t.Parallel()
+
+		selector := &FillFirstSelector{}
+		_, err := selector.Pick(context.Background(), "mixed", model, cliproxyexecutor.Options{}, auths)
+		if err == nil {
+			t.Fatalf("Pick() error = nil")
+		}
+
+		var mce *modelCooldownError
+		if !errors.As(err, &mce) {
+			t.Fatalf("Pick() error = %T, want *modelCooldownError", err)
+		}
+		if mce.StatusCode() != http.StatusTooManyRequests {
+			t.Fatalf("StatusCode() = %d, want %d", mce.StatusCode(), http.StatusTooManyRequests)
+		}
+
+		headers := mce.Headers()
+		if got := headers.Get("Retry-After"); got == "" {
+			t.Fatalf("Headers().Get(Retry-After) = empty")
+		}
+
+		var payload map[string]any
+		if err := json.Unmarshal([]byte(mce.Error()), &payload); err != nil {
+			t.Fatalf("json.Unmarshal(Error()) error = %v", err)
+		}
+		rawErr, ok := payload["error"].(map[string]any)
+		if !ok {
+			t.Fatalf("Error() payload missing error object: %v", payload)
+		}
+		if got, _ := rawErr["code"].(string); got != "model_cooldown" {
+			t.Fatalf("Error().error.code = %q, want %q", got, "model_cooldown")
+		}
+		if _, ok := rawErr["provider"]; ok {
+			t.Fatalf("Error().error.provider exists for mixed provider: %v", rawErr["provider"])
+		}
+	})
+
+	t.Run("non-mixed provider includes provider field", func(t *testing.T) {
+		t.Parallel()
+
+		selector := &FillFirstSelector{}
+		_, err := selector.Pick(context.Background(), "gemini", model, cliproxyexecutor.Options{}, auths)
+		if err == nil {
+			t.Fatalf("Pick() error = nil")
+		}
+
+		var mce *modelCooldownError
+		if !errors.As(err, &mce) {
+			t.Fatalf("Pick() error = %T, want *modelCooldownError", err)
+		}
+
+		var payload map[string]any
+		if err := json.Unmarshal([]byte(mce.Error()), &payload); err != nil {
+			t.Fatalf("json.Unmarshal(Error()) error = %v", err)
+		}
+		rawErr, ok := payload["error"].(map[string]any)
+		if !ok {
+			t.Fatalf("Error() payload missing error object: %v", payload)
+		}
+		if got, _ := rawErr["provider"].(string); got != "gemini" {
+			t.Fatalf("Error().error.provider = %q, want %q", got, "gemini")
+		}
+	})
+}
+
+func TestIsAuthBlockedForModel_UnavailableWithoutNextRetryIsNotBlocked(t *testing.T) {
+	t.Parallel()
+
+	now := time.Now()
+	model := "test-model"
+	auth := &Auth{
+		ID: "a",
+		ModelStates: map[string]*ModelState{
+			model: {
+				Status:      StatusActive,
+				Unavailable: true,
+				Quota: QuotaState{
+					Exceeded: true,
+				},
+			},
+		},
+	}
+
+	blocked, reason, next := isAuthBlockedForModel(auth, model, now)
+	if blocked {
+		t.Fatalf("blocked = true, want false")
+	}
+	if reason != blockReasonNone {
+		t.Fatalf("reason = %v, want %v", reason, blockReasonNone)
+	}
+	if !next.IsZero() {
+		t.Fatalf("next = %v, want zero", next)
+	}
+}
+
+func TestFillFirstSelectorPick_ThinkingSuffixFallsBackToBaseModelState(t *testing.T) {
+	t.Parallel()
+
+	selector := &FillFirstSelector{}
+	now := time.Now()
+
+	baseModel := "test-model"
+	requestedModel := "test-model(high)"
+
+	high := &Auth{
+		ID:         "high",
+		Attributes: map[string]string{"priority": "10"},
+		ModelStates: map[string]*ModelState{
+			baseModel: {
+				Status:         StatusActive,
+				Unavailable:    true,
+				NextRetryAfter: now.Add(30 * time.Minute),
+				Quota: QuotaState{
+					Exceeded: true,
+				},
+			},
+		},
+	}
+	low := &Auth{
+		ID:         "low",
+		Attributes: map[string]string{"priority": "0"},
+	}
+
+	got, err := selector.Pick(context.Background(), "mixed", requestedModel, cliproxyexecutor.Options{}, []*Auth{high, low})
+	if err != nil {
+		t.Fatalf("Pick() error = %v", err)
+	}
+	if got == nil {
+		t.Fatalf("Pick() auth = nil")
+	}
+	if got.ID != "low" {
+		t.Fatalf("Pick() auth.ID = %q, want %q", got.ID, "low")
+	}
+}
+
+func TestRoundRobinSelectorPick_ThinkingSuffixSharesCursor(t *testing.T) {
+	t.Parallel()
+
+	selector := &RoundRobinSelector{}
+	auths := []*Auth{
+		{ID: "b"},
+		{ID: "a"},
+	}
+
+	first, err := selector.Pick(context.Background(), "gemini", "test-model(high)", cliproxyexecutor.Options{}, auths)
+	if err != nil {
+		t.Fatalf("Pick() first error = %v", err)
+	}
+	second, err := selector.Pick(context.Background(), "gemini", "test-model(low)", cliproxyexecutor.Options{}, auths)
+	if err != nil {
+		t.Fatalf("Pick() second error = %v", err)
+	}
+	if first == nil || second == nil {
+		t.Fatalf("Pick() returned nil auth")
+	}
+	if first.ID != "a" {
+		t.Fatalf("Pick() first auth.ID = %q, want %q", first.ID, "a")
+	}
+	if second.ID != "b" {
+		t.Fatalf("Pick() second auth.ID = %q, want %q", second.ID, "b")
+	}
+}
+
+func TestRoundRobinSelectorPick_CursorKeyCap(t *testing.T) {
+	t.Parallel()
+
+	selector := &RoundRobinSelector{maxKeys: 2}
+	auths := []*Auth{{ID: "a"}}
+
+	_, _ = selector.Pick(context.Background(), "gemini", "m1", cliproxyexecutor.Options{}, auths)
+	_, _ = selector.Pick(context.Background(), "gemini", "m2", cliproxyexecutor.Options{}, auths)
+	_, _ = selector.Pick(context.Background(), "gemini", "m3", cliproxyexecutor.Options{}, auths)
+
+	selector.mu.Lock()
+	defer selector.mu.Unlock()
+
+	if selector.cursors == nil {
+		t.Fatalf("selector.cursors = nil")
+	}
+	if len(selector.cursors) != 1 {
+		t.Fatalf("len(selector.cursors) = %d, want %d", len(selector.cursors), 1)
+	}
+	if _, ok := selector.cursors["gemini:m3"]; !ok {
+		t.Fatalf("selector.cursors missing key %q", "gemini:m3")
+	}
+}
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -409,6 +409,8 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
 		s.coreManager.RegisterExecutor(executor.NewQwenExecutor(s.cfg))
 	case "iflow":
 		s.coreManager.RegisterExecutor(executor.NewIFlowExecutor(s.cfg))
+	case "kimi":
+		s.coreManager.RegisterExecutor(executor.NewKimiExecutor(s.cfg))
 	case "kiro":
 		s.coreManager.RegisterExecutor(executor.NewKiroExecutor(s.cfg))
 	case "github-copilot":
@@ -826,6 +828,9 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 	case "iflow":
 		models = registry.GetIFlowModels()
 		models = applyExcludedModels(models, excluded)
+	case "kimi":
+		models = registry.GetKimiModels()
+    models = applyExcludedModels(models, excluded)
 	case "github-copilot":
 		models = registry.GetGitHubCopilotModels()
 		models = applyExcludedModels(models, excluded)
--- a/test/config_migration_test.go
+++ b/test/config_migration_test.go
@@ -1,195 +0,0 @@
-package test
-
-import (
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-)
-
-func TestLegacyConfigMigration(t *testing.T) {
-	t.Run("onlyLegacyFields", func(t *testing.T) {
-		path := writeConfig(t, `
-port: 8080
-generative-language-api-key:
-  - "legacy-gemini-1"
-openai-compatibility:
-  - name: "legacy-provider"
-    base-url: "https://example.com"
-    api-keys:
-      - "legacy-openai-1"
-amp-upstream-url: "https://amp.example.com"
-amp-upstream-api-key: "amp-legacy-key"
-amp-restrict-management-to-localhost: false
-amp-model-mappings:
-  - from: "old-model"
-    to: "new-model"
-`)
-		cfg, err := config.LoadConfig(path)
-		if err != nil {
-			t.Fatalf("load legacy config: %v", err)
-		}
-		if got := len(cfg.GeminiKey); got != 1 || cfg.GeminiKey[0].APIKey != "legacy-gemini-1" {
-			t.Fatalf("gemini migration mismatch: %+v", cfg.GeminiKey)
-		}
-		if got := len(cfg.OpenAICompatibility); got != 1 {
-			t.Fatalf("expected 1 openai-compat provider, got %d", got)
-		}
-		if entries := cfg.OpenAICompatibility[0].APIKeyEntries; len(entries) != 1 || entries[0].APIKey != "legacy-openai-1" {
-			t.Fatalf("openai-compat migration mismatch: %+v", entries)
-		}
-		if cfg.AmpCode.UpstreamURL != "https://amp.example.com" || cfg.AmpCode.UpstreamAPIKey != "amp-legacy-key" {
-			t.Fatalf("amp migration failed: %+v", cfg.AmpCode)
-		}
-		if cfg.AmpCode.RestrictManagementToLocalhost {
-			t.Fatalf("expected amp restriction to be false after migration")
-		}
-		if got := len(cfg.AmpCode.ModelMappings); got != 1 || cfg.AmpCode.ModelMappings[0].From != "old-model" {
-			t.Fatalf("amp mappings migration mismatch: %+v", cfg.AmpCode.ModelMappings)
-		}
-		updated := readFile(t, path)
-		if strings.Contains(updated, "generative-language-api-key") {
-			t.Fatalf("legacy gemini key still present:\n%s", updated)
-		}
-		if strings.Contains(updated, "amp-upstream-url") || strings.Contains(updated, "amp-restrict-management-to-localhost") {
-			t.Fatalf("legacy amp keys still present:\n%s", updated)
-		}
-		if strings.Contains(updated, "\n    api-keys:") {
-			t.Fatalf("legacy openai compat keys still present:\n%s", updated)
-		}
-	})
-
-	t.Run("mixedLegacyAndNewFields", func(t *testing.T) {
-		path := writeConfig(t, `
-gemini-api-key:
-  - api-key: "new-gemini"
-generative-language-api-key:
-  - "new-gemini"
-  - "legacy-gemini-only"
-openai-compatibility:
-  - name: "mixed-provider"
-    base-url: "https://mixed.example.com"
-    api-key-entries:
-      - api-key: "new-entry"
-    api-keys:
-      - "legacy-entry"
-      - "new-entry"
-`)
-		cfg, err := config.LoadConfig(path)
-		if err != nil {
-			t.Fatalf("load mixed config: %v", err)
-		}
-		if got := len(cfg.GeminiKey); got != 2 {
-			t.Fatalf("expected 2 gemini entries, got %d: %+v", got, cfg.GeminiKey)
-		}
-		seen := make(map[string]struct{}, len(cfg.GeminiKey))
-		for _, entry := range cfg.GeminiKey {
-			if _, exists := seen[entry.APIKey]; exists {
-				t.Fatalf("duplicate gemini key %q after migration", entry.APIKey)
-			}
-			seen[entry.APIKey] = struct{}{}
-		}
-		provider := cfg.OpenAICompatibility[0]
-		if got := len(provider.APIKeyEntries); got != 2 {
-			t.Fatalf("expected 2 openai entries, got %d: %+v", got, provider.APIKeyEntries)
-		}
-		entrySeen := make(map[string]struct{}, len(provider.APIKeyEntries))
-		for _, entry := range provider.APIKeyEntries {
-			if _, ok := entrySeen[entry.APIKey]; ok {
-				t.Fatalf("duplicate openai key %q after migration", entry.APIKey)
-			}
-			entrySeen[entry.APIKey] = struct{}{}
-		}
-	})
-
-	t.Run("onlyNewFields", func(t *testing.T) {
-		path := writeConfig(t, `
-gemini-api-key:
-  - api-key: "new-only"
-openai-compatibility:
-  - name: "new-only-provider"
-    base-url: "https://new-only.example.com"
-    api-key-entries:
-      - api-key: "new-only-entry"
-ampcode:
-  upstream-url: "https://amp.new"
-  upstream-api-key: "new-amp-key"
-  restrict-management-to-localhost: true
-  model-mappings:
-    - from: "a"
-      to: "b"
-`)
-		cfg, err := config.LoadConfig(path)
-		if err != nil {
-			t.Fatalf("load new config: %v", err)
-		}
-		if len(cfg.GeminiKey) != 1 || cfg.GeminiKey[0].APIKey != "new-only" {
-			t.Fatalf("unexpected gemini entries: %+v", cfg.GeminiKey)
-		}
-		if len(cfg.OpenAICompatibility) != 1 || len(cfg.OpenAICompatibility[0].APIKeyEntries) != 1 {
-			t.Fatalf("unexpected openai compat entries: %+v", cfg.OpenAICompatibility)
-		}
-		if cfg.AmpCode.UpstreamURL != "https://amp.new" || cfg.AmpCode.UpstreamAPIKey != "new-amp-key" {
-			t.Fatalf("unexpected amp config: %+v", cfg.AmpCode)
-		}
-	})
-
-	t.Run("duplicateNamesDifferentBase", func(t *testing.T) {
-		path := writeConfig(t, `
-openai-compatibility:
-  - name: "dup-provider"
-    base-url: "https://provider-a"
-    api-keys:
-      - "key-a"
-  - name: "dup-provider"
-    base-url: "https://provider-b"
-    api-keys:
-      - "key-b"
-`)
-		cfg, err := config.LoadConfig(path)
-		if err != nil {
-			t.Fatalf("load duplicate config: %v", err)
-		}
-		if len(cfg.OpenAICompatibility) != 2 {
-			t.Fatalf("expected 2 providers, got %d", len(cfg.OpenAICompatibility))
-		}
-		for _, entry := range cfg.OpenAICompatibility {
-			if len(entry.APIKeyEntries) != 1 {
-				t.Fatalf("expected 1 key entry per provider: %+v", entry)
-			}
-			switch entry.BaseURL {
-			case "https://provider-a":
-				if entry.APIKeyEntries[0].APIKey != "key-a" {
-					t.Fatalf("provider-a key mismatch: %+v", entry.APIKeyEntries)
-				}
-			case "https://provider-b":
-				if entry.APIKeyEntries[0].APIKey != "key-b" {
-					t.Fatalf("provider-b key mismatch: %+v", entry.APIKeyEntries)
-				}
-			default:
-				t.Fatalf("unexpected provider base url: %s", entry.BaseURL)
-			}
-		}
-	})
-}
-
-func writeConfig(t *testing.T, content string) string {
-	t.Helper()
-	dir := t.TempDir()
-	path := filepath.Join(dir, "config.yaml")
-	if err := os.WriteFile(path, []byte(strings.TrimSpace(content)+"\n"), 0o644); err != nil {
-		t.Fatalf("write temp config: %v", err)
-	}
-	return path
-}
-
-func readFile(t *testing.T, path string) string {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatalf("read temp config: %v", err)
-	}
-	return string(data)
-}
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -15,6 +15,7 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
+	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/kimi"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
Author	SHA1	Message	Date
Luis Pater	d109be159c	Merge pull request #197 from router-for-me/plus v6.8.4	2026-02-07 09:37:04 +08:00
Luis Pater	eddf31e55b	Merge branch 'main' into plus	2026-02-07 09:36:52 +08:00
Luis Pater	7e9d0db6aa	Merge pull request #1467 from dusty-du/fix/kimi-toolcall-reasoning-content Fix Kimi tool-call payload normalization for reasoning_content	2026-02-07 09:35:04 +08:00
Luis Pater	2f1874ede5	chore(docs): remove Cubence sponsorship from README files and delete related asset	2026-02-07 08:55:14 +08:00
Luis Pater	6b83585b53	Merge branch 'router-for-me:main' into main	2026-02-07 08:52:51 +08:00
Luis Pater	78ef04fcf1	fix(kimi): reduce redundant payload cloning and simplify translation calls	2026-02-07 08:51:48 +08:00
hkfires	b7e4f00c5f	fix(translator): correct gemini-cli log prefix	2026-02-07 08:40:09 +08:00
Luis Pater	c20507c15e	Merge branch 'router-for-me:main' into main	2026-02-07 06:43:17 +08:00
Luis Pater	f7d0019df7	fix(kimi): update base URL and integrate ClaudeExecutor fallback - Updated `KimiAPIBaseURL` to remove versioning from the root path. - Integrated `ClaudeExecutor` fallback in `KimiExecutor` methods for compatibility with Claude requests. - Simplified token counting by delegating to `ClaudeExecutor`.	2026-02-07 06:42:08 +08:00
test	52364af5bf	Fix Kimi tool-call reasoning_content normalization	2026-02-06 14:46:16 -05:00
Luis Pater	f410dd0440	Merge pull request #1390 from sususu98/fix/400-invalid-request-no-retry fix(auth): 400 invalid_request_error 立即返回不再重试	2026-02-07 03:14:25 +08:00
Luis Pater	eb5582c17c	Merge pull request #1386 from shenshuoyaoyouguang/sync-auth-changes fix(auth): normalize model key for thinking suffix in selectors	2026-02-07 03:12:01 +08:00
Luis Pater	1c6cb2bec3	Merge pull request #1239 from ThanhNguyxn/fix/gitstore-gc-after-squash fix(store): run GC after squashing history to prevent loose object accumulation	2026-02-07 02:51:27 +08:00
Luis Pater	80b5e79e75	fix(translator): normalize and restrict `stop_reason`/`finish_reason` usage - Standardized the handling of `stop_reason` and `finish_reason` across Codex and Gemini responses. - Restricted pass-through of specific reasons (`max_tokens`, `stop`) for consistency. - Enhanced fallback logic for undefined reasons.	2026-02-07 02:07:51 +08:00
Luis Pater	d182e893b6	Merge pull request #194 from PancakeZik/fix/assistant-content-parroting fix: replace assistant placeholder text to prevent model parroting	2026-02-07 01:38:58 +08:00
Luis Pater	2e8d49a641	Merge pull request #191 from CheesesNguyen/feat/kiro-api-models-and-context-usage feat(kiro): add contextUsageEvent handler	2026-02-07 01:33:49 +08:00
Luis Pater	6abd7d27d9	Merge pull request #190 from taetaetae/fix/kiro-claude-compaction-current-user-empty-content fix(kiro): handle empty content in current user message for compaction	2026-02-07 01:33:01 +08:00
Luis Pater	8fa12af403	Merge pull request #195 from router-for-me/plus v6.8.1	2026-02-07 01:31:40 +08:00
Luis Pater	77586ed7d3	Merge branch 'main' into plus	2026-02-07 01:31:21 +08:00
Luis Pater	394497fb2f	Merge pull request #1465 from router-for-me/kimi-fix fix(kimi): add OAuth model-alias channel support and cover OAuth excl…	2026-02-07 01:27:30 +08:00
LTbinglingfeng	fc7b6ef086	fix(kimi): add OAuth model-alias channel support and cover OAuth excluded-models with tests	2026-02-07 01:16:39 +08:00
Joao	98edcad39d	fix: replace assistant placeholder text to prevent model parroting Kiro API requires non-empty content on assistant messages, so CLIProxyAPI injects placeholder text when assistant messages only contain tool_use blocks (no text). The previous placeholders were conversational phrases: - DefaultAssistantContentWithTools: "I'll help you with that." - DefaultAssistantContent: "I understand." In agentic sessions with many tool calls, these phrases appeared dozens of times in conversation history. Opus 4.6 (and likely other models) picked up on this pattern and started parroting "I'll help you with that." before every tool call in its actual responses. Fix: Replace both placeholders with a single dot ".", which satisfies Kiro's non-empty requirement without giving the model a phrase to mimic.	2026-02-06 16:42:21 +00:00
Luis Pater	1187aa8222	feat(translator): capture cached token count in usage metadata and handle prompt caching - Added support to extract and include `cachedContentTokenCount` in `usage.prompt_tokens_details`. - Logged warnings for failures to set cached token count for better debugging.	2026-02-06 21:28:40 +08:00
Luis Pater	a35d66443b	Merge pull request #192 from router-for-me/plus v6.8.0	2026-02-06 21:04:40 +08:00
Luis Pater	40ad4a42ea	Merge branch 'main' into plus	2026-02-06 21:04:32 +08:00
Luis Pater	dc9b4dd017	Merge branch 'kimi-provider-support-v2' into dev	2026-02-06 20:51:48 +08:00
Luis Pater	68cb81a258	feat: add Kimi authentication support and streamline device ID handling - Introduced `RequestKimiToken` API for Kimi authentication flow. - Integrated device ID management throughout Kimi-related components. - Enhanced header management for Kimi API requests with device ID context.	2026-02-06 20:43:30 +08:00
CheesesNguyen	16693053f5	feat(kiro): add contextUsageEvent handler and simplify model structs - Add contextUsageEvent case handler in kiro_executor.go for both parseEventStream and streamToChannel functions - Handle nested format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}} - Keep KiroModel struct minimal with only essential fields - Remove unused KiroPromptCachingInfo struct from kiro_model_converter.go - Remove unused SupportedInputTypes and PromptCaching fields from KiroAPIModel	2026-02-06 11:12:27 +07:00
taetaetae	4e3bad3907	fix(kiro): handle empty content in current user message for compaction Problem: - PR #186 fixed empty content for assistant messages and history user messages - But current user message (isLastMessage == true) was not fixed - When user message contains only tool_result (no text), content becomes empty - This causes 'Improperly formed request' errors from Kiro API - Compaction requests from OpenCode commonly have this pattern Solution: - Move empty content check BEFORE the isLastMessage branch - Apply fallback content to ALL user messages, not just history - Add DefaultUserContentWithToolResults and DefaultUserContent constants Fixes compaction failures for OpenCode + Quotio + CLIProxyAPIPlus + Kiro stack	2026-02-06 11:58:43 +09:00
hkfires	c874f19f2a	refactor(config): disable automatic migration during server startup	2026-02-06 09:57:47 +08:00
test	f5f26f0cbe	Add Kimi (Moonshot AI) provider support - OAuth2 device authorization grant flow (RFC 8628) for authentication - Streaming and non-streaming chat completions via OpenAI-compatible API - Models: kimi-k2, kimi-k2-thinking, kimi-k2.5 - CLI `--kimi-login` command for device flow auth - Token management with automatic refresh - Thinking/reasoning effort support for thinking-enabled models Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-02-05 19:24:46 -05:00
Luis Pater	e7e3ca1efb	Merge branch 'router-for-me:main' into main	2026-02-06 05:45:21 +08:00
Luis Pater	4b00312fef	Merge pull request #1435 from tianyicui/fix/haiku-4-5-thinking-support fix: Enable extended thinking support for Claude Haiku 4.5	2026-02-06 05:44:14 +08:00
Luis Pater	c5fd3db01e	Merge pull request #1446 from qyhfrank/fix-claude-opus-4-6-model-metadata fix(registry): correct Claude Opus 4.6 model metadata	2026-02-06 05:43:32 +08:00
Luis Pater	e35ffaa925	Merge pull request #186 from taetaetae/fix/kiro-claude-compaction-empty-content fix(kiro): handle empty content in Claude format assistant messages	2026-02-06 05:39:41 +08:00
Frank Qing	f870a9d2a7	fix(registry): correct Claude Opus 4.6 model metadata	2026-02-06 05:39:41 +08:00
Luis Pater	165e03f3a7	Merge branch 'router-for-me:main' into main	2026-02-06 05:32:10 +08:00
Luis Pater	86bdb7808c	Merge pull request #189 from PancakeZik/main feat: add Claude Opus 4.6 support for Kiro	2026-02-06 05:31:54 +08:00
Luis Pater	b4e034be1c	refactor(executor): centralize Codex client version and user agent constants - Introduced `codexClientVersion` and `codexUserAgent` constants for better maintainability. - Updated `EnsureHeader` calls to use the new constants.	2026-02-06 05:30:28 +08:00
Joao	84fcebf538	feat: add Claude Opus 4.6 support for Kiro - Add kiro-claude-opus-4-6 and kiro-claude-opus-4-6-agentic to model registry - Add model ID mappings for claude-opus-4.6 variants - Support both kiro- prefix and native format (claude-opus-4.6) - Tested and working with Kiro API	2026-02-05 21:26:29 +00:00
Luis Pater	74d9a1ffed	Merge branch 'router-for-me:main' into main	2026-02-06 03:27:27 +08:00
Luis Pater	a5a25dec57	refactor(translator, executor): remove redundant `bytes.Clone` calls for improved performance - Replaced all instances of `bytes.Clone` with direct references to enhance efficiency. - Simplified payload handling across executors and translators by eliminating unnecessary data duplication.	2026-02-06 03:26:29 +08:00
Luis Pater	c71905e5e8	Merge pull request #1440 from kvokka/add-cc-opus-4-6 feat(registry): register Claude 4.6 static data	2026-02-06 03:23:59 +08:00
kvokka	bc78d668ac	feat(registry): register Claude 4.6 static data Add model definition for Claude 4.6 Opus with 200k context length and thinking support capabilities.	2026-02-05 23:13:36 +04:00
Luis Pater	e93eebc2e9	Merge branch 'router-for-me:main' into main	2026-02-06 01:53:55 +08:00
Luis Pater	5bd0896ad7	feat(registry): add GPT 5.3 Codex model to static data	2026-02-06 01:52:41 +08:00
Luis Pater	09ecfbcaed	refactor(executor): optimize payload cloning and streamline SDK translator usage - Replaced unnecessary `bytes.Clone` calls for `opts.OriginalRequest` throughout executors. - Introduced intermediate variable `originalPayloadSource` to simplify payload processing. - Ensured better clarity and structure in request translation logic.	2026-02-06 01:44:20 +08:00
Luis Pater	f0bd14b64f	refactor(util): optimize JSON schema processing and keyword removal logic - Consolidated path-finding logic into a new `findPathsByFields` helper function. - Refactored repetitive loop structures to improve readability and performance. - Added depth-based sorting for deletion paths to ensure proper removal order.	2026-02-06 00:19:56 +08:00
taetaetae	14f044ce4f	refactor: extract default assistant content to shared constants Apply code review feedback from gemini-code-assist: - Move fallback strings to kirocommon package as exported constants - Update kiro_claude_request.go to use shared constants - Update kiro_openai_request.go to use shared constants - Improves maintainability and avoids duplication	2026-02-05 23:36:57 +09:00
taetaetae	88872baffc	fix(kiro): handle empty content in Claude format assistant messages Problem: - PR #181 fixed empty content for OpenAI format (kiro_openai_request.go) - But Claude format (kiro_claude_request.go) was not fixed - OpenCode uses Claude format (/v1/messages endpoint) - When assistant messages have only tool_use (no text), content becomes empty - This causes 'Improperly formed request' errors from Kiro API Example of problematic message format: { "role": "assistant", "content": [ {"type": "tool_use", "id": "...", "name": "todowrite", "input": {...}} ] } Solution: - Add empty content fallback in BuildAssistantMessageStruct (Claude format) - Same fix as PR #181 applied to kiro_openai_request.go Fixes compaction failures for OpenCode + Quotio + CLIProxyAPIPlus + Kiro stack	2026-02-05 23:27:35 +09:00
Tianyi Cui	706590c62a	fix: Enable extended thinking support for Claude Haiku 4.5 Claude Haiku 4.5 (claude-haiku-4-5-20251001) supports extended thinking according to Anthropic's official documentation: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking The model was incorrectly marked as not supporting thinking in the static model definitions. This fix adds ThinkingSupport with the same parameters as other Claude 4.5 models (Sonnet, Opus): - Min: 1024 tokens - Max: 128000 tokens - ZeroAllowed: true - DynamicAllowed: false	2026-02-05 19:03:23 +08:00
sususu98	233be6272a	fix(auth): 400 invalid_request_error 立即返回不再重试当上游返回 400 Bad Request 且错误消息包含 invalid_request_error 时，表示请求本身格式错误，切换账户不会改变结果。修改： - 添加 isRequestInvalidError 判定函数 - 内层循环遇到此错误立即返回，不遍历其他账户 - 外层循环不再对此类错误进行重试	2026-02-02 17:35:51 +08:00
chujian	47cb52385e	sdk/cliproxy/auth: update selector tests	2026-02-02 05:26:04 +08:00
ThanhNguyxn	a406ca2d5a	fix(store): add proper GC with Handler and interval gating Address maintainer feedback on PR #1239: - Add Handler: repo.DeleteObject to prevent nil panic in Prune - Handle ErrLooseObjectsNotSupported gracefully - Add 5-minute interval gating to avoid repack overhead on every write - Remove sirupsen/logrus dependency (best-effort silent GC) Fixes #1104	2026-02-01 11:19:43 +07:00