mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-30 01:06:39 +00:00
Compare commits
39 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7ecc7aabda | ||
|
|
79033aee34 | ||
|
|
b6ad243e9e | ||
|
|
92ca5078c1 | ||
|
|
aca8523060 | ||
|
|
1ea0cff3a4 | ||
|
|
75793a18f0 | ||
|
|
58866b21cb | ||
|
|
660aabc437 | ||
|
|
db80b20bc2 | ||
|
|
566120e8d5 | ||
|
|
f3f0f1717d | ||
|
|
05b499fb83 | ||
|
|
7621ec609e | ||
|
|
9f511f0024 | ||
|
|
374faa2640 | ||
|
|
ba6aa5fbbe | ||
|
|
1c52a89535 | ||
|
|
e7cedbee6e | ||
|
|
75ce0919a0 | ||
|
|
7f4f6bc9ca | ||
|
|
b8194e717c | ||
|
|
15c3cc3a50 | ||
|
|
d131435e25 | ||
|
|
6e43669498 | ||
|
|
5ab3032335 | ||
|
|
1215c635a0 | ||
|
|
54d4fd7f84 | ||
|
|
8dc690a638 | ||
|
|
fdeb84db2b | ||
|
|
84920cb670 | ||
|
|
204bba9dea | ||
|
|
35fdd7bc05 | ||
|
|
fc054db51a | ||
|
|
6e2306a5f2 | ||
|
|
b09e2115d1 | ||
|
|
6a94afab6c | ||
|
|
a68c97a40f | ||
|
|
40e7f066e4 |
@@ -11,7 +11,7 @@ The Plus release stays in lockstep with the mainline features.
|
|||||||
## Differences from the Mainline
|
## Differences from the Mainline
|
||||||
|
|
||||||
- Added GitHub Copilot support (OAuth login), provided by [em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)
|
- Added GitHub Copilot support (OAuth login), provided by [em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)
|
||||||
- Added Kiro (AWS CodeWhisperer) support (OAuth login), provided by [fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)
|
- Added Kiro (AWS CodeWhisperer) support (OAuth login), provided by [fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration), [Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
## 与主线版本版本差异
|
## 与主线版本版本差异
|
||||||
|
|
||||||
- 新增 GitHub Copilot 支持(OAuth 登录),由[em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)提供
|
- 新增 GitHub Copilot 支持(OAuth 登录),由[em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)提供
|
||||||
- 新增 Kiro (AWS CodeWhisperer) 支持 (OAuth 登录), 由[fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)提供
|
- 新增 Kiro (AWS CodeWhisperer) 支持 (OAuth 登录), 由[fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)、[Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)提供
|
||||||
|
|
||||||
## 贡献
|
## 贡献
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,9 @@ package management
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -23,6 +26,7 @@ import (
|
|||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
|
||||||
geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
|
geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
|
||||||
iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
|
iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
|
||||||
|
kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
@@ -1745,6 +1749,17 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for duplicate BXAuth before authentication
|
||||||
|
bxAuth := iflowauth.ExtractBXAuth(cookieValue)
|
||||||
|
if existingFile, err := iflowauth.CheckDuplicateBXAuth(h.cfg.AuthDir, bxAuth); err != nil {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"status": "error", "error": "failed to check duplicate"})
|
||||||
|
return
|
||||||
|
} else if existingFile != "" {
|
||||||
|
existingFileName := filepath.Base(existingFile)
|
||||||
|
c.JSON(http.StatusConflict, gin.H{"status": "error", "error": "duplicate BXAuth found", "existing_file": existingFileName})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
authSvc := iflowauth.NewIFlowAuth(h.cfg)
|
authSvc := iflowauth.NewIFlowAuth(h.cfg)
|
||||||
tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
|
tokenData, errAuth := authSvc.AuthenticateWithCookie(ctx, cookieValue)
|
||||||
if errAuth != nil {
|
if errAuth != nil {
|
||||||
@@ -1767,11 +1782,12 @@ func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
tokenStorage.Email = email
|
tokenStorage.Email = email
|
||||||
|
timestamp := time.Now().Unix()
|
||||||
|
|
||||||
record := &coreauth.Auth{
|
record := &coreauth.Auth{
|
||||||
ID: fmt.Sprintf("iflow-%s.json", fileName),
|
ID: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
|
||||||
Provider: "iflow",
|
Provider: "iflow",
|
||||||
FileName: fmt.Sprintf("iflow-%s.json", fileName),
|
FileName: fmt.Sprintf("iflow-%s-%d.json", fileName, timestamp),
|
||||||
Storage: tokenStorage,
|
Storage: tokenStorage,
|
||||||
Metadata: map[string]any{
|
Metadata: map[string]any{
|
||||||
"email": email,
|
"email": email,
|
||||||
@@ -2142,9 +2158,35 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
|
|||||||
|
|
||||||
func (h *Handler) GetAuthStatus(c *gin.Context) {
|
func (h *Handler) GetAuthStatus(c *gin.Context) {
|
||||||
state := c.Query("state")
|
state := c.Query("state")
|
||||||
if err, ok := getOAuthStatus(state); ok {
|
if statusValue, ok := getOAuthStatus(state); ok {
|
||||||
if err != "" {
|
if statusValue != "" {
|
||||||
c.JSON(200, gin.H{"status": "error", "error": err})
|
// Check for device_code prefix (Kiro AWS Builder ID flow)
|
||||||
|
// Format: "device_code|verification_url|user_code"
|
||||||
|
// Using "|" as separator because URLs contain ":"
|
||||||
|
if strings.HasPrefix(statusValue, "device_code|") {
|
||||||
|
parts := strings.SplitN(statusValue, "|", 3)
|
||||||
|
if len(parts) == 3 {
|
||||||
|
c.JSON(200, gin.H{
|
||||||
|
"status": "device_code",
|
||||||
|
"verification_url": parts[1],
|
||||||
|
"user_code": parts[2],
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check for auth_url prefix (Kiro social auth flow)
|
||||||
|
// Format: "auth_url|url"
|
||||||
|
// Using "|" as separator because URLs contain ":"
|
||||||
|
if strings.HasPrefix(statusValue, "auth_url|") {
|
||||||
|
authURL := strings.TrimPrefix(statusValue, "auth_url|")
|
||||||
|
c.JSON(200, gin.H{
|
||||||
|
"status": "auth_url",
|
||||||
|
"url": authURL,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Otherwise treat as error
|
||||||
|
c.JSON(200, gin.H{"status": "error", "error": statusValue})
|
||||||
} else {
|
} else {
|
||||||
c.JSON(200, gin.H{"status": "wait"})
|
c.JSON(200, gin.H{"status": "wait"})
|
||||||
return
|
return
|
||||||
@@ -2154,3 +2196,295 @@ func (h *Handler) GetAuthStatus(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
deleteOAuthStatus(state)
|
deleteOAuthStatus(state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const kiroCallbackPort = 9876
|
||||||
|
|
||||||
|
func (h *Handler) RequestKiroToken(c *gin.Context) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Get the login method from query parameter (default: aws for device code flow)
|
||||||
|
method := strings.ToLower(strings.TrimSpace(c.Query("method")))
|
||||||
|
if method == "" {
|
||||||
|
method = "aws"
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Initializing Kiro authentication...")
|
||||||
|
|
||||||
|
state := fmt.Sprintf("kiro-%d", time.Now().UnixNano())
|
||||||
|
|
||||||
|
switch method {
|
||||||
|
case "aws", "builder-id":
|
||||||
|
// AWS Builder ID uses device code flow (no callback needed)
|
||||||
|
go func() {
|
||||||
|
ssoClient := kiroauth.NewSSOOIDCClient(h.cfg)
|
||||||
|
|
||||||
|
// Step 1: Register client
|
||||||
|
fmt.Println("Registering client...")
|
||||||
|
regResp, err := ssoClient.RegisterClient(ctx)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Failed to register client: %v", err)
|
||||||
|
setOAuthStatus(state, "Failed to register client")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Start device authorization
|
||||||
|
fmt.Println("Starting device authorization...")
|
||||||
|
authResp, err := ssoClient.StartDeviceAuthorization(ctx, regResp.ClientID, regResp.ClientSecret)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Failed to start device auth: %v", err)
|
||||||
|
setOAuthStatus(state, "Failed to start device authorization")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the verification URL for the frontend to display
|
||||||
|
// Using "|" as separator because URLs contain ":"
|
||||||
|
setOAuthStatus(state, "device_code|"+authResp.VerificationURIComplete+"|"+authResp.UserCode)
|
||||||
|
|
||||||
|
// Step 3: Poll for token
|
||||||
|
fmt.Println("Waiting for authorization...")
|
||||||
|
interval := 5 * time.Second
|
||||||
|
if authResp.Interval > 0 {
|
||||||
|
interval = time.Duration(authResp.Interval) * time.Second
|
||||||
|
}
|
||||||
|
deadline := time.Now().Add(time.Duration(authResp.ExpiresIn) * time.Second)
|
||||||
|
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
setOAuthStatus(state, "Authorization cancelled")
|
||||||
|
return
|
||||||
|
case <-time.After(interval):
|
||||||
|
tokenResp, err := ssoClient.CreateToken(ctx, regResp.ClientID, regResp.ClientSecret, authResp.DeviceCode)
|
||||||
|
if err != nil {
|
||||||
|
errStr := err.Error()
|
||||||
|
if strings.Contains(errStr, "authorization_pending") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.Contains(errStr, "slow_down") {
|
||||||
|
interval += 5 * time.Second
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
log.Errorf("Token creation failed: %v", err)
|
||||||
|
setOAuthStatus(state, "Token creation failed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Success! Save the token
|
||||||
|
expiresAt := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
|
||||||
|
email := kiroauth.ExtractEmailFromJWT(tokenResp.AccessToken)
|
||||||
|
|
||||||
|
idPart := kiroauth.SanitizeEmailForFilename(email)
|
||||||
|
if idPart == "" {
|
||||||
|
idPart = fmt.Sprintf("%d", time.Now().UnixNano()%100000)
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
fileName := fmt.Sprintf("kiro-aws-%s.json", idPart)
|
||||||
|
|
||||||
|
record := &coreauth.Auth{
|
||||||
|
ID: fileName,
|
||||||
|
Provider: "kiro",
|
||||||
|
FileName: fileName,
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"type": "kiro",
|
||||||
|
"access_token": tokenResp.AccessToken,
|
||||||
|
"refresh_token": tokenResp.RefreshToken,
|
||||||
|
"expires_at": expiresAt.Format(time.RFC3339),
|
||||||
|
"auth_method": "builder-id",
|
||||||
|
"provider": "AWS",
|
||||||
|
"client_id": regResp.ClientID,
|
||||||
|
"client_secret": regResp.ClientSecret,
|
||||||
|
"email": email,
|
||||||
|
"last_refresh": now.Format(time.RFC3339),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||||
|
if errSave != nil {
|
||||||
|
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||||
|
setOAuthStatus(state, "Failed to save authentication tokens")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
|
||||||
|
if email != "" {
|
||||||
|
fmt.Printf("Authenticated as: %s\n", email)
|
||||||
|
}
|
||||||
|
deleteOAuthStatus(state)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setOAuthStatus(state, "Authorization timed out")
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Return immediately with the state for polling
|
||||||
|
c.JSON(200, gin.H{"status": "ok", "state": state, "method": "device_code"})
|
||||||
|
|
||||||
|
case "google", "github":
|
||||||
|
// Social auth uses protocol handler - for WEB UI we use a callback forwarder
|
||||||
|
provider := "Google"
|
||||||
|
if method == "github" {
|
||||||
|
provider = "Github"
|
||||||
|
}
|
||||||
|
|
||||||
|
isWebUI := isWebUIRequest(c)
|
||||||
|
if isWebUI {
|
||||||
|
targetURL, errTarget := h.managementCallbackURL("/kiro/callback")
|
||||||
|
if errTarget != nil {
|
||||||
|
log.WithError(errTarget).Error("failed to compute kiro callback target")
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "callback server unavailable"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, errStart := startCallbackForwarder(kiroCallbackPort, "kiro", targetURL); errStart != nil {
|
||||||
|
log.WithError(errStart).Error("failed to start kiro callback forwarder")
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start callback server"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if isWebUI {
|
||||||
|
defer stopCallbackForwarder(kiroCallbackPort)
|
||||||
|
}
|
||||||
|
|
||||||
|
socialClient := kiroauth.NewSocialAuthClient(h.cfg)
|
||||||
|
|
||||||
|
// Generate PKCE codes
|
||||||
|
codeVerifier, codeChallenge, err := generateKiroPKCE()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Failed to generate PKCE: %v", err)
|
||||||
|
setOAuthStatus(state, "Failed to generate PKCE")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build login URL
|
||||||
|
authURL := fmt.Sprintf("%s/login?idp=%s&redirect_uri=%s&code_challenge=%s&code_challenge_method=S256&state=%s&prompt=select_account",
|
||||||
|
"https://prod.us-east-1.auth.desktop.kiro.dev",
|
||||||
|
provider,
|
||||||
|
url.QueryEscape(kiroauth.KiroRedirectURI),
|
||||||
|
codeChallenge,
|
||||||
|
state,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Store auth URL for frontend
|
||||||
|
// Using "|" as separator because URLs contain ":"
|
||||||
|
setOAuthStatus(state, "auth_url|"+authURL)
|
||||||
|
|
||||||
|
// Wait for callback file
|
||||||
|
waitFile := filepath.Join(h.cfg.AuthDir, fmt.Sprintf(".oauth-kiro-%s.oauth", state))
|
||||||
|
deadline := time.Now().Add(5 * time.Minute)
|
||||||
|
|
||||||
|
for {
|
||||||
|
if time.Now().After(deadline) {
|
||||||
|
log.Error("oauth flow timed out")
|
||||||
|
setOAuthStatus(state, "OAuth flow timed out")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if data, errR := os.ReadFile(waitFile); errR == nil {
|
||||||
|
var m map[string]string
|
||||||
|
_ = json.Unmarshal(data, &m)
|
||||||
|
_ = os.Remove(waitFile)
|
||||||
|
if errStr := m["error"]; errStr != "" {
|
||||||
|
log.Errorf("Authentication failed: %s", errStr)
|
||||||
|
setOAuthStatus(state, "Authentication failed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if m["state"] != state {
|
||||||
|
log.Errorf("State mismatch")
|
||||||
|
setOAuthStatus(state, "State mismatch")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
code := m["code"]
|
||||||
|
if code == "" {
|
||||||
|
log.Error("No authorization code received")
|
||||||
|
setOAuthStatus(state, "No authorization code received")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exchange code for tokens
|
||||||
|
tokenReq := &kiroauth.CreateTokenRequest{
|
||||||
|
Code: code,
|
||||||
|
CodeVerifier: codeVerifier,
|
||||||
|
RedirectURI: kiroauth.KiroRedirectURI,
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenResp, errToken := socialClient.CreateToken(ctx, tokenReq)
|
||||||
|
if errToken != nil {
|
||||||
|
log.Errorf("Failed to exchange code for tokens: %v", errToken)
|
||||||
|
setOAuthStatus(state, "Failed to exchange code for tokens")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the token
|
||||||
|
expiresIn := tokenResp.ExpiresIn
|
||||||
|
if expiresIn <= 0 {
|
||||||
|
expiresIn = 3600
|
||||||
|
}
|
||||||
|
expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second)
|
||||||
|
email := kiroauth.ExtractEmailFromJWT(tokenResp.AccessToken)
|
||||||
|
|
||||||
|
idPart := kiroauth.SanitizeEmailForFilename(email)
|
||||||
|
if idPart == "" {
|
||||||
|
idPart = fmt.Sprintf("%d", time.Now().UnixNano()%100000)
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
fileName := fmt.Sprintf("kiro-%s-%s.json", strings.ToLower(provider), idPart)
|
||||||
|
|
||||||
|
record := &coreauth.Auth{
|
||||||
|
ID: fileName,
|
||||||
|
Provider: "kiro",
|
||||||
|
FileName: fileName,
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"type": "kiro",
|
||||||
|
"access_token": tokenResp.AccessToken,
|
||||||
|
"refresh_token": tokenResp.RefreshToken,
|
||||||
|
"profile_arn": tokenResp.ProfileArn,
|
||||||
|
"expires_at": expiresAt.Format(time.RFC3339),
|
||||||
|
"auth_method": "social",
|
||||||
|
"provider": provider,
|
||||||
|
"email": email,
|
||||||
|
"last_refresh": now.Format(time.RFC3339),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||||
|
if errSave != nil {
|
||||||
|
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||||
|
setOAuthStatus(state, "Failed to save authentication tokens")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
|
||||||
|
if email != "" {
|
||||||
|
fmt.Printf("Authenticated as: %s\n", email)
|
||||||
|
}
|
||||||
|
deleteOAuthStatus(state)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
setOAuthStatus(state, "")
|
||||||
|
c.JSON(200, gin.H{"status": "ok", "state": state, "method": "social"})
|
||||||
|
|
||||||
|
default:
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid method, use 'aws', 'google', or 'github'"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateKiroPKCE generates PKCE code verifier and challenge for Kiro OAuth.
|
||||||
|
func generateKiroPKCE() (verifier, challenge string, err error) {
|
||||||
|
b := make([]byte, 32)
|
||||||
|
if _, err := io.ReadFull(rand.Reader, b); err != nil {
|
||||||
|
return "", "", fmt.Errorf("failed to generate random bytes: %w", err)
|
||||||
|
}
|
||||||
|
verifier = base64.RawURLEncoding.EncodeToString(b)
|
||||||
|
|
||||||
|
h := sha256.Sum256([]byte(verifier))
|
||||||
|
challenge = base64.RawURLEncoding.EncodeToString(h[:])
|
||||||
|
|
||||||
|
return verifier, challenge, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,8 +3,11 @@ package amp
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httputil"
|
"net/http/httputil"
|
||||||
"net/url"
|
"net/url"
|
||||||
@@ -62,7 +65,15 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
|
|||||||
// Modify incoming responses to handle gzip without Content-Encoding
|
// Modify incoming responses to handle gzip without Content-Encoding
|
||||||
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
||||||
proxy.ModifyResponse = func(resp *http.Response) error {
|
proxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// Only process successful responses
|
// Log upstream error responses for diagnostics (502, 503, etc.)
|
||||||
|
// These are NOT proxy connection errors - the upstream responded with an error status
|
||||||
|
if resp.StatusCode >= 500 {
|
||||||
|
log.Errorf("amp upstream responded with error [%d] for %s %s", resp.StatusCode, resp.Request.Method, resp.Request.URL.Path)
|
||||||
|
} else if resp.StatusCode >= 400 {
|
||||||
|
log.Warnf("amp upstream responded with client error [%d] for %s %s", resp.StatusCode, resp.Request.Method, resp.Request.URL.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only process successful responses for gzip decompression
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -146,9 +157,29 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error handler for proxy failures
|
// Error handler for proxy failures with detailed error classification for diagnostics
|
||||||
proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, err error) {
|
proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, err error) {
|
||||||
log.Errorf("amp upstream proxy error for %s %s: %v", req.Method, req.URL.Path, err)
|
// Classify the error type for better diagnostics
|
||||||
|
var errType string
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
errType = "timeout"
|
||||||
|
} else if errors.Is(err, context.Canceled) {
|
||||||
|
errType = "canceled"
|
||||||
|
} else if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
|
||||||
|
errType = "dial_timeout"
|
||||||
|
} else if _, ok := err.(net.Error); ok {
|
||||||
|
errType = "network_error"
|
||||||
|
} else {
|
||||||
|
errType = "connection_error"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't log as error for context canceled - it's usually client closing connection
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
log.Debugf("amp upstream proxy [%s]: client canceled request for %s %s", errType, req.Method, req.URL.Path)
|
||||||
|
} else {
|
||||||
|
log.Errorf("amp upstream proxy error [%s] for %s %s: %v", errType, req.Method, req.URL.Path, err)
|
||||||
|
}
|
||||||
|
|
||||||
rw.Header().Set("Content-Type", "application/json")
|
rw.Header().Set("Content-Type", "application/json")
|
||||||
rw.WriteHeader(http.StatusBadGateway)
|
rw.WriteHeader(http.StatusBadGateway)
|
||||||
_, _ = rw.Write([]byte(`{"error":"amp_upstream_proxy_error","message":"Failed to reach Amp upstream"}`))
|
_, _ = rw.Write([]byte(`{"error":"amp_upstream_proxy_error","message":"Failed to reach Amp upstream"}`))
|
||||||
|
|||||||
@@ -349,6 +349,12 @@ func (s *Server) setupRoutes() {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Event logging endpoint - handles Claude Code telemetry requests
|
||||||
|
// Returns 200 OK to prevent 404 errors in logs
|
||||||
|
s.engine.POST("/api/event_logging/batch", func(c *gin.Context) {
|
||||||
|
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||||
|
})
|
||||||
s.engine.POST("/v1internal:method", geminiCLIHandlers.CLIHandler)
|
s.engine.POST("/v1internal:method", geminiCLIHandlers.CLIHandler)
|
||||||
|
|
||||||
// OAuth callback endpoints (reuse main server port)
|
// OAuth callback endpoints (reuse main server port)
|
||||||
@@ -415,6 +421,18 @@ func (s *Server) setupRoutes() {
|
|||||||
c.String(http.StatusOK, oauthCallbackSuccessHTML)
|
c.String(http.StatusOK, oauthCallbackSuccessHTML)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
s.engine.GET("/kiro/callback", func(c *gin.Context) {
|
||||||
|
code := c.Query("code")
|
||||||
|
state := c.Query("state")
|
||||||
|
errStr := c.Query("error")
|
||||||
|
if state != "" {
|
||||||
|
file := fmt.Sprintf("%s/.oauth-kiro-%s.oauth", s.cfg.AuthDir, state)
|
||||||
|
_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
|
||||||
|
}
|
||||||
|
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||||
|
c.String(http.StatusOK, oauthCallbackSuccessHTML)
|
||||||
|
})
|
||||||
|
|
||||||
// Management routes are registered lazily by registerManagementRoutes when a secret is configured.
|
// Management routes are registered lazily by registerManagementRoutes when a secret is configured.
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -580,6 +598,7 @@ func (s *Server) registerManagementRoutes() {
|
|||||||
mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
|
mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
|
||||||
mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
|
mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
|
||||||
mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
|
mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
|
||||||
|
mgmt.GET("/kiro-auth-url", s.mgmt.RequestKiroToken)
|
||||||
mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
|
mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
package iflow
|
package iflow
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -36,3 +39,61 @@ func SanitizeIFlowFileName(raw string) string {
|
|||||||
}
|
}
|
||||||
return strings.TrimSpace(result.String())
|
return strings.TrimSpace(result.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ExtractBXAuth extracts the BXAuth value from a cookie string.
|
||||||
|
func ExtractBXAuth(cookie string) string {
|
||||||
|
parts := strings.Split(cookie, ";")
|
||||||
|
for _, part := range parts {
|
||||||
|
part = strings.TrimSpace(part)
|
||||||
|
if strings.HasPrefix(part, "BXAuth=") {
|
||||||
|
return strings.TrimPrefix(part, "BXAuth=")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckDuplicateBXAuth checks if the given BXAuth value already exists in any iflow auth file.
|
||||||
|
// Returns the path of the existing file if found, empty string otherwise.
|
||||||
|
func CheckDuplicateBXAuth(authDir, bxAuth string) (string, error) {
|
||||||
|
if bxAuth == "" {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(authDir)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("read auth dir failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := entry.Name()
|
||||||
|
if !strings.HasPrefix(name, "iflow-") || !strings.HasSuffix(name, ".json") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
filePath := filepath.Join(authDir, name)
|
||||||
|
data, err := os.ReadFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var tokenData struct {
|
||||||
|
Cookie string `json:"cookie"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &tokenData); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
existingBXAuth := ExtractBXAuth(tokenData.Cookie)
|
||||||
|
if existingBXAuth != "" && existingBXAuth == bxAuth {
|
||||||
|
return filePath, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -506,11 +506,18 @@ func (ia *IFlowAuth) CreateCookieTokenStorage(data *IFlowTokenData) *IFlowTokenS
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only save the BXAuth field from the cookie
|
||||||
|
bxAuth := ExtractBXAuth(data.Cookie)
|
||||||
|
cookieToSave := ""
|
||||||
|
if bxAuth != "" {
|
||||||
|
cookieToSave = "BXAuth=" + bxAuth + ";"
|
||||||
|
}
|
||||||
|
|
||||||
return &IFlowTokenStorage{
|
return &IFlowTokenStorage{
|
||||||
APIKey: data.APIKey,
|
APIKey: data.APIKey,
|
||||||
Email: data.Email,
|
Email: data.Email,
|
||||||
Expire: data.Expire,
|
Expire: data.Expire,
|
||||||
Cookie: data.Cookie,
|
Cookie: cookieToSave,
|
||||||
LastRefresh: time.Now().Format(time.RFC3339),
|
LastRefresh: time.Now().Format(time.RFC3339),
|
||||||
Type: "iflow",
|
Type: "iflow",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -126,8 +126,8 @@ func (c *SocialAuthClient) buildLoginURL(provider, redirectURI, codeChallenge, s
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// createToken exchanges the authorization code for tokens.
|
// CreateToken exchanges the authorization code for tokens.
|
||||||
func (c *SocialAuthClient) createToken(ctx context.Context, req *CreateTokenRequest) (*SocialTokenResponse, error) {
|
func (c *SocialAuthClient) CreateToken(ctx context.Context, req *CreateTokenRequest) (*SocialTokenResponse, error) {
|
||||||
body, err := json.Marshal(req)
|
body, err := json.Marshal(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to marshal token request: %w", err)
|
return nil, fmt.Errorf("failed to marshal token request: %w", err)
|
||||||
@@ -326,7 +326,7 @@ func (c *SocialAuthClient) LoginWithSocial(ctx context.Context, provider SocialP
|
|||||||
RedirectURI: KiroRedirectURI,
|
RedirectURI: KiroRedirectURI,
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenResp, err := c.createToken(ctx, tokenReq)
|
tokenResp, err := c.CreateToken(ctx, tokenReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to exchange code for tokens: %w", err)
|
return nil, fmt.Errorf("failed to exchange code for tokens: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
@@ -37,6 +39,16 @@ func DoIFlowCookieAuth(cfg *config.Config, options *LoginOptions) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for duplicate BXAuth before authentication
|
||||||
|
bxAuth := iflow.ExtractBXAuth(cookie)
|
||||||
|
if existingFile, err := iflow.CheckDuplicateBXAuth(cfg.AuthDir, bxAuth); err != nil {
|
||||||
|
fmt.Printf("Failed to check duplicate: %v\n", err)
|
||||||
|
return
|
||||||
|
} else if existingFile != "" {
|
||||||
|
fmt.Printf("Duplicate BXAuth found, authentication already exists: %s\n", filepath.Base(existingFile))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Authenticate with cookie
|
// Authenticate with cookie
|
||||||
auth := iflow.NewIFlowAuth(cfg)
|
auth := iflow.NewIFlowAuth(cfg)
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
@@ -82,5 +94,5 @@ func promptForCookie(promptFn func(string) (string, error)) (string, error) {
|
|||||||
// getAuthFilePath returns the auth file path for the given provider and email
|
// getAuthFilePath returns the auth file path for the given provider and email
|
||||||
func getAuthFilePath(cfg *config.Config, provider, email string) string {
|
func getAuthFilePath(cfg *config.Config, provider, email string) string {
|
||||||
fileName := iflow.SanitizeIFlowFileName(email)
|
fileName := iflow.SanitizeIFlowFileName(email)
|
||||||
return fmt.Sprintf("%s/%s-%s.json", cfg.AuthDir, provider, fileName)
|
return fmt.Sprintf("%s/%s-%s-%d.json", cfg.AuthDir, provider, fileName, time.Now().Unix())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,6 +64,10 @@ type Config struct {
|
|||||||
// KiroKey defines a list of Kiro (AWS CodeWhisperer) configurations.
|
// KiroKey defines a list of Kiro (AWS CodeWhisperer) configurations.
|
||||||
KiroKey []KiroKey `yaml:"kiro" json:"kiro"`
|
KiroKey []KiroKey `yaml:"kiro" json:"kiro"`
|
||||||
|
|
||||||
|
// KiroPreferredEndpoint sets the global default preferred endpoint for all Kiro providers.
|
||||||
|
// Values: "ide" (default, CodeWhisperer) or "cli" (Amazon Q).
|
||||||
|
KiroPreferredEndpoint string `yaml:"kiro-preferred-endpoint" json:"kiro-preferred-endpoint"`
|
||||||
|
|
||||||
// Codex defines a list of Codex API key configurations as specified in the YAML configuration file.
|
// Codex defines a list of Codex API key configurations as specified in the YAML configuration file.
|
||||||
CodexKey []CodexKey `yaml:"codex-api-key" json:"codex-api-key"`
|
CodexKey []CodexKey `yaml:"codex-api-key" json:"codex-api-key"`
|
||||||
|
|
||||||
@@ -278,6 +282,10 @@ type KiroKey struct {
|
|||||||
// AgentTaskType sets the Kiro API task type. Known values: "vibe", "dev", "chat".
|
// AgentTaskType sets the Kiro API task type. Known values: "vibe", "dev", "chat".
|
||||||
// Leave empty to let API use defaults. Different values may inject different system prompts.
|
// Leave empty to let API use defaults. Different values may inject different system prompts.
|
||||||
AgentTaskType string `yaml:"agent-task-type,omitempty" json:"agent-task-type,omitempty"`
|
AgentTaskType string `yaml:"agent-task-type,omitempty" json:"agent-task-type,omitempty"`
|
||||||
|
|
||||||
|
// PreferredEndpoint sets the preferred Kiro API endpoint/quota.
|
||||||
|
// Values: "codewhisperer" (default, IDE quota) or "amazonq" (CLI quota).
|
||||||
|
PreferredEndpoint string `yaml:"preferred-endpoint,omitempty" json:"preferred-endpoint,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// OpenAICompatibility represents the configuration for OpenAI API compatibility
|
// OpenAICompatibility represents the configuration for OpenAI API compatibility
|
||||||
@@ -504,6 +512,7 @@ func (cfg *Config) SanitizeKiroKeys() {
|
|||||||
entry.ProfileArn = strings.TrimSpace(entry.ProfileArn)
|
entry.ProfileArn = strings.TrimSpace(entry.ProfileArn)
|
||||||
entry.Region = strings.TrimSpace(entry.Region)
|
entry.Region = strings.TrimSpace(entry.Region)
|
||||||
entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
|
entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
|
||||||
|
entry.PreferredEndpoint = strings.TrimSpace(entry.PreferredEndpoint)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -580,7 +580,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -884,8 +884,9 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
// GetKiroModels returns the Kiro (AWS CodeWhisperer) model definitions
|
// GetKiroModels returns the Kiro (AWS CodeWhisperer) model definitions
|
||||||
func GetKiroModels() []*ModelInfo {
|
func GetKiroModels() []*ModelInfo {
|
||||||
return []*ModelInfo{
|
return []*ModelInfo{
|
||||||
|
// --- Base Models ---
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-opus-4.5",
|
ID: "kiro-claude-opus-4-5",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: 1732752000,
|
Created: 1732752000,
|
||||||
OwnedBy: "aws",
|
OwnedBy: "aws",
|
||||||
@@ -894,9 +895,10 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Opus 4.5 via Kiro (2.2x credit)",
|
Description: "Claude Opus 4.5 via Kiro (2.2x credit)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-sonnet-4.5",
|
ID: "kiro-claude-sonnet-4-5",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: 1732752000,
|
Created: 1732752000,
|
||||||
OwnedBy: "aws",
|
OwnedBy: "aws",
|
||||||
@@ -905,6 +907,7 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Sonnet 4.5 via Kiro (1.3x credit)",
|
Description: "Claude Sonnet 4.5 via Kiro (1.3x credit)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-sonnet-4",
|
ID: "kiro-claude-sonnet-4",
|
||||||
@@ -916,9 +919,10 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Sonnet 4 via Kiro (1.3x credit)",
|
Description: "Claude Sonnet 4 via Kiro (1.3x credit)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-haiku-4.5",
|
ID: "kiro-claude-haiku-4-5",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: 1732752000,
|
Created: 1732752000,
|
||||||
OwnedBy: "aws",
|
OwnedBy: "aws",
|
||||||
@@ -927,22 +931,11 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Haiku 4.5 via Kiro (0.4x credit)",
|
Description: "Claude Haiku 4.5 via Kiro (0.4x credit)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
},
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
// --- Chat Variant (No tool calling, for pure conversation) ---
|
|
||||||
{
|
|
||||||
ID: "kiro-claude-opus-4.5-chat",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1732752000,
|
|
||||||
OwnedBy: "aws",
|
|
||||||
Type: "kiro",
|
|
||||||
DisplayName: "Kiro Claude Opus 4.5 (Chat)",
|
|
||||||
Description: "Claude Opus 4.5 for chat only (no tool calling)",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
},
|
},
|
||||||
// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
|
// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-opus-4.5-agentic",
|
ID: "kiro-claude-opus-4-5-agentic",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: 1732752000,
|
Created: 1732752000,
|
||||||
OwnedBy: "aws",
|
OwnedBy: "aws",
|
||||||
@@ -951,9 +944,10 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Opus 4.5 optimized for coding agents (chunked writes)",
|
Description: "Claude Opus 4.5 optimized for coding agents (chunked writes)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "kiro-claude-sonnet-4.5-agentic",
|
ID: "kiro-claude-sonnet-4-5-agentic",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: 1732752000,
|
Created: 1732752000,
|
||||||
OwnedBy: "aws",
|
OwnedBy: "aws",
|
||||||
@@ -962,6 +956,31 @@ func GetKiroModels() []*ModelInfo {
|
|||||||
Description: "Claude Sonnet 4.5 optimized for coding agents (chunked writes)",
|
Description: "Claude Sonnet 4.5 optimized for coding agents (chunked writes)",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "kiro-claude-sonnet-4-agentic",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1732752000,
|
||||||
|
OwnedBy: "aws",
|
||||||
|
Type: "kiro",
|
||||||
|
DisplayName: "Kiro Claude Sonnet 4 (Agentic)",
|
||||||
|
Description: "Claude Sonnet 4 optimized for coding agents (chunked writes)",
|
||||||
|
ContextLength: 200000,
|
||||||
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "kiro-claude-haiku-4-5-agentic",
|
||||||
|
Object: "model",
|
||||||
|
Created: 1732752000,
|
||||||
|
OwnedBy: "aws",
|
||||||
|
Type: "kiro",
|
||||||
|
DisplayName: "Kiro Claude Haiku 4.5 (Agentic)",
|
||||||
|
Description: "Claude Haiku 4.5 optimized for coding agents (chunked writes)",
|
||||||
|
ContextLength: 200000,
|
||||||
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -748,7 +748,8 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
case "claude":
|
case "claude", "kiro", "antigravity":
|
||||||
|
// Claude, Kiro, and Antigravity all use Claude-compatible format for Claude Code client
|
||||||
result := map[string]any{
|
result := map[string]any{
|
||||||
"id": model.ID,
|
"id": model.ID,
|
||||||
"object": "model",
|
"object": "model",
|
||||||
@@ -763,6 +764,19 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
|
|||||||
if model.DisplayName != "" {
|
if model.DisplayName != "" {
|
||||||
result["display_name"] = model.DisplayName
|
result["display_name"] = model.DisplayName
|
||||||
}
|
}
|
||||||
|
// Add thinking support for Claude Code client
|
||||||
|
// Claude Code checks for "thinking" field (simple boolean) to enable tab toggle
|
||||||
|
// Also add "extended_thinking" for detailed budget info
|
||||||
|
if model.Thinking != nil {
|
||||||
|
result["thinking"] = true
|
||||||
|
result["extended_thinking"] = map[string]any{
|
||||||
|
"supported": true,
|
||||||
|
"min": model.Thinking.Min,
|
||||||
|
"max": model.Thinking.Max,
|
||||||
|
"zero_allowed": model.Thinking.ZeroAllowed,
|
||||||
|
"dynamic_allowed": model.Thinking.DynamicAllowed,
|
||||||
|
}
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
case "gemini":
|
case "gemini":
|
||||||
|
|||||||
@@ -54,8 +54,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("codex")
|
to := sdktranslator.FromString("codex")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
@@ -152,8 +152,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
to := sdktranslator.FromString("codex")
|
to := sdktranslator.FromString("codex")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
|
|
||||||
modelForCounting := req.Model
|
modelForCounting := req.Model
|
||||||
|
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||||
body, _ = sjson.SetBytes(body, "stream", false)
|
body, _ = sjson.SetBytes(body, "stream", false)
|
||||||
|
|||||||
@@ -57,12 +57,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
@@ -148,12 +148,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
@@ -219,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -54,16 +54,18 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
|
||||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
modelOverride := e.resolveUpstreamModel(req.Model, auth)
|
||||||
|
if modelOverride != "" {
|
||||||
translated = e.overrideModel(translated, modelOverride)
|
translated = e.overrideModel(translated, modelOverride)
|
||||||
}
|
}
|
||||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||||
|
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" && modelOverride == "" {
|
||||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
|
||||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
@@ -148,16 +150,18 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
modelOverride := e.resolveUpstreamModel(req.Model, auth)
|
||||||
|
if modelOverride != "" {
|
||||||
translated = e.overrideModel(translated, modelOverride)
|
translated = e.overrideModel(translated, modelOverride)
|
||||||
}
|
}
|
||||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||||
|
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" && modelOverride == "" {
|
||||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
|
||||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
@@ -323,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
|
||||||
|
trimmed := strings.TrimSpace(model)
|
||||||
|
if trimmed == "" || e == nil || e.cfg == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
compat := e.resolveCompatConfig(auth)
|
||||||
|
if compat == nil || len(compat.Models) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range compat.Models {
|
||||||
|
entry := compat.Models[i]
|
||||||
|
if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
|
func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
|
||||||
if auth == nil || e.cfg == nil {
|
if auth == nil || e.cfg == nil {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -48,19 +48,35 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
|
|||||||
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
||||||
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
||||||
// overwriting caller-provided values to honor suffix/default metadata priority.
|
// overwriting caller-provided values to honor suffix/default metadata priority.
|
||||||
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
|
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
|
||||||
if len(metadata) == 0 {
|
if len(metadata) == 0 {
|
||||||
return payload
|
return payload
|
||||||
}
|
}
|
||||||
if !util.ModelSupportsThinking(model) {
|
|
||||||
return payload
|
|
||||||
}
|
|
||||||
if field == "" {
|
if field == "" {
|
||||||
return payload
|
return payload
|
||||||
}
|
}
|
||||||
|
baseModel := util.ResolveOriginalModel(model, metadata)
|
||||||
|
if baseModel == "" {
|
||||||
|
baseModel = model
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(baseModel) && !allowCompat {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||||
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
|
||||||
return updated
|
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
|
||||||
|
if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
|
||||||
|
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
|
||||||
|
if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
|
||||||
|
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return payload
|
return payload
|
||||||
@@ -219,31 +235,40 @@ func matchModelPattern(pattern, model string) bool {
|
|||||||
// normalizeThinkingConfig normalizes thinking-related fields in the payload
|
// normalizeThinkingConfig normalizes thinking-related fields in the payload
|
||||||
// based on model capabilities. For models without thinking support, it strips
|
// based on model capabilities. For models without thinking support, it strips
|
||||||
// reasoning fields. For models with level-based thinking, it validates and
|
// reasoning fields. For models with level-based thinking, it validates and
|
||||||
// normalizes the reasoning effort level.
|
// normalizes the reasoning effort level. For models with numeric budget thinking,
|
||||||
func normalizeThinkingConfig(payload []byte, model string) []byte {
|
// it strips the effort string fields.
|
||||||
|
func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
|
||||||
if len(payload) == 0 || model == "" {
|
if len(payload) == 0 || model == "" {
|
||||||
return payload
|
return payload
|
||||||
}
|
}
|
||||||
|
|
||||||
if !util.ModelSupportsThinking(model) {
|
if !util.ModelSupportsThinking(model) {
|
||||||
return stripThinkingFields(payload)
|
if allowCompat {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
return stripThinkingFields(payload, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
if util.ModelUsesThinkingLevels(model) {
|
if util.ModelUsesThinkingLevels(model) {
|
||||||
return normalizeReasoningEffortLevel(payload, model)
|
return normalizeReasoningEffortLevel(payload, model)
|
||||||
}
|
}
|
||||||
|
|
||||||
return payload
|
// Model supports thinking but uses numeric budgets, not levels.
|
||||||
|
// Strip effort string fields since they are not applicable.
|
||||||
|
return stripThinkingFields(payload, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// stripThinkingFields removes thinking-related fields from the payload for
|
// stripThinkingFields removes thinking-related fields from the payload for
|
||||||
// models that do not support thinking.
|
// models that do not support thinking. If effortOnly is true, only removes
|
||||||
func stripThinkingFields(payload []byte) []byte {
|
// effort string fields (for models using numeric budgets).
|
||||||
|
func stripThinkingFields(payload []byte, effortOnly bool) []byte {
|
||||||
fieldsToRemove := []string{
|
fieldsToRemove := []string{
|
||||||
"reasoning",
|
|
||||||
"reasoning_effort",
|
"reasoning_effort",
|
||||||
"reasoning.effort",
|
"reasoning.effort",
|
||||||
}
|
}
|
||||||
|
if !effortOnly {
|
||||||
|
fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
|
||||||
|
}
|
||||||
out := payload
|
out := payload
|
||||||
for _, field := range fieldsToRemove {
|
for _, field := range fieldsToRemove {
|
||||||
if gjson.GetBytes(out, field).Exists() {
|
if gjson.GetBytes(out, field).Exists() {
|
||||||
|
|||||||
@@ -51,12 +51,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
@@ -131,12 +131,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if upstreamModel != "" {
|
if upstreamModel != "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
}
|
}
|
||||||
body = normalizeThinkingConfig(body, upstreamModel)
|
body = normalizeThinkingConfig(body, upstreamModel, false)
|
||||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,43 +2,107 @@ package executor
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
"github.com/tiktoken-go/tokenizer"
|
"github.com/tiktoken-go/tokenizer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// tokenizerCache stores tokenizer instances to avoid repeated creation
|
||||||
|
var tokenizerCache sync.Map
|
||||||
|
|
||||||
|
// TokenizerWrapper wraps a tokenizer codec with an adjustment factor for models
|
||||||
|
// where tiktoken may not accurately estimate token counts (e.g., Claude models)
|
||||||
|
type TokenizerWrapper struct {
|
||||||
|
Codec tokenizer.Codec
|
||||||
|
AdjustmentFactor float64 // 1.0 means no adjustment, >1.0 means tiktoken underestimates
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count returns the token count with adjustment factor applied
|
||||||
|
func (tw *TokenizerWrapper) Count(text string) (int, error) {
|
||||||
|
count, err := tw.Codec.Count(text)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if tw.AdjustmentFactor != 1.0 && tw.AdjustmentFactor > 0 {
|
||||||
|
return int(float64(count) * tw.AdjustmentFactor), nil
|
||||||
|
}
|
||||||
|
return count, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTokenizer returns a cached tokenizer for the given model.
|
||||||
|
// This improves performance by avoiding repeated tokenizer creation.
|
||||||
|
func getTokenizer(model string) (*TokenizerWrapper, error) {
|
||||||
|
// Check cache first
|
||||||
|
if cached, ok := tokenizerCache.Load(model); ok {
|
||||||
|
return cached.(*TokenizerWrapper), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache miss, create new tokenizer
|
||||||
|
wrapper, err := tokenizerForModel(model)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store in cache (use LoadOrStore to handle race conditions)
|
||||||
|
actual, _ := tokenizerCache.LoadOrStore(model, wrapper)
|
||||||
|
return actual.(*TokenizerWrapper), nil
|
||||||
|
}
|
||||||
|
|
||||||
// tokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id.
|
// tokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id.
|
||||||
func tokenizerForModel(model string) (tokenizer.Codec, error) {
|
// For Claude models, applies a 1.1 adjustment factor since tiktoken may underestimate.
|
||||||
|
func tokenizerForModel(model string) (*TokenizerWrapper, error) {
|
||||||
sanitized := strings.ToLower(strings.TrimSpace(model))
|
sanitized := strings.ToLower(strings.TrimSpace(model))
|
||||||
|
|
||||||
|
// Claude models use cl100k_base with 1.1 adjustment factor
|
||||||
|
// because tiktoken may underestimate Claude's actual token count
|
||||||
|
if strings.Contains(sanitized, "claude") || strings.HasPrefix(sanitized, "kiro-") || strings.HasPrefix(sanitized, "amazonq-") {
|
||||||
|
enc, err := tokenizer.Get(tokenizer.Cl100kBase)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &TokenizerWrapper{Codec: enc, AdjustmentFactor: 1.1}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var enc tokenizer.Codec
|
||||||
|
var err error
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case sanitized == "":
|
case sanitized == "":
|
||||||
return tokenizer.Get(tokenizer.Cl100kBase)
|
enc, err = tokenizer.Get(tokenizer.Cl100kBase)
|
||||||
case strings.HasPrefix(sanitized, "gpt-5"):
|
case strings.HasPrefix(sanitized, "gpt-5"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT5)
|
enc, err = tokenizer.ForModel(tokenizer.GPT5)
|
||||||
case strings.HasPrefix(sanitized, "gpt-5.1"):
|
case strings.HasPrefix(sanitized, "gpt-5.1"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT5)
|
enc, err = tokenizer.ForModel(tokenizer.GPT5)
|
||||||
case strings.HasPrefix(sanitized, "gpt-4.1"):
|
case strings.HasPrefix(sanitized, "gpt-4.1"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT41)
|
enc, err = tokenizer.ForModel(tokenizer.GPT41)
|
||||||
case strings.HasPrefix(sanitized, "gpt-4o"):
|
case strings.HasPrefix(sanitized, "gpt-4o"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT4o)
|
enc, err = tokenizer.ForModel(tokenizer.GPT4o)
|
||||||
case strings.HasPrefix(sanitized, "gpt-4"):
|
case strings.HasPrefix(sanitized, "gpt-4"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT4)
|
enc, err = tokenizer.ForModel(tokenizer.GPT4)
|
||||||
case strings.HasPrefix(sanitized, "gpt-3.5"), strings.HasPrefix(sanitized, "gpt-3"):
|
case strings.HasPrefix(sanitized, "gpt-3.5"), strings.HasPrefix(sanitized, "gpt-3"):
|
||||||
return tokenizer.ForModel(tokenizer.GPT35Turbo)
|
enc, err = tokenizer.ForModel(tokenizer.GPT35Turbo)
|
||||||
case strings.HasPrefix(sanitized, "o1"):
|
case strings.HasPrefix(sanitized, "o1"):
|
||||||
return tokenizer.ForModel(tokenizer.O1)
|
enc, err = tokenizer.ForModel(tokenizer.O1)
|
||||||
case strings.HasPrefix(sanitized, "o3"):
|
case strings.HasPrefix(sanitized, "o3"):
|
||||||
return tokenizer.ForModel(tokenizer.O3)
|
enc, err = tokenizer.ForModel(tokenizer.O3)
|
||||||
case strings.HasPrefix(sanitized, "o4"):
|
case strings.HasPrefix(sanitized, "o4"):
|
||||||
return tokenizer.ForModel(tokenizer.O4Mini)
|
enc, err = tokenizer.ForModel(tokenizer.O4Mini)
|
||||||
default:
|
default:
|
||||||
return tokenizer.Get(tokenizer.O200kBase)
|
enc, err = tokenizer.Get(tokenizer.O200kBase)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &TokenizerWrapper{Codec: enc, AdjustmentFactor: 1.0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// countOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads.
|
// countOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads.
|
||||||
func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
|
func countOpenAIChatTokens(enc *TokenizerWrapper, payload []byte) (int64, error) {
|
||||||
if enc == nil {
|
if enc == nil {
|
||||||
return 0, fmt.Errorf("encoder is nil")
|
return 0, fmt.Errorf("encoder is nil")
|
||||||
}
|
}
|
||||||
@@ -62,11 +126,206 @@ func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
|
|||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count text tokens
|
||||||
count, err := enc.Count(joined)
|
count, err := enc.Count(joined)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
return int64(count), nil
|
|
||||||
|
// Extract and add image tokens from placeholders
|
||||||
|
imageTokens := extractImageTokens(joined)
|
||||||
|
|
||||||
|
return int64(count) + int64(imageTokens), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// countClaudeChatTokens approximates prompt tokens for Claude API chat completions payloads.
|
||||||
|
// This handles Claude's message format with system, messages, and tools.
|
||||||
|
// Image tokens are estimated based on image dimensions when available.
|
||||||
|
func countClaudeChatTokens(enc *TokenizerWrapper, payload []byte) (int64, error) {
|
||||||
|
if enc == nil {
|
||||||
|
return 0, fmt.Errorf("encoder is nil")
|
||||||
|
}
|
||||||
|
if len(payload) == 0 {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
root := gjson.ParseBytes(payload)
|
||||||
|
segments := make([]string, 0, 32)
|
||||||
|
|
||||||
|
// Collect system prompt (can be string or array of content blocks)
|
||||||
|
collectClaudeSystem(root.Get("system"), &segments)
|
||||||
|
|
||||||
|
// Collect messages
|
||||||
|
collectClaudeMessages(root.Get("messages"), &segments)
|
||||||
|
|
||||||
|
// Collect tools
|
||||||
|
collectClaudeTools(root.Get("tools"), &segments)
|
||||||
|
|
||||||
|
joined := strings.TrimSpace(strings.Join(segments, "\n"))
|
||||||
|
if joined == "" {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count text tokens
|
||||||
|
count, err := enc.Count(joined)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract and add image tokens from placeholders
|
||||||
|
imageTokens := extractImageTokens(joined)
|
||||||
|
|
||||||
|
return int64(count) + int64(imageTokens), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// imageTokenPattern matches [IMAGE:xxx tokens] format for extracting estimated image tokens
|
||||||
|
var imageTokenPattern = regexp.MustCompile(`\[IMAGE:(\d+) tokens\]`)
|
||||||
|
|
||||||
|
// extractImageTokens extracts image token estimates from placeholder text.
|
||||||
|
// Placeholders are in the format [IMAGE:xxx tokens] where xxx is the estimated token count.
|
||||||
|
func extractImageTokens(text string) int {
|
||||||
|
matches := imageTokenPattern.FindAllStringSubmatch(text, -1)
|
||||||
|
total := 0
|
||||||
|
for _, match := range matches {
|
||||||
|
if len(match) > 1 {
|
||||||
|
if tokens, err := strconv.Atoi(match[1]); err == nil {
|
||||||
|
total += tokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// estimateImageTokens calculates estimated tokens for an image based on dimensions.
|
||||||
|
// Based on Claude's image token calculation: tokens ≈ (width * height) / 750
|
||||||
|
// Minimum 85 tokens, maximum 1590 tokens (for 1568x1568 images).
|
||||||
|
func estimateImageTokens(width, height float64) int {
|
||||||
|
if width <= 0 || height <= 0 {
|
||||||
|
// No valid dimensions, use default estimate (medium-sized image)
|
||||||
|
return 1000
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens := int(width * height / 750)
|
||||||
|
|
||||||
|
// Apply bounds
|
||||||
|
if tokens < 85 {
|
||||||
|
tokens = 85
|
||||||
|
}
|
||||||
|
if tokens > 1590 {
|
||||||
|
tokens = 1590
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectClaudeSystem extracts text from Claude's system field.
|
||||||
|
// System can be a string or an array of content blocks.
|
||||||
|
func collectClaudeSystem(system gjson.Result, segments *[]string) {
|
||||||
|
if !system.Exists() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if system.Type == gjson.String {
|
||||||
|
addIfNotEmpty(segments, system.String())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if system.IsArray() {
|
||||||
|
system.ForEach(func(_, block gjson.Result) bool {
|
||||||
|
blockType := block.Get("type").String()
|
||||||
|
if blockType == "text" || blockType == "" {
|
||||||
|
addIfNotEmpty(segments, block.Get("text").String())
|
||||||
|
}
|
||||||
|
// Also handle plain string blocks
|
||||||
|
if block.Type == gjson.String {
|
||||||
|
addIfNotEmpty(segments, block.String())
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectClaudeMessages extracts text from Claude's messages array.
|
||||||
|
func collectClaudeMessages(messages gjson.Result, segments *[]string) {
|
||||||
|
if !messages.Exists() || !messages.IsArray() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
messages.ForEach(func(_, message gjson.Result) bool {
|
||||||
|
addIfNotEmpty(segments, message.Get("role").String())
|
||||||
|
collectClaudeContent(message.Get("content"), segments)
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectClaudeContent extracts text from Claude's content field.
|
||||||
|
// Content can be a string or an array of content blocks.
|
||||||
|
// For images, estimates token count based on dimensions when available.
|
||||||
|
func collectClaudeContent(content gjson.Result, segments *[]string) {
|
||||||
|
if !content.Exists() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if content.Type == gjson.String {
|
||||||
|
addIfNotEmpty(segments, content.String())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if content.IsArray() {
|
||||||
|
content.ForEach(func(_, part gjson.Result) bool {
|
||||||
|
partType := part.Get("type").String()
|
||||||
|
switch partType {
|
||||||
|
case "text":
|
||||||
|
addIfNotEmpty(segments, part.Get("text").String())
|
||||||
|
case "image":
|
||||||
|
// Estimate image tokens based on dimensions if available
|
||||||
|
source := part.Get("source")
|
||||||
|
if source.Exists() {
|
||||||
|
width := source.Get("width").Float()
|
||||||
|
height := source.Get("height").Float()
|
||||||
|
if width > 0 && height > 0 {
|
||||||
|
tokens := estimateImageTokens(width, height)
|
||||||
|
addIfNotEmpty(segments, fmt.Sprintf("[IMAGE:%d tokens]", tokens))
|
||||||
|
} else {
|
||||||
|
// No dimensions available, use default estimate
|
||||||
|
addIfNotEmpty(segments, "[IMAGE:1000 tokens]")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No source info, use default estimate
|
||||||
|
addIfNotEmpty(segments, "[IMAGE:1000 tokens]")
|
||||||
|
}
|
||||||
|
case "tool_use":
|
||||||
|
addIfNotEmpty(segments, part.Get("id").String())
|
||||||
|
addIfNotEmpty(segments, part.Get("name").String())
|
||||||
|
if input := part.Get("input"); input.Exists() {
|
||||||
|
addIfNotEmpty(segments, input.Raw)
|
||||||
|
}
|
||||||
|
case "tool_result":
|
||||||
|
addIfNotEmpty(segments, part.Get("tool_use_id").String())
|
||||||
|
collectClaudeContent(part.Get("content"), segments)
|
||||||
|
case "thinking":
|
||||||
|
addIfNotEmpty(segments, part.Get("thinking").String())
|
||||||
|
default:
|
||||||
|
// For unknown types, try to extract any text content
|
||||||
|
if part.Type == gjson.String {
|
||||||
|
addIfNotEmpty(segments, part.String())
|
||||||
|
} else if part.Type == gjson.JSON {
|
||||||
|
addIfNotEmpty(segments, part.Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectClaudeTools extracts text from Claude's tools array.
|
||||||
|
func collectClaudeTools(tools gjson.Result, segments *[]string) {
|
||||||
|
if !tools.Exists() || !tools.IsArray() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tools.ForEach(func(_, tool gjson.Result) bool {
|
||||||
|
addIfNotEmpty(segments, tool.Get("name").String())
|
||||||
|
addIfNotEmpty(segments, tool.Get("description").String())
|
||||||
|
if inputSchema := tool.Get("input_schema"); inputSchema.Exists() {
|
||||||
|
addIfNotEmpty(segments, inputSchema.Raw)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators.
|
// buildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators.
|
||||||
|
|||||||
@@ -214,7 +214,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
|||||||
|
|
||||||
// Add additional configuration parameters for the Codex API.
|
// Add additional configuration parameters for the Codex API.
|
||||||
template, _ = sjson.Set(template, "parallel_tool_calls", true)
|
template, _ = sjson.Set(template, "parallel_tool_calls", true)
|
||||||
template, _ = sjson.Set(template, "reasoning.effort", "low")
|
template, _ = sjson.Set(template, "reasoning.effort", "medium")
|
||||||
template, _ = sjson.Set(template, "reasoning.summary", "auto")
|
template, _ = sjson.Set(template, "reasoning.summary", "auto")
|
||||||
template, _ = sjson.Set(template, "stream", true)
|
template, _ = sjson.Set(template, "stream", true)
|
||||||
template, _ = sjson.Set(template, "store", false)
|
template, _ = sjson.Set(template, "store", false)
|
||||||
|
|||||||
@@ -245,7 +245,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
|||||||
|
|
||||||
// Fixed flags aligning with Codex expectations
|
// Fixed flags aligning with Codex expectations
|
||||||
out, _ = sjson.Set(out, "parallel_tool_calls", true)
|
out, _ = sjson.Set(out, "parallel_tool_calls", true)
|
||||||
out, _ = sjson.Set(out, "reasoning.effort", "low")
|
out, _ = sjson.Set(out, "reasoning.effort", "medium")
|
||||||
out, _ = sjson.Set(out, "reasoning.summary", "auto")
|
out, _ = sjson.Set(out, "reasoning.summary", "auto")
|
||||||
out, _ = sjson.Set(out, "stream", true)
|
out, _ = sjson.Set(out, "stream", true)
|
||||||
out, _ = sjson.Set(out, "store", false)
|
out, _ = sjson.Set(out, "store", false)
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
|||||||
if v := gjson.GetBytes(rawJSON, "reasoning_effort"); v.Exists() {
|
if v := gjson.GetBytes(rawJSON, "reasoning_effort"); v.Exists() {
|
||||||
out, _ = sjson.Set(out, "reasoning.effort", v.Value())
|
out, _ = sjson.Set(out, "reasoning.effort", v.Value())
|
||||||
} else {
|
} else {
|
||||||
out, _ = sjson.Set(out, "reasoning.effort", "low")
|
out, _ = sjson.Set(out, "reasoning.effort", "medium")
|
||||||
}
|
}
|
||||||
out, _ = sjson.Set(out, "parallel_tool_calls", true)
|
out, _ = sjson.Set(out, "parallel_tool_calls", true)
|
||||||
out, _ = sjson.Set(out, "reasoning.summary", "auto")
|
out, _ = sjson.Set(out, "reasoning.summary", "auto")
|
||||||
|
|||||||
@@ -10,9 +10,18 @@ import (
|
|||||||
"github.com/tidwall/sjson"
|
"github.com/tidwall/sjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// reasoningEffortToBudget maps OpenAI reasoning_effort values to Claude thinking budget_tokens.
|
||||||
|
// OpenAI uses "low", "medium", "high" while Claude uses numeric budget_tokens.
|
||||||
|
var reasoningEffortToBudget = map[string]int{
|
||||||
|
"low": 4000,
|
||||||
|
"medium": 16000,
|
||||||
|
"high": 32000,
|
||||||
|
}
|
||||||
|
|
||||||
// ConvertOpenAIRequestToKiro transforms an OpenAI Chat Completions API request into Kiro (Claude) format.
|
// ConvertOpenAIRequestToKiro transforms an OpenAI Chat Completions API request into Kiro (Claude) format.
|
||||||
// Kiro uses Claude-compatible format internally, so we primarily pass through to Claude format.
|
// Kiro uses Claude-compatible format internally, so we primarily pass through to Claude format.
|
||||||
// Supports tool calling: OpenAI tools -> Claude tools, tool_calls -> tool_use, tool messages -> tool_result.
|
// Supports tool calling: OpenAI tools -> Claude tools, tool_calls -> tool_use, tool messages -> tool_result.
|
||||||
|
// Supports reasoning/thinking: OpenAI reasoning_effort -> Claude thinking parameter.
|
||||||
func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
|
func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
|
||||||
rawJSON := bytes.Clone(inputRawJSON)
|
rawJSON := bytes.Clone(inputRawJSON)
|
||||||
root := gjson.ParseBytes(rawJSON)
|
root := gjson.ParseBytes(rawJSON)
|
||||||
@@ -38,6 +47,26 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
|||||||
out, _ = sjson.Set(out, "top_p", v.Float())
|
out, _ = sjson.Set(out, "top_p", v.Float())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle OpenAI reasoning_effort parameter -> Claude thinking parameter
|
||||||
|
// OpenAI format: {"reasoning_effort": "low"|"medium"|"high"}
|
||||||
|
// Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}}
|
||||||
|
if v := root.Get("reasoning_effort"); v.Exists() {
|
||||||
|
effort := v.String()
|
||||||
|
if budget, ok := reasoningEffortToBudget[effort]; ok {
|
||||||
|
thinking := map[string]interface{}{
|
||||||
|
"type": "enabled",
|
||||||
|
"budget_tokens": budget,
|
||||||
|
}
|
||||||
|
out, _ = sjson.Set(out, "thinking", thinking)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also support direct thinking parameter passthrough (for Claude API compatibility)
|
||||||
|
// Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}}
|
||||||
|
if v := root.Get("thinking"); v.Exists() && v.IsObject() {
|
||||||
|
out, _ = sjson.Set(out, "thinking", v.Value())
|
||||||
|
}
|
||||||
|
|
||||||
// Convert OpenAI tools to Claude tools format
|
// Convert OpenAI tools to Claude tools format
|
||||||
if tools := root.Get("tools"); tools.Exists() && tools.IsArray() {
|
if tools := root.Get("tools"); tools.Exists() && tools.IsArray() {
|
||||||
claudeTools := make([]interface{}, 0)
|
claudeTools := make([]interface{}, 0)
|
||||||
|
|||||||
@@ -134,6 +134,28 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
|||||||
result, _ := json.Marshal(response)
|
result, _ := json.Marshal(response)
|
||||||
results = append(results, string(result))
|
results = append(results, string(result))
|
||||||
}
|
}
|
||||||
|
} else if deltaType == "thinking_delta" {
|
||||||
|
// Thinking/reasoning content delta - convert to OpenAI reasoning_content format
|
||||||
|
thinkingDelta := root.Get("delta.thinking").String()
|
||||||
|
if thinkingDelta != "" {
|
||||||
|
response := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:24],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"delta": map[string]interface{}{
|
||||||
|
"reasoning_content": thinkingDelta,
|
||||||
|
},
|
||||||
|
"finish_reason": nil,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, _ := json.Marshal(response)
|
||||||
|
results = append(results, string(result))
|
||||||
|
}
|
||||||
} else if deltaType == "input_json_delta" {
|
} else if deltaType == "input_json_delta" {
|
||||||
// Tool input delta (streaming arguments)
|
// Tool input delta (streaming arguments)
|
||||||
partialJSON := root.Get("delta.partial_json").String()
|
partialJSON := root.Get("delta.partial_json").String()
|
||||||
@@ -298,6 +320,7 @@ func ConvertKiroResponseToOpenAINonStream(ctx context.Context, model string, ori
|
|||||||
root := gjson.ParseBytes(rawResponse)
|
root := gjson.ParseBytes(rawResponse)
|
||||||
|
|
||||||
var content string
|
var content string
|
||||||
|
var reasoningContent string
|
||||||
var toolCalls []map[string]interface{}
|
var toolCalls []map[string]interface{}
|
||||||
|
|
||||||
contentArray := root.Get("content")
|
contentArray := root.Get("content")
|
||||||
@@ -306,6 +329,9 @@ func ConvertKiroResponseToOpenAINonStream(ctx context.Context, model string, ori
|
|||||||
itemType := item.Get("type").String()
|
itemType := item.Get("type").String()
|
||||||
if itemType == "text" {
|
if itemType == "text" {
|
||||||
content += item.Get("text").String()
|
content += item.Get("text").String()
|
||||||
|
} else if itemType == "thinking" {
|
||||||
|
// Extract thinking/reasoning content
|
||||||
|
reasoningContent += item.Get("thinking").String()
|
||||||
} else if itemType == "tool_use" {
|
} else if itemType == "tool_use" {
|
||||||
// Convert Claude tool_use to OpenAI tool_calls format
|
// Convert Claude tool_use to OpenAI tool_calls format
|
||||||
inputJSON := item.Get("input").String()
|
inputJSON := item.Get("input").String()
|
||||||
@@ -339,6 +365,11 @@ func ConvertKiroResponseToOpenAINonStream(ctx context.Context, model string, ori
|
|||||||
"content": content,
|
"content": content,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add reasoning_content if present (OpenAI reasoning format)
|
||||||
|
if reasoningContent != "" {
|
||||||
|
message["reasoning_content"] = reasoningContent
|
||||||
|
}
|
||||||
|
|
||||||
// Add tool_calls if present
|
// Add tool_calls if present
|
||||||
if len(toolCalls) > 0 {
|
if len(toolCalls) > 0 {
|
||||||
message["tool_calls"] = toolCalls
|
message["tool_calls"] = toolCalls
|
||||||
|
|||||||
@@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
|
|||||||
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
||||||
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
||||||
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
||||||
|
if !ModelSupportsThinking(modelName) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
||||||
if !matched {
|
if !matched {
|
||||||
return nil, false
|
return nil, false
|
||||||
|
|||||||
@@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
|
|||||||
updated = rewritten
|
updated = rewritten
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if includeThoughts != nil {
|
// Default to including thoughts when a budget override is present but no explicit include flag is provided.
|
||||||
|
incl := includeThoughts
|
||||||
|
if incl == nil && budget != nil && *budget != 0 {
|
||||||
|
defaultInclude := true
|
||||||
|
incl = &defaultInclude
|
||||||
|
}
|
||||||
|
if incl != nil {
|
||||||
valuePath := "generationConfig.thinkingConfig.include_thoughts"
|
valuePath := "generationConfig.thinkingConfig.include_thoughts"
|
||||||
rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
|
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
updated = rewritten
|
updated = rewritten
|
||||||
}
|
}
|
||||||
@@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
|
|||||||
updated = rewritten
|
updated = rewritten
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if includeThoughts != nil {
|
// Default to including thoughts when a budget override is present but no explicit include flag is provided.
|
||||||
|
incl := includeThoughts
|
||||||
|
if incl == nil && budget != nil && *budget != 0 {
|
||||||
|
defaultInclude := true
|
||||||
|
incl = &defaultInclude
|
||||||
|
}
|
||||||
|
if incl != nil {
|
||||||
valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
|
valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
|
||||||
rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
|
rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
updated = rewritten
|
updated = rewritten
|
||||||
}
|
}
|
||||||
|
|||||||
34
internal/util/openai_thinking.go
Normal file
34
internal/util/openai_thinking.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package util
|
||||||
|
|
||||||
|
// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens)
|
||||||
|
// into an OpenAI-style reasoning effort level for level-based models.
|
||||||
|
//
|
||||||
|
// Ranges:
|
||||||
|
// - 0 -> "none"
|
||||||
|
// - 1..1024 -> "low"
|
||||||
|
// - 1025..8192 -> "medium"
|
||||||
|
// - 8193..24576 -> "high"
|
||||||
|
// - 24577.. -> highest supported level for the model (defaults to "xhigh")
|
||||||
|
//
|
||||||
|
// Negative values (except the dynamic -1 handled elsewhere) are treated as unsupported.
|
||||||
|
func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
|
||||||
|
switch {
|
||||||
|
case budget < 0:
|
||||||
|
return "", false
|
||||||
|
case budget == 0:
|
||||||
|
return "none", true
|
||||||
|
case budget > 0 && budget <= 1024:
|
||||||
|
return "low", true
|
||||||
|
case budget <= 8192:
|
||||||
|
return "medium", true
|
||||||
|
case budget <= 24576:
|
||||||
|
return "high", true
|
||||||
|
case budget > 24576:
|
||||||
|
if levels := GetModelThinkingLevels(model); len(levels) > 0 {
|
||||||
|
return levels[len(levels)-1], true
|
||||||
|
}
|
||||||
|
return "xhigh", true
|
||||||
|
default:
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -25,33 +25,33 @@ func ModelSupportsThinking(model string) bool {
|
|||||||
// or min (0 if zero is allowed and mid <= 0).
|
// or min (0 if zero is allowed and mid <= 0).
|
||||||
func NormalizeThinkingBudget(model string, budget int) int {
|
func NormalizeThinkingBudget(model string, budget int) int {
|
||||||
if budget == -1 { // dynamic
|
if budget == -1 { // dynamic
|
||||||
if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
|
if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
|
||||||
if dynamicAllowed {
|
if dynamicAllowed {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
mid := (min + max) / 2
|
mid := (minBudget + maxBudget) / 2
|
||||||
if mid <= 0 && zeroAllowed {
|
if mid <= 0 && zeroAllowed {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
if mid <= 0 {
|
if mid <= 0 {
|
||||||
return min
|
return minBudget
|
||||||
}
|
}
|
||||||
return mid
|
return mid
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
|
if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
|
||||||
if budget == 0 {
|
if budget == 0 {
|
||||||
if zeroAllowed {
|
if zeroAllowed {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
return min
|
return minBudget
|
||||||
}
|
}
|
||||||
if budget < min {
|
if budget < minBudget {
|
||||||
return min
|
return minBudget
|
||||||
}
|
}
|
||||||
if budget > max {
|
if budget > maxBudget {
|
||||||
return max
|
return maxBudget
|
||||||
}
|
}
|
||||||
return budget
|
return budget
|
||||||
}
|
}
|
||||||
@@ -105,3 +105,16 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
|
|||||||
}
|
}
|
||||||
return "", false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model.
|
||||||
|
// These models may not advertise Thinking metadata in the registry.
|
||||||
|
func IsOpenAICompatibilityModel(model string) bool {
|
||||||
|
if model == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||||
|
if info == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility")
|
||||||
|
}
|
||||||
|
|||||||
@@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*
|
|||||||
if !matched {
|
if !matched {
|
||||||
return nil, nil, false
|
return nil, nil, false
|
||||||
}
|
}
|
||||||
|
// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
|
||||||
|
// Claude/Gemini-style protocols, so we don't derive budgets for them here.
|
||||||
|
if ModelUsesThinkingLevels(model) {
|
||||||
|
return nil, nil, false
|
||||||
|
}
|
||||||
|
|
||||||
if budget == nil && effort != nil {
|
if budget == nil && effort != nil {
|
||||||
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -62,6 +63,7 @@ type Watcher struct {
|
|||||||
reloadCallback func(*config.Config)
|
reloadCallback func(*config.Config)
|
||||||
watcher *fsnotify.Watcher
|
watcher *fsnotify.Watcher
|
||||||
lastAuthHashes map[string]string
|
lastAuthHashes map[string]string
|
||||||
|
lastRemoveTimes map[string]time.Time
|
||||||
lastConfigHash string
|
lastConfigHash string
|
||||||
authQueue chan<- AuthUpdate
|
authQueue chan<- AuthUpdate
|
||||||
currentAuths map[string]*coreauth.Auth
|
currentAuths map[string]*coreauth.Auth
|
||||||
@@ -128,8 +130,9 @@ type AuthUpdate struct {
|
|||||||
const (
|
const (
|
||||||
// replaceCheckDelay is a short delay to allow atomic replace (rename) to settle
|
// replaceCheckDelay is a short delay to allow atomic replace (rename) to settle
|
||||||
// before deciding whether a Remove event indicates a real deletion.
|
// before deciding whether a Remove event indicates a real deletion.
|
||||||
replaceCheckDelay = 50 * time.Millisecond
|
replaceCheckDelay = 50 * time.Millisecond
|
||||||
configReloadDebounce = 150 * time.Millisecond
|
configReloadDebounce = 150 * time.Millisecond
|
||||||
|
authRemoveDebounceWindow = 1 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewWatcher creates a new file watcher instance
|
// NewWatcher creates a new file watcher instance
|
||||||
@@ -750,8 +753,9 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
|
|||||||
sum := sha256.Sum256(data)
|
sum := sha256.Sum256(data)
|
||||||
curHash := hex.EncodeToString(sum[:])
|
curHash := hex.EncodeToString(sum[:])
|
||||||
|
|
||||||
|
normalized := w.normalizeAuthPath(path)
|
||||||
w.clientsMutex.RLock()
|
w.clientsMutex.RLock()
|
||||||
prevHash, ok := w.lastAuthHashes[path]
|
prevHash, ok := w.lastAuthHashes[normalized]
|
||||||
w.clientsMutex.RUnlock()
|
w.clientsMutex.RUnlock()
|
||||||
if ok && prevHash == curHash {
|
if ok && prevHash == curHash {
|
||||||
return true, nil
|
return true, nil
|
||||||
@@ -760,19 +764,63 @@ func (w *Watcher) authFileUnchanged(path string) (bool, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watcher) isKnownAuthFile(path string) bool {
|
func (w *Watcher) isKnownAuthFile(path string) bool {
|
||||||
|
normalized := w.normalizeAuthPath(path)
|
||||||
w.clientsMutex.RLock()
|
w.clientsMutex.RLock()
|
||||||
defer w.clientsMutex.RUnlock()
|
defer w.clientsMutex.RUnlock()
|
||||||
_, ok := w.lastAuthHashes[path]
|
_, ok := w.lastAuthHashes[normalized]
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *Watcher) normalizeAuthPath(path string) string {
|
||||||
|
trimmed := strings.TrimSpace(path)
|
||||||
|
if trimmed == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
cleaned := filepath.Clean(trimmed)
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
cleaned = strings.TrimPrefix(cleaned, `\\?\`)
|
||||||
|
cleaned = strings.ToLower(cleaned)
|
||||||
|
}
|
||||||
|
return cleaned
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *Watcher) shouldDebounceRemove(normalizedPath string, now time.Time) bool {
|
||||||
|
if normalizedPath == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
w.clientsMutex.Lock()
|
||||||
|
if w.lastRemoveTimes == nil {
|
||||||
|
w.lastRemoveTimes = make(map[string]time.Time)
|
||||||
|
}
|
||||||
|
if last, ok := w.lastRemoveTimes[normalizedPath]; ok {
|
||||||
|
if now.Sub(last) < authRemoveDebounceWindow {
|
||||||
|
w.clientsMutex.Unlock()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.lastRemoveTimes[normalizedPath] = now
|
||||||
|
if len(w.lastRemoveTimes) > 128 {
|
||||||
|
cutoff := now.Add(-2 * authRemoveDebounceWindow)
|
||||||
|
for p, t := range w.lastRemoveTimes {
|
||||||
|
if t.Before(cutoff) {
|
||||||
|
delete(w.lastRemoveTimes, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.clientsMutex.Unlock()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// handleEvent processes individual file system events
|
// handleEvent processes individual file system events
|
||||||
func (w *Watcher) handleEvent(event fsnotify.Event) {
|
func (w *Watcher) handleEvent(event fsnotify.Event) {
|
||||||
// Filter only relevant events: config file or auth-dir JSON files.
|
// Filter only relevant events: config file or auth-dir JSON files.
|
||||||
configOps := fsnotify.Write | fsnotify.Create | fsnotify.Rename
|
configOps := fsnotify.Write | fsnotify.Create | fsnotify.Rename
|
||||||
isConfigEvent := event.Name == w.configPath && event.Op&configOps != 0
|
normalizedName := w.normalizeAuthPath(event.Name)
|
||||||
|
normalizedConfigPath := w.normalizeAuthPath(w.configPath)
|
||||||
|
normalizedAuthDir := w.normalizeAuthPath(w.authDir)
|
||||||
|
isConfigEvent := normalizedName == normalizedConfigPath && event.Op&configOps != 0
|
||||||
authOps := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename
|
authOps := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename
|
||||||
isAuthJSON := strings.HasPrefix(event.Name, w.authDir) && strings.HasSuffix(event.Name, ".json") && event.Op&authOps != 0
|
isAuthJSON := strings.HasPrefix(normalizedName, normalizedAuthDir) && strings.HasSuffix(normalizedName, ".json") && event.Op&authOps != 0
|
||||||
|
|
||||||
// Check for Kiro IDE token file changes
|
// Check for Kiro IDE token file changes
|
||||||
isKiroIDEToken := w.isKiroIDETokenFile(event.Name) && event.Op&authOps != 0
|
isKiroIDEToken := w.isKiroIDETokenFile(event.Name) && event.Op&authOps != 0
|
||||||
@@ -800,6 +848,10 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
|
|||||||
|
|
||||||
// Handle auth directory changes incrementally (.json only)
|
// Handle auth directory changes incrementally (.json only)
|
||||||
if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
|
if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
|
||||||
|
if w.shouldDebounceRemove(normalizedName, now) {
|
||||||
|
log.Debugf("debouncing remove event for %s", filepath.Base(event.Name))
|
||||||
|
return
|
||||||
|
}
|
||||||
// Atomic replace on some platforms may surface as Rename (or Remove) before the new file is ready.
|
// Atomic replace on some platforms may surface as Rename (or Remove) before the new file is ready.
|
||||||
// Wait briefly; if the path exists again, treat as an update instead of removal.
|
// Wait briefly; if the path exists again, treat as an update instead of removal.
|
||||||
time.Sleep(replaceCheckDelay)
|
time.Sleep(replaceCheckDelay)
|
||||||
@@ -1062,7 +1114,8 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
|
|||||||
if !info.IsDir() && strings.HasSuffix(strings.ToLower(info.Name()), ".json") {
|
if !info.IsDir() && strings.HasSuffix(strings.ToLower(info.Name()), ".json") {
|
||||||
if data, errReadFile := os.ReadFile(path); errReadFile == nil && len(data) > 0 {
|
if data, errReadFile := os.ReadFile(path); errReadFile == nil && len(data) > 0 {
|
||||||
sum := sha256.Sum256(data)
|
sum := sha256.Sum256(data)
|
||||||
w.lastAuthHashes[path] = hex.EncodeToString(sum[:])
|
normalizedPath := w.normalizeAuthPath(path)
|
||||||
|
w.lastAuthHashes[normalizedPath] = hex.EncodeToString(sum[:])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@@ -1109,6 +1162,7 @@ func (w *Watcher) addOrUpdateClient(path string) {
|
|||||||
|
|
||||||
sum := sha256.Sum256(data)
|
sum := sha256.Sum256(data)
|
||||||
curHash := hex.EncodeToString(sum[:])
|
curHash := hex.EncodeToString(sum[:])
|
||||||
|
normalized := w.normalizeAuthPath(path)
|
||||||
|
|
||||||
w.clientsMutex.Lock()
|
w.clientsMutex.Lock()
|
||||||
|
|
||||||
@@ -1118,14 +1172,14 @@ func (w *Watcher) addOrUpdateClient(path string) {
|
|||||||
w.clientsMutex.Unlock()
|
w.clientsMutex.Unlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if prev, ok := w.lastAuthHashes[path]; ok && prev == curHash {
|
if prev, ok := w.lastAuthHashes[normalized]; ok && prev == curHash {
|
||||||
log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(path))
|
log.Debugf("auth file unchanged (hash match), skipping reload: %s", filepath.Base(path))
|
||||||
w.clientsMutex.Unlock()
|
w.clientsMutex.Unlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update hash cache
|
// Update hash cache
|
||||||
w.lastAuthHashes[path] = curHash
|
w.lastAuthHashes[normalized] = curHash
|
||||||
|
|
||||||
w.clientsMutex.Unlock() // Unlock before the callback
|
w.clientsMutex.Unlock() // Unlock before the callback
|
||||||
|
|
||||||
@@ -1140,10 +1194,11 @@ func (w *Watcher) addOrUpdateClient(path string) {
|
|||||||
|
|
||||||
// removeClient handles the removal of a single client.
|
// removeClient handles the removal of a single client.
|
||||||
func (w *Watcher) removeClient(path string) {
|
func (w *Watcher) removeClient(path string) {
|
||||||
|
normalized := w.normalizeAuthPath(path)
|
||||||
w.clientsMutex.Lock()
|
w.clientsMutex.Lock()
|
||||||
|
|
||||||
cfg := w.config
|
cfg := w.config
|
||||||
delete(w.lastAuthHashes, path)
|
delete(w.lastAuthHashes, normalized)
|
||||||
|
|
||||||
w.clientsMutex.Unlock() // Release the lock before the callback
|
w.clientsMutex.Unlock() // Release the lock before the callback
|
||||||
|
|
||||||
@@ -1317,6 +1372,12 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
|
|||||||
if kk.AgentTaskType != "" {
|
if kk.AgentTaskType != "" {
|
||||||
attrs["agent_task_type"] = kk.AgentTaskType
|
attrs["agent_task_type"] = kk.AgentTaskType
|
||||||
}
|
}
|
||||||
|
if kk.PreferredEndpoint != "" {
|
||||||
|
attrs["preferred_endpoint"] = kk.PreferredEndpoint
|
||||||
|
} else if cfg.KiroPreferredEndpoint != "" {
|
||||||
|
// Apply global default if not overridden by specific key
|
||||||
|
attrs["preferred_endpoint"] = cfg.KiroPreferredEndpoint
|
||||||
|
}
|
||||||
if refreshToken != "" {
|
if refreshToken != "" {
|
||||||
attrs["refresh_token"] = refreshToken
|
attrs["refresh_token"] = refreshToken
|
||||||
}
|
}
|
||||||
@@ -1532,6 +1593,17 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
|
|||||||
a.NextRefreshAfter = expiresAt.Add(-30 * time.Minute)
|
a.NextRefreshAfter = expiresAt.Add(-30 * time.Minute)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply global preferred endpoint setting if not present in metadata
|
||||||
|
if cfg.KiroPreferredEndpoint != "" {
|
||||||
|
// Check if already set in metadata (which takes precedence in executor)
|
||||||
|
if _, hasMeta := metadata["preferred_endpoint"]; !hasMeta {
|
||||||
|
if a.Attributes == nil {
|
||||||
|
a.Attributes = make(map[string]string)
|
||||||
|
}
|
||||||
|
a.Attributes["preferred_endpoint"] = cfg.KiroPreferredEndpoint
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
applyAuthExcludedModelsMeta(a, cfg, nil, "oauth")
|
applyAuthExcludedModelsMeta(a, cfg, nil, "oauth")
|
||||||
|
|||||||
@@ -7,7 +7,6 @@
|
|||||||
package claude
|
package claude
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"bytes"
|
"bytes"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
"context"
|
"context"
|
||||||
@@ -219,52 +218,24 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
|
func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
|
||||||
// v6.1: Intelligent Buffered Streamer strategy
|
// OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
|
||||||
// Enhanced buffering with larger buffer size (16KB) and longer flush interval (120ms).
|
// This guarantees clients see incremental output even for small responses.
|
||||||
// Smart flush only when buffer is sufficiently filled (≥50%), dramatically reducing
|
|
||||||
// flush frequency from ~12.5Hz to ~5-8Hz while maintaining low latency.
|
|
||||||
writer := bufio.NewWriterSize(c.Writer, 16*1024) // 4KB → 16KB
|
|
||||||
ticker := time.NewTicker(120 * time.Millisecond) // 80ms → 120ms
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
var chunkIdx int
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-c.Request.Context().Done():
|
case <-c.Request.Context().Done():
|
||||||
// Context cancelled, flush any remaining data before exit
|
|
||||||
_ = writer.Flush()
|
|
||||||
cancel(c.Request.Context().Err())
|
cancel(c.Request.Context().Err())
|
||||||
return
|
return
|
||||||
|
|
||||||
case <-ticker.C:
|
|
||||||
// Smart flush: only flush when buffer has sufficient data (≥50% full)
|
|
||||||
// This reduces flush frequency while ensuring data flows naturally
|
|
||||||
buffered := writer.Buffered()
|
|
||||||
if buffered >= 8*1024 { // At least 8KB (50% of 16KB buffer)
|
|
||||||
if err := writer.Flush(); err != nil {
|
|
||||||
// Error flushing, cancel and return
|
|
||||||
cancel(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
flusher.Flush() // Also flush the underlying http.ResponseWriter
|
|
||||||
}
|
|
||||||
|
|
||||||
case chunk, ok := <-data:
|
case chunk, ok := <-data:
|
||||||
if !ok {
|
if !ok {
|
||||||
// Stream ended, flush remaining data
|
flusher.Flush()
|
||||||
_ = writer.Flush()
|
|
||||||
cancel(nil)
|
cancel(nil)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forward the complete SSE event block directly (already formatted by the translator).
|
|
||||||
// The translator returns a complete SSE-compliant event block, including event:, data:, and separators.
|
|
||||||
// The handler just needs to forward it without reassembly.
|
|
||||||
if len(chunk) > 0 {
|
if len(chunk) > 0 {
|
||||||
_, _ = writer.Write(chunk)
|
_, _ = c.Writer.Write(chunk)
|
||||||
|
flusher.Flush()
|
||||||
}
|
}
|
||||||
chunkIdx++
|
|
||||||
|
|
||||||
case errMsg, ok := <-errs:
|
case errMsg, ok := <-errs:
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -276,21 +247,20 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
|||||||
status = errMsg.StatusCode
|
status = errMsg.StatusCode
|
||||||
}
|
}
|
||||||
c.Status(status)
|
c.Status(status)
|
||||||
|
|
||||||
// An error occurred: emit as a proper SSE error event
|
// An error occurred: emit as a proper SSE error event
|
||||||
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
||||||
_, _ = writer.WriteString("event: error\n")
|
_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
|
||||||
_, _ = writer.WriteString("data: ")
|
|
||||||
_, _ = writer.Write(errorBytes)
|
|
||||||
_, _ = writer.WriteString("\n\n")
|
|
||||||
_ = writer.Flush()
|
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
var execErr error
|
var execErr error
|
||||||
if errMsg != nil {
|
if errMsg != nil {
|
||||||
execErr = errMsg.Error
|
execErr = errMsg.Error
|
||||||
}
|
}
|
||||||
cancel(execErr)
|
cancel(execErr)
|
||||||
return
|
return
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -136,19 +136,29 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
|
|||||||
newCtx = context.WithValue(newCtx, "gin", c)
|
newCtx = context.WithValue(newCtx, "gin", c)
|
||||||
newCtx = context.WithValue(newCtx, "handler", handler)
|
newCtx = context.WithValue(newCtx, "handler", handler)
|
||||||
return newCtx, func(params ...interface{}) {
|
return newCtx, func(params ...interface{}) {
|
||||||
if h.Cfg.RequestLog {
|
if h.Cfg.RequestLog && len(params) == 1 {
|
||||||
if len(params) == 1 {
|
var payload []byte
|
||||||
data := params[0]
|
switch data := params[0].(type) {
|
||||||
switch data.(type) {
|
case []byte:
|
||||||
case []byte:
|
payload = data
|
||||||
appendAPIResponse(c, data.([]byte))
|
case error:
|
||||||
case error:
|
if data != nil {
|
||||||
appendAPIResponse(c, []byte(data.(error).Error()))
|
payload = []byte(data.Error())
|
||||||
case string:
|
|
||||||
appendAPIResponse(c, []byte(data.(string)))
|
|
||||||
case bool:
|
|
||||||
case nil:
|
|
||||||
}
|
}
|
||||||
|
case string:
|
||||||
|
payload = []byte(data)
|
||||||
|
}
|
||||||
|
if len(payload) > 0 {
|
||||||
|
if existing, exists := c.Get("API_RESPONSE"); exists {
|
||||||
|
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
|
||||||
|
trimmedPayload := bytes.TrimSpace(payload)
|
||||||
|
if len(trimmedPayload) > 0 && bytes.Contains(existingBytes, trimmedPayload) {
|
||||||
|
cancel()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
appendAPIResponse(c, payload)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ func (a *IFlowAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
|
|||||||
return nil, fmt.Errorf("iflow authentication failed: missing account identifier")
|
return nil, fmt.Errorf("iflow authentication failed: missing account identifier")
|
||||||
}
|
}
|
||||||
|
|
||||||
fileName := fmt.Sprintf("iflow-%s.json", email)
|
fileName := fmt.Sprintf("iflow-%s-%d.json", email, time.Now().Unix())
|
||||||
metadata := map[string]any{
|
metadata := map[string]any{
|
||||||
"email": email,
|
"email": email,
|
||||||
"api_key": tokenStorage.APIKey,
|
"api_key": tokenStorage.APIKey,
|
||||||
|
|||||||
@@ -47,8 +47,9 @@ func (a *KiroAuthenticator) Provider() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RefreshLead indicates how soon before expiry a refresh should be attempted.
|
// RefreshLead indicates how soon before expiry a refresh should be attempted.
|
||||||
|
// Set to 5 minutes to match Antigravity and avoid frequent refresh checks while still ensuring timely token refresh.
|
||||||
func (a *KiroAuthenticator) RefreshLead() *time.Duration {
|
func (a *KiroAuthenticator) RefreshLead() *time.Duration {
|
||||||
d := 30 * time.Minute
|
d := 5 * time.Minute
|
||||||
return &d
|
return &d
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +104,8 @@ func (a *KiroAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
|
|||||||
"source": "aws-builder-id",
|
"source": "aws-builder-id",
|
||||||
"email": tokenData.Email,
|
"email": tokenData.Email,
|
||||||
},
|
},
|
||||||
NextRefreshAfter: expiresAt.Add(-30 * time.Minute),
|
// NextRefreshAfter is aligned with RefreshLead (5min)
|
||||||
|
NextRefreshAfter: expiresAt.Add(-5 * time.Minute),
|
||||||
}
|
}
|
||||||
|
|
||||||
if tokenData.Email != "" {
|
if tokenData.Email != "" {
|
||||||
@@ -165,7 +167,8 @@ func (a *KiroAuthenticator) LoginWithGoogle(ctx context.Context, cfg *config.Con
|
|||||||
"source": "google-oauth",
|
"source": "google-oauth",
|
||||||
"email": tokenData.Email,
|
"email": tokenData.Email,
|
||||||
},
|
},
|
||||||
NextRefreshAfter: expiresAt.Add(-30 * time.Minute),
|
// NextRefreshAfter is aligned with RefreshLead (5min)
|
||||||
|
NextRefreshAfter: expiresAt.Add(-5 * time.Minute),
|
||||||
}
|
}
|
||||||
|
|
||||||
if tokenData.Email != "" {
|
if tokenData.Email != "" {
|
||||||
@@ -227,7 +230,8 @@ func (a *KiroAuthenticator) LoginWithGitHub(ctx context.Context, cfg *config.Con
|
|||||||
"source": "github-oauth",
|
"source": "github-oauth",
|
||||||
"email": tokenData.Email,
|
"email": tokenData.Email,
|
||||||
},
|
},
|
||||||
NextRefreshAfter: expiresAt.Add(-30 * time.Minute),
|
// NextRefreshAfter is aligned with RefreshLead (5min)
|
||||||
|
NextRefreshAfter: expiresAt.Add(-5 * time.Minute),
|
||||||
}
|
}
|
||||||
|
|
||||||
if tokenData.Email != "" {
|
if tokenData.Email != "" {
|
||||||
@@ -291,7 +295,8 @@ func (a *KiroAuthenticator) ImportFromKiroIDE(ctx context.Context, cfg *config.C
|
|||||||
"source": "kiro-ide-import",
|
"source": "kiro-ide-import",
|
||||||
"email": tokenData.Email,
|
"email": tokenData.Email,
|
||||||
},
|
},
|
||||||
NextRefreshAfter: expiresAt.Add(-30 * time.Minute),
|
// NextRefreshAfter is aligned with RefreshLead (5min)
|
||||||
|
NextRefreshAfter: expiresAt.Add(-5 * time.Minute),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Display the email if extracted
|
// Display the email if extracted
|
||||||
@@ -351,7 +356,8 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
|
|||||||
updated.Metadata["refresh_token"] = tokenData.RefreshToken
|
updated.Metadata["refresh_token"] = tokenData.RefreshToken
|
||||||
updated.Metadata["expires_at"] = tokenData.ExpiresAt
|
updated.Metadata["expires_at"] = tokenData.ExpiresAt
|
||||||
updated.Metadata["last_refresh"] = now.Format(time.RFC3339) // For double-check optimization
|
updated.Metadata["last_refresh"] = now.Format(time.RFC3339) // For double-check optimization
|
||||||
updated.NextRefreshAfter = expiresAt.Add(-30 * time.Minute)
|
// NextRefreshAfter is aligned with RefreshLead (5min)
|
||||||
|
updated.NextRefreshAfter = expiresAt.Add(-5 * time.Minute)
|
||||||
|
|
||||||
return updated, nil
|
return updated, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ type RefreshEvaluator interface {
|
|||||||
const (
|
const (
|
||||||
refreshCheckInterval = 5 * time.Second
|
refreshCheckInterval = 5 * time.Second
|
||||||
refreshPendingBackoff = time.Minute
|
refreshPendingBackoff = time.Minute
|
||||||
refreshFailureBackoff = 5 * time.Minute
|
refreshFailureBackoff = 1 * time.Minute
|
||||||
quotaBackoffBase = time.Second
|
quotaBackoffBase = time.Second
|
||||||
quotaBackoffMax = 30 * time.Minute
|
quotaBackoffMax = 30 * time.Minute
|
||||||
)
|
)
|
||||||
@@ -1471,7 +1471,9 @@ func (m *Manager) refreshAuth(ctx context.Context, id string) {
|
|||||||
updated.Runtime = auth.Runtime
|
updated.Runtime = auth.Runtime
|
||||||
}
|
}
|
||||||
updated.LastRefreshedAt = now
|
updated.LastRefreshedAt = now
|
||||||
updated.NextRefreshAfter = time.Time{}
|
// Preserve NextRefreshAfter set by the Authenticator
|
||||||
|
// If the Authenticator set a reasonable refresh time, it should not be overwritten
|
||||||
|
// If the Authenticator did not set it (zero value), shouldRefresh will use default logic
|
||||||
updated.LastError = nil
|
updated.LastError = nil
|
||||||
updated.UpdatedAt = now
|
updated.UpdatedAt = now
|
||||||
_, _ = m.Update(ctx, updated)
|
_, _ = m.Update(ctx, updated)
|
||||||
|
|||||||
561
test/thinking_conversion_test.go
Normal file
561
test/thinking_conversion_test.go
Normal file
@@ -0,0 +1,561 @@
|
|||||||
|
package test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
|
||||||
|
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
"github.com/tidwall/sjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// statusErr mirrors executor.statusErr to keep validation behavior aligned.
|
||||||
|
type statusErr struct {
|
||||||
|
code int
|
||||||
|
msg string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e statusErr) Error() string { return e.msg }
|
||||||
|
|
||||||
|
// registerCoreModels loads representative models across providers into the registry
|
||||||
|
// so NormalizeThinkingBudget and level validation use real ranges.
|
||||||
|
func registerCoreModels(t *testing.T) func() {
|
||||||
|
t.Helper()
|
||||||
|
reg := registry.GetGlobalRegistry()
|
||||||
|
uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano())
|
||||||
|
reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels())
|
||||||
|
reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels())
|
||||||
|
reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels())
|
||||||
|
reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels())
|
||||||
|
return func() {
|
||||||
|
reg.UnregisterClient(uid + "-gemini")
|
||||||
|
reg.UnregisterClient(uid + "-claude")
|
||||||
|
reg.UnregisterClient(uid + "-openai")
|
||||||
|
reg.UnregisterClient(uid + "-qwen")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
|
||||||
|
switch fromProtocol {
|
||||||
|
case "gemini":
|
||||||
|
return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix))
|
||||||
|
case "openai-response":
|
||||||
|
return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix))
|
||||||
|
default: // openai / claude and other chat-style payloads
|
||||||
|
return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata.
|
||||||
|
func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte {
|
||||||
|
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||||
|
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if budgetOverride != nil {
|
||||||
|
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||||
|
budgetOverride = &norm
|
||||||
|
}
|
||||||
|
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata.
|
||||||
|
func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte {
|
||||||
|
if len(metadata) == 0 {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if field == "" {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||||
|
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if util.ModelUsesThinkingLevels(model) {
|
||||||
|
if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
|
||||||
|
if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
|
||||||
|
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig.
|
||||||
|
func normalizeThinkingConfigLocal(payload []byte, model string) []byte {
|
||||||
|
if len(payload) == 0 || model == "" {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return stripThinkingFieldsLocal(payload, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.ModelUsesThinkingLevels(model) {
|
||||||
|
return normalizeReasoningEffortLevelLocal(payload, model)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Model supports thinking but uses numeric budgets, not levels.
|
||||||
|
// Strip effort string fields since they are not applicable.
|
||||||
|
return stripThinkingFieldsLocal(payload, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripThinkingFieldsLocal mirrors executor.stripThinkingFields.
|
||||||
|
func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte {
|
||||||
|
fieldsToRemove := []string{
|
||||||
|
"reasoning_effort",
|
||||||
|
"reasoning.effort",
|
||||||
|
}
|
||||||
|
if !effortOnly {
|
||||||
|
fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
|
||||||
|
}
|
||||||
|
out := payload
|
||||||
|
for _, field := range fieldsToRemove {
|
||||||
|
if gjson.GetBytes(out, field).Exists() {
|
||||||
|
out, _ = sjson.DeleteBytes(out, field)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel.
|
||||||
|
func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte {
|
||||||
|
out := payload
|
||||||
|
|
||||||
|
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
|
||||||
|
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
|
||||||
|
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateThinkingConfigLocal mirrors executor.validateThinkingConfig.
|
||||||
|
func validateThinkingConfigLocal(payload []byte, model string) error {
|
||||||
|
if len(payload) == 0 || model == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
levels := util.GetModelThinkingLevels(model)
|
||||||
|
checkField := func(path string) error {
|
||||||
|
if effort := gjson.GetBytes(payload, path); effort.Exists() {
|
||||||
|
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
|
||||||
|
return statusErr{
|
||||||
|
code: http.StatusBadRequest,
|
||||||
|
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := checkField("reasoning_effort"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := checkField("reasoning.effort"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
|
||||||
|
func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) {
|
||||||
|
body = normalizeThinkingConfigLocal(body, upstreamModel)
|
||||||
|
if err := validateThinkingConfigLocal(body, upstreamModel); err != nil {
|
||||||
|
return body, err
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
body, _ = sjson.SetBytes(body, "stream", true)
|
||||||
|
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildBodyForProtocol runs a minimal request through the same translation and
|
||||||
|
// thinking pipeline used in executors for the given target protocol.
|
||||||
|
func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) {
|
||||||
|
t.Helper()
|
||||||
|
normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix)
|
||||||
|
upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata)
|
||||||
|
raw := buildRawPayload(fromProtocol, modelWithSuffix)
|
||||||
|
stream := fromProtocol != toProtocol
|
||||||
|
|
||||||
|
body := sdktranslator.TranslateRequest(
|
||||||
|
sdktranslator.FromString(fromProtocol),
|
||||||
|
sdktranslator.FromString(toProtocol),
|
||||||
|
normalizedModel,
|
||||||
|
raw,
|
||||||
|
stream,
|
||||||
|
)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
switch toProtocol {
|
||||||
|
case "gemini":
|
||||||
|
body = applyThinkingMetadataLocal(body, metadata, normalizedModel)
|
||||||
|
body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
|
||||||
|
body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
|
||||||
|
body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
|
||||||
|
case "claude":
|
||||||
|
if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok {
|
||||||
|
body = util.ApplyClaudeThinkingConfig(body, budget)
|
||||||
|
}
|
||||||
|
case "openai":
|
||||||
|
body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort")
|
||||||
|
body = normalizeThinkingConfigLocal(body, upstreamModel)
|
||||||
|
err = validateThinkingConfigLocal(body, upstreamModel)
|
||||||
|
case "codex": // OpenAI responses / codex
|
||||||
|
body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort")
|
||||||
|
// Mirror CodexExecutor final normalization and model override so tests log the final body.
|
||||||
|
body, err = normalizeCodexPayload(body, upstreamModel)
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mirror executor behavior: final payload uses the upstream (base) model name.
|
||||||
|
if upstreamModel != "" {
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
|
||||||
|
// For tests we only keep model + thinking-related fields to avoid noise.
|
||||||
|
body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel)
|
||||||
|
return body, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterThinkingBody projects the translated payload down to only model and
|
||||||
|
// thinking-related fields for the given target protocol.
|
||||||
|
func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte {
|
||||||
|
if len(body) == 0 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
out := []byte(`{}`)
|
||||||
|
|
||||||
|
// Preserve model if present, otherwise fall back to upstream/normalized model.
|
||||||
|
if m := gjson.GetBytes(body, "model"); m.Exists() {
|
||||||
|
out, _ = sjson.SetBytes(out, "model", m.Value())
|
||||||
|
} else if upstreamModel != "" {
|
||||||
|
out, _ = sjson.SetBytes(out, "model", upstreamModel)
|
||||||
|
} else if normalizedModel != "" {
|
||||||
|
out, _ = sjson.SetBytes(out, "model", normalizedModel)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch toProtocol {
|
||||||
|
case "gemini":
|
||||||
|
if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() {
|
||||||
|
out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw))
|
||||||
|
}
|
||||||
|
case "claude":
|
||||||
|
if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() {
|
||||||
|
out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw))
|
||||||
|
}
|
||||||
|
case "openai":
|
||||||
|
if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value())
|
||||||
|
}
|
||||||
|
case "codex":
|
||||||
|
if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestThinkingConversionsAcrossProtocolsAndModels exercises the thinking /
// reasoning-effort translation matrix: for every (source protocol, target
// protocol, model, thinking-suffix) combination it builds a request body,
// translates it, and compares the resulting thinking field against an
// expectation computed independently from the util.* helpers.
func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
	cleanup := registerCoreModels(t)
	defer cleanup()

	models := []string{
		"gpt-5",             // supports levels (low/medium/high)
		"gemini-2.5-pro",    // supports numeric budget
		"qwen3-coder-flash", // no thinking support
	}
	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
	toProtocols := []string{"gemini", "claude", "openai", "codex"}

	// scenario pairs a model-name suffix (the "(low)"/"(1024)" style thinking
	// annotation) with an expectation function.
	// NOTE(review): expectFn is populated for every case but never consulted
	// below — the t.Run body recomputes expectations inline. Consider either
	// using it or removing the field.
	type scenario struct {
		name        string
		modelSuffix string
		expectFn    func(info *registry.ModelInfo) (present bool, budget int64)
	}

	// buildBudgetFn returns an expectation for a raw numeric thinking budget,
	// normalized through the model's registered min/max range.
	buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) {
		return func(info *registry.ModelInfo) (bool, int64) {
			if info == nil || info.Thinking == nil {
				return false, 0
			}
			return true, int64(util.NormalizeThinkingBudget(info.ID, raw))
		}
	}

	// levelBudgetFn returns an expectation for a named effort level
	// ("low"/"high"/"auto") mapped to the model's numeric budget.
	levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) {
		return func(info *registry.ModelInfo) (bool, int64) {
			if info == nil || info.Thinking == nil {
				return false, 0
			}
			if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok {
				return true, int64(b)
			}
			return false, 0
		}
	}

	for _, model := range models {
		info := registry.GetGlobalRegistry().GetModelInfo(model)
		// min/max default to 0 for models with no thinking support.
		min, max := 0, 0
		if info != nil && info.Thinking != nil {
			min = info.Thinking.Min
			max = info.Thinking.Max
		}

		for _, from := range fromProtocols {
			// Scenario selection follows protocol semantics:
			// - OpenAI-style protocols (openai/openai-response) express thinking as levels.
			// - Claude/Gemini-style protocols express thinking as numeric budgets.
			cases := []scenario{
				{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
			}
			if from == "openai" || from == "openai-response" {
				cases = append(cases,
					scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
					scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
					scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
				)
			} else { // claude or gemini
				if util.ModelUsesThinkingLevels(model) {
					// Numeric budgets for level-based models are mapped into levels when needed.
					// The chosen values sit on the low/medium/high bucket boundaries.
					cases = append(cases,
						scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
						scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)},
						scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
						scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)},
						scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
						scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)},
						scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
					)
				} else {
					// Out-of-range values exercise clamping against the model's
					// registered min/max budget.
					cases = append(cases,
						scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
						scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
					)
				}
			}

			for _, to := range toProtocols {
				if from == to {
					continue
				}
				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
				t.Logf(" %s -> %s | model: %s", from, to, model)
				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
				for _, cs := range cases {
					// Copy loop variables for the closure (pre-Go 1.22 semantics).
					from := from
					to := to
					cs := cs
					testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name)
					t.Run(testName, func(t *testing.T) {
						normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix)
						// Independently derive what the translated body should
						// contain: (present?, value, should-validation-fail?).
						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
							switch to {
							case "gemini":
								// Gemini expects a numeric thinkingBudget, normalized
								// into the model's supported range.
								budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata)
								if !ok || !util.ModelSupportsThinking(normalizedModel) {
									return false, "", false
								}
								if include != nil && !*include {
									return false, "", false
								}
								if budget == nil {
									return false, "", false
								}
								norm := util.NormalizeThinkingBudget(normalizedModel, *budget)
								return true, fmt.Sprintf("%d", norm), false
							case "claude":
								// Claude expects thinking.budget_tokens.
								if !util.ModelSupportsThinking(normalizedModel) {
									return false, "", false
								}
								budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata)
								if !ok || budget == nil {
									return false, "", false
								}
								return true, fmt.Sprintf("%d", *budget), false
							case "openai":
								if !util.ModelSupportsThinking(normalizedModel) {
									return false, "", false
								}
								if !util.ModelUsesThinkingLevels(normalizedModel) {
									// Non-levels models don't support effort strings in openai
									return false, "", false
								}
								effort, ok := util.ReasoningEffortFromMetadata(metadata)
								if !ok || strings.TrimSpace(effort) == "" {
									// No explicit effort: fall back to mapping a numeric
									// budget from metadata into an effort level.
									if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
										if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap {
											effort = mapped
											ok = true
										}
									}
								}
								if !ok || strings.TrimSpace(effort) == "" {
									return false, "", false
								}
								effort = strings.ToLower(strings.TrimSpace(effort))
								if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
									return true, normalized, false
								}
								return false, "", true // validation would fail
							case "codex":
								if !util.ModelSupportsThinking(normalizedModel) {
									return false, "", false
								}
								if !util.ModelUsesThinkingLevels(normalizedModel) {
									// Non-levels models don't support effort strings in codex
									// NOTE(review): both branches below return the same
									// value, so the from check is redundant here.
									if from != "openai-response" {
										return false, "", false
									}
									return false, "", false
								}
								// Prefer an explicit effort string from metadata...
								effort, ok := util.ReasoningEffortFromMetadata(metadata)
								if ok && strings.TrimSpace(effort) != "" {
									effort = strings.ToLower(strings.TrimSpace(effort))
									if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
										return true, normalized, false
									}
									return false, "", true
								}
								// ...then fall back to mapping a numeric budget.
								if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
									if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
										mapped = strings.ToLower(strings.TrimSpace(mapped))
										if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel {
											return true, normalized, false
										}
										return false, "", true
									}
								}
								if from != "openai-response" {
									// Codex translators default reasoning.effort to "medium" when
									// no explicit thinking suffix/metadata is provided.
									return true, "medium", false
								}
								return false, "", false
							default:
								return false, "", false
							}
						}()

						body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix)
						// Extract the actual thinking field from the translated body
						// at the protocol-specific JSON path.
						actualPresent, actualValue := func() (bool, string) {
							path := ""
							switch to {
							case "gemini":
								path = "generationConfig.thinkingConfig.thinkingBudget"
							case "claude":
								path = "thinking.budget_tokens"
							case "openai":
								path = "reasoning_effort"
							case "codex":
								path = "reasoning.effort"
							}
							if path == "" {
								return false, ""
							}
							val := gjson.GetBytes(body, path)
							if to == "codex" && !val.Exists() {
								// Codex bodies may nest effort under a "reasoning" object.
								reasoning := gjson.GetBytes(body, "reasoning")
								if reasoning.Exists() {
									val = reasoning.Get("effort")
								}
							}
							if !val.Exists() {
								return false, ""
							}
							if val.Type == gjson.Number {
								return true, fmt.Sprintf("%d", val.Int())
							}
							return true, val.String()
						}()

						t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
							from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))

						// A scenario that expects a validation error stops here.
						if expectErr {
							if err == nil {
								t.Fatalf("expected validation error but got none, body=%s", string(body))
							}
							return
						}
						if err != nil {
							t.Fatalf("unexpected error: %v body=%s", err, string(body))
						}

						if expectPresent != actualPresent {
							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
						}
						if expectPresent && expectValue != actualValue {
							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
						}
					})
				}
			}
		}
	}
}
|
||||||
|
|
||||||
|
func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
|
||||||
|
cleanup := registerCoreModels(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
budget int
|
||||||
|
want string
|
||||||
|
ok bool
|
||||||
|
}{
|
||||||
|
{name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true},
|
||||||
|
{name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
|
||||||
|
{name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
|
||||||
|
{name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true},
|
||||||
|
{name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
|
||||||
|
{name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
|
||||||
|
{name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
|
||||||
|
{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true},
|
||||||
|
{name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true},
|
||||||
|
{name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range cases {
|
||||||
|
cs := cs
|
||||||
|
t.Run(cs.name, func(t *testing.T) {
|
||||||
|
got, ok := util.OpenAIThinkingBudgetToEffort(cs.model, cs.budget)
|
||||||
|
if ok != cs.ok {
|
||||||
|
t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok)
|
||||||
|
}
|
||||||
|
if got != cs.want {
|
||||||
|
t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user