Merge pull request #23 from router-for-me/plus

v6.6.9
2026-04-26 16:06:42 +00:00 · 2025-12-14 00:07:57 +08:00
parent 92ca5078c1 79033aee34
commit 7ecc7aabda
11 changed files with 697 additions and 77 deletions
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -54,8 +54,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
@@ -152,8 +152,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	modelForCounting := req.Model
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,12 +57,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
@@ -148,12 +148,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -59,12 +59,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel)
+	translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
 	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
@@ -154,12 +155,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel)
+	translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
 	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
@@ -325,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy
 	return ""
 }
 // allowCompatReasoningEffort reports whether model matches, ignoring case and
 // surrounding whitespace, the alias or name of any model entry in the
 // OpenAI-compatibility config resolved for auth. It returns false for a blank
 // model, a nil executor or config, or when no compat config or entries exist.
 func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool {
 	if e == nil || e.cfg == nil {
 		return false
 	}
 	want := strings.TrimSpace(model)
 	if want == "" {
 		return false
 	}
 	compat := e.resolveCompatConfig(auth)
 	if compat == nil {
 		return false
 	}
 	// An empty Models list simply yields no iterations and falls through to false.
 	for _, entry := range compat.Models {
 		alias := strings.TrimSpace(entry.Alias)
 		name := strings.TrimSpace(entry.Name)
 		if strings.EqualFold(alias, want) || strings.EqualFold(name, want) {
 			return true
 		}
 	}
 	return false
 }
 func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility {
 	if auth == nil || e.cfg == nil {
 		return nil
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -48,7 +48,7 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
 // Metadata values take precedence over any existing field when the model supports thinking, intentionally
 // overwriting caller-provided values to honor suffix/default metadata priority.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
@@ -59,12 +59,24 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
 	if baseModel == "" {
 		baseModel = model
 	}
-	if !util.ModelSupportsThinking(baseModel) && !util.IsOpenAICompatibilityModel(baseModel) {
+	if !util.ModelSupportsThinking(baseModel) && !allowCompat {
 		return payload
 	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+		if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
-			return updated
+			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 				return updated
 			}
 		}
 	}
 	// Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models.
 	if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
 		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
 			if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
 				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 					return updated
 				}
 			}
 		}
 	}
 	return payload
@@ -223,34 +235,40 @@ func matchModelPattern(pattern, model string) bool {
 // normalizeThinkingConfig normalizes thinking-related fields in the payload
 // based on model capabilities. For models without thinking support, it strips
 // reasoning fields. For models with level-based thinking, it validates and
-// normalizes the reasoning effort level.
+// normalizes the reasoning effort level. For models with numeric budget thinking,
-func normalizeThinkingConfig(payload []byte, model string) []byte {
+// it strips the effort string fields.
 func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
-		if util.IsOpenAICompatibilityModel(model) {
+		if allowCompat {
 			return payload
 		}
-		return stripThinkingFields(payload)
+		return stripThinkingFields(payload, false)
 	}
 	if util.ModelUsesThinkingLevels(model) {
 		return normalizeReasoningEffortLevel(payload, model)
 	}
-	return payload
+	// Model supports thinking but uses numeric budgets, not levels.
 	// Strip effort string fields since they are not applicable.
 	return stripThinkingFields(payload, true)
 }
 // stripThinkingFields removes thinking-related fields from the payload for
-// models that do not support thinking.
+// models that do not support thinking. If effortOnly is true, only removes
-func stripThinkingFields(payload []byte) []byte {
+// effort string fields (for models using numeric budgets).
 func stripThinkingFields(payload []byte, effortOnly bool) []byte {
 	fieldsToRemove := []string{
 		"reasoning",
 		"reasoning_effort",
 		"reasoning.effort",
 	}
 	if !effortOnly {
 		fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
 	}
 	out := payload
 	for _, field := range fieldsToRemove {
 		if gjson.GetBytes(out, field).Exists() {
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -51,12 +51,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
@@ -131,12 +131,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel)
+	body = normalizeThinkingConfig(body, upstreamModel, false)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
--- a/internal/util/claude_thinking.go
+++ b/internal/util/claude_thinking.go
@@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
 // It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
 // Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
 func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
 	if !ModelSupportsThinking(modelName) {
 		return nil, false
 	}
 	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
 	if !matched {
 		return nil, false
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool)
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
 	incl := includeThoughts
 	if incl == nil && budget != nil && *budget != 0 {
 		defaultInclude := true
 		incl = &defaultInclude
 	}
 	if incl != nil {
 		valuePath := "generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
@@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 			updated = rewritten
 		}
 	}
-	if includeThoughts != nil {
+	// Default to including thoughts when a budget override is present but no explicit include flag is provided.
 	incl := includeThoughts
 	if incl == nil && budget != nil && *budget != 0 {
 		defaultInclude := true
 		incl = &defaultInclude
 	}
 	if incl != nil {
 		valuePath := "request.generationConfig.thinkingConfig.include_thoughts"
-		rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts)
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
 		if err == nil {
 			updated = rewritten
 		}
--- a/internal/util/openai_thinking.go
+++ b/internal/util/openai_thinking.go
@@ -0,0 +1,34 @@
 package util
 // OpenAIThinkingBudgetToEffort converts a numeric thinking budget (in tokens)
 // to an OpenAI-style reasoning effort level for level-based models.
 //
 // Mapping:
 //   - negative     -> "", false (unsupported)
 //   - 0            -> "none"
 //   - 1..1024      -> "low"
 //   - 1025..8192   -> "medium"
 //   - 8193..24576  -> "high"
 //   - 24577 and up -> the last entry of GetModelThinkingLevels(model), or
 //     "xhigh" when the model reports no levels
 //
 // Every negative budget, including -1, is rejected here; the dynamic -1 budget
 // is expected to be handled by callers before reaching this function.
 func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
 	if budget < 0 {
 		return "", false
 	}
 	if budget == 0 {
 		return "none", true
 	}
 	if budget <= 1024 {
 		return "low", true
 	}
 	if budget <= 8192 {
 		return "medium", true
 	}
 	if budget <= 24576 {
 		return "high", true
 	}
 	// Above the "high" ceiling: use the model's top level when it declares any.
 	levels := GetModelThinkingLevels(model)
 	if n := len(levels); n > 0 {
 		return levels[n-1], true
 	}
 	return "xhigh", true
 }
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*
 	if !matched {
 		return nil, nil, false
 	}
 	// Level-based models (OpenAI-style) do not accept numeric thinking budgets in
 	// Claude/Gemini-style protocols, so we don't derive budgets for them here.
 	if ModelUsesThinkingLevels(model) {
 		return nil, nil, false
 	}
 	if budget == nil && effort != nil {
 		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -7,7 +7,6 @@
 package claude
 import (
 	"bufio"
 	"bytes"
 	"compress/gzip"
 	"context"
@@ -219,58 +218,24 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 }
 func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	// v6.2: Immediate flush strategy for SSE streams
+	// OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
-	// SSE requires immediate data delivery to prevent client timeouts.
+	// This guarantees clients see incremental output even for small responses.
 	// Previous buffering strategy (16KB buffer, 8KB threshold) caused delays
 	// because SSE events are typically small (< 1KB), leading to client retries.
 	writer := bufio.NewWriterSize(c.Writer, 4*1024) // 4KB buffer (smaller for faster flush)
 	ticker := time.NewTicker(50 * time.Millisecond) // 50ms interval for responsive streaming
 	defer ticker.Stop()
 	var chunkIdx int
 	for {
 		select {
 		case <-c.Request.Context().Done():
 			// Context cancelled, flush any remaining data before exit
 			_ = writer.Flush()
 			cancel(c.Request.Context().Err())
 			return
 		case <-ticker.C:
 			// Flush any buffered data on timer to ensure responsiveness
 			// For SSE, we flush whenever there's any data to prevent client timeouts
 			if writer.Buffered() > 0 {
 				if err := writer.Flush(); err != nil {
 					// Error flushing, cancel and return
 					cancel(err)
 					return
 				}
 				flusher.Flush() // Also flush the underlying http.ResponseWriter
 			}
 		case chunk, ok := <-data:
 			if !ok {
 				// Stream ended, flush remaining data
 				_ = writer.Flush()
 				flusher.Flush()
 				cancel(nil)
 				return
 			}
 			// Forward the complete SSE event block directly (already formatted by the translator).
 			// The translator returns a complete SSE-compliant event block, including event:, data:, and separators.
 			// The handler just needs to forward it without reassembly.
 			if len(chunk) > 0 {
-				_, _ = writer.Write(chunk)
+				_, _ = c.Writer.Write(chunk)
-				// Immediately flush for first few chunks to establish connection quickly
+				flusher.Flush()
 				// This prevents client timeout/retry on slow backends like Kiro
 				if chunkIdx < 3 {
 					_ = writer.Flush()
 					flusher.Flush()
 				}
 			}
 			chunkIdx++
 		case errMsg, ok := <-errs:
 			if !ok {
@@ -282,21 +247,20 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 					status = errMsg.StatusCode
 				}
 				c.Status(status)
 				// An error occurred: emit as a proper SSE error event
 				errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
-				_, _ = writer.WriteString("event: error\n")
+				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
 				_, _ = writer.WriteString("data: ")
 				_, _ = writer.Write(errorBytes)
 				_, _ = writer.WriteString("\n\n")
 				_ = writer.Flush()
 				flusher.Flush()
 			}
 			var execErr error
 			if errMsg != nil {
 				execErr = errMsg.Error
 			}
 			cancel(execErr)
 			return
 		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -0,0 +1,561 @@
 package test
 import (
 	"fmt"
 	"net/http"
 	"strings"
 	"testing"
 	"time"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 // statusErr mirrors executor.statusErr to keep validation behavior aligned.
 // It carries a status code together with a message and is used as an error
 // value by the local validation helpers in this test file.
 type statusErr struct {
 	// code is the status code attached to the error (set to
 	// http.StatusBadRequest by validateThinkingConfigLocal).
 	code int
 	// msg is the text returned by Error.
 	msg  string
 }
 // Error returns the stored message so statusErr satisfies the error interface.
 func (e statusErr) Error() string { return e.msg }
 // registerCoreModels registers the Gemini, Claude, OpenAI (under provider
 // "codex") and Qwen model lists with the global registry so that
 // NormalizeThinkingBudget and level validation use real ranges. All client IDs
 // share one time-based prefix; the returned function unregisters those same IDs.
 func registerCoreModels(t *testing.T) func() {
 	t.Helper()
 	reg := registry.GetGlobalRegistry()
 	prefix := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano())
 	geminiID := prefix + "-gemini"
 	claudeID := prefix + "-claude"
 	openaiID := prefix + "-openai"
 	qwenID := prefix + "-qwen"
 	reg.RegisterClient(geminiID, "gemini", registry.GetGeminiModels())
 	reg.RegisterClient(claudeID, "claude", registry.GetClaudeModels())
 	reg.RegisterClient(openaiID, "codex", registry.GetOpenAIModels())
 	reg.RegisterClient(qwenID, "qwen", registry.GetQwenModels())
 	return func() {
 		// Unregister in the same order the clients were registered.
 		for _, id := range []string{geminiID, claudeID, openaiID, qwenID} {
 			reg.UnregisterClient(id)
 		}
 	}
 }
 // buildRawPayload returns a minimal single-message ("hi") JSON request body in
 // the shape of fromProtocol, with modelWithSuffix inserted verbatim as the
 // model. "gemini" and "openai-response" have dedicated shapes; every other
 // protocol (openai, claude, ...) gets the chat-style messages payload.
 func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
 	const (
 		geminiTemplate   = `{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`
 		responseTemplate = `{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`
 		chatTemplate     = `{"model":"%s","messages":[{"role":"user","content":"hi"}]}`
 	)
 	var rendered string
 	if fromProtocol == "gemini" {
 		rendered = fmt.Sprintf(geminiTemplate, modelWithSuffix)
 	} else if fromProtocol == "openai-response" {
 		rendered = fmt.Sprintf(responseTemplate, modelWithSuffix)
 	} else {
 		// openai / claude and other chat-style payloads
 		rendered = fmt.Sprintf(chatTemplate, modelWithSuffix)
 	}
 	return []byte(rendered)
 }
 // applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata.
 // It resolves budget/include overrides from metadata and, when at least one is
 // present and the model supports thinking, writes them into payload through
 // util.ApplyGeminiThinkingConfig. A budget override is passed through
 // util.NormalizeThinkingBudget first. Otherwise payload is returned untouched.
 func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte {
 	budget, include, matched := util.ResolveThinkingConfigFromMetadata(model, metadata)
 	if !matched {
 		return payload
 	}
 	if budget == nil && include == nil {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
 		return payload
 	}
 	if budget == nil {
 		// Only the include flag was overridden.
 		return util.ApplyGeminiThinkingConfig(payload, nil, include)
 	}
 	normalized := util.NormalizeThinkingBudget(model, *budget)
 	return util.ApplyGeminiThinkingConfig(payload, &normalized, include)
 }
 // applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata.
 // It writes a reasoning effort level at field in payload. An explicit effort
 // in metadata is tried first; failing that, for models that use thinking
 // levels, a numeric budget from metadata is converted with
 // util.OpenAIThinkingBudgetToEffort. The payload is returned unchanged when
 // metadata is empty, the model lacks thinking support, field is empty, or no
 // level can be derived or written.
 func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte {
 	if len(metadata) == 0 || !util.ModelSupportsThinking(model) || field == "" {
 		return payload
 	}
 	// write sets level at field; the bool is false when sjson rejects the update.
 	write := func(level string) ([]byte, bool) {
 		updated, err := sjson.SetBytes(payload, field, level)
 		return updated, err == nil
 	}
 	if level, found := util.ReasoningEffortFromMetadata(metadata); found && level != "" {
 		if updated, ok := write(level); ok {
 			return updated
 		}
 	}
 	// Fallback: map a numeric budget to a level, for level-based models only.
 	if !util.ModelUsesThinkingLevels(model) {
 		return payload
 	}
 	budget, _, _, matched := util.ThinkingFromMetadata(metadata)
 	if !matched || budget == nil {
 		return payload
 	}
 	level, ok := util.OpenAIThinkingBudgetToEffort(model, *budget)
 	if !ok || level == "" {
 		return payload
 	}
 	if updated, written := write(level); written {
 		return updated
 	}
 	return payload
 }
 // normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig.
 // Depending on the model's capabilities it either strips all reasoning fields
 // (no thinking support), normalizes the effort level (level-based models), or
 // strips only the effort string fields (all remaining thinking models). An
 // empty payload or model is returned as is.
 func normalizeThinkingConfigLocal(payload []byte, model string) []byte {
 	switch {
 	case len(payload) == 0 || model == "":
 		return payload
 	case !util.ModelSupportsThinking(model):
 		return stripThinkingFieldsLocal(payload, false)
 	case util.ModelUsesThinkingLevels(model):
 		return normalizeReasoningEffortLevelLocal(payload, model)
 	default:
 		// Thinking is supported but expressed as numeric budgets, so effort
 		// strings do not apply.
 		return stripThinkingFieldsLocal(payload, true)
 	}
 }
 // stripThinkingFieldsLocal mirrors executor.stripThinkingFields.
 // It deletes "reasoning_effort" and "reasoning.effort" from payload when they
 // exist; unless effortOnly is set, the whole "reasoning" field is deleted
 // first as well.
 func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte {
 	paths := make([]string, 0, 3)
 	if !effortOnly {
 		paths = append(paths, "reasoning")
 	}
 	paths = append(paths, "reasoning_effort", "reasoning.effort")
 	stripped := payload
 	for _, path := range paths {
 		if !gjson.GetBytes(stripped, path).Exists() {
 			continue
 		}
 		stripped, _ = sjson.DeleteBytes(stripped, path)
 	}
 	return stripped
 }
 // normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel.
 // For each of "reasoning_effort" and "reasoning.effort" that exists in payload,
 // the value is replaced by the result of util.NormalizeReasoningEffortLevel
 // whenever that call reports success; other values are left in place.
 func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte {
 	result := payload
 	for _, path := range [...]string{"reasoning_effort", "reasoning.effort"} {
 		current := gjson.GetBytes(result, path)
 		if !current.Exists() {
 			continue
 		}
 		level, ok := util.NormalizeReasoningEffortLevel(model, current.String())
 		if !ok {
 			continue
 		}
 		result, _ = sjson.SetBytes(result, path, level)
 	}
 	return result
 }
 // validateThinkingConfigLocal mirrors executor.validateThinkingConfig.
 // For models that support thinking and use levels, it checks
 // "reasoning_effort" and then "reasoning.effort"; the first existing value
 // that util.NormalizeReasoningEffortLevel rejects yields a statusErr with
 // http.StatusBadRequest. In every other situation it returns nil.
 func validateThinkingConfigLocal(payload []byte, model string) error {
 	if len(payload) == 0 || model == "" {
 		return nil
 	}
 	if !(util.ModelSupportsThinking(model) && util.ModelUsesThinkingLevels(model)) {
 		return nil
 	}
 	supported := util.GetModelThinkingLevels(model)
 	for _, path := range []string{"reasoning_effort", "reasoning.effort"} {
 		effort := gjson.GetBytes(payload, path)
 		if !effort.Exists() {
 			continue
 		}
 		if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
 			continue
 		}
 		return statusErr{
 			code: http.StatusBadRequest,
 			msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(supported, ", ")),
 		}
 	}
 	return nil
 }
 // normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
 // It normalizes and validates the thinking configuration for upstreamModel,
 // then sets "model" to upstreamModel, sets "stream" to true and removes
 // "previous_response_id". If validation fails, the normalized body is returned
 // together with the error and none of those later edits are applied.
 func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) {
 	body = normalizeThinkingConfigLocal(body, upstreamModel)
 	if err := validateThinkingConfigLocal(body, upstreamModel); err != nil {
 		return body, err
 	}
 	// sjson errors are deliberately ignored in this test helper.
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	return body, nil
 }
 // buildBodyForProtocol runs a minimal request through the same translation and
 // thinking pipeline used in executors for the given target protocol.
 //
 // It splits modelWithSuffix into a normalized model plus metadata, translates a
 // minimal payload from fromProtocol to toProtocol, applies the target-specific
 // thinking handling, forces the upstream model name, and finally reduces the
 // body to model + thinking fields via filterThinkingBody. The returned error is
 // only ever set by the "openai" and "codex" validation steps.
 func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) {
 	t.Helper()
 	normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix)
 	upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata)
 	raw := buildRawPayload(fromProtocol, modelWithSuffix)
 	// The translator's stream flag is true whenever source and target differ.
 	stream := fromProtocol != toProtocol
 	body := sdktranslator.TranslateRequest(
 		sdktranslator.FromString(fromProtocol),
 		sdktranslator.FromString(toProtocol),
 		normalizedModel,
 		raw,
 		stream,
 	)
 	var err error
 	switch toProtocol {
 	case "gemini":
 		// Budget-style thinking: apply metadata overrides, defaults, then clamp/strip.
 		body = applyThinkingMetadataLocal(body, metadata, normalizedModel)
 		body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
 		body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
 		body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
 	case "claude":
 		if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok {
 			body = util.ApplyClaudeThinkingConfig(body, budget)
 		}
 	case "openai":
 		// Level-style thinking carried in the top-level "reasoning_effort" field.
 		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort")
 		body = normalizeThinkingConfigLocal(body, upstreamModel)
 		err = validateThinkingConfigLocal(body, upstreamModel)
 	case "codex": // OpenAI responses / codex
 		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort")
 		// Mirror CodexExecutor final normalization and model override so tests log the final body.
 		body, err = normalizeCodexPayload(body, upstreamModel)
 	default:
 		// Unknown targets get no thinking-specific processing.
 	}
 	// Mirror executor behavior: final payload uses the upstream (base) model name.
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
 	// For tests we only keep model + thinking-related fields to avoid noise.
 	body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel)
 	return body, err
 }
 // filterThinkingBody builds a fresh JSON object holding only the model and the
 // thinking-related field that belongs to toProtocol:
 // "generationConfig.thinkingConfig" (gemini), "thinking" (claude),
 // "reasoning_effort" (openai) or "reasoning.effort" (codex). The model comes
 // from body when present, otherwise from upstreamModel, otherwise from
 // normalizedModel. An empty body is returned unchanged.
 func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte {
 	if len(body) == 0 {
 		return body
 	}
 	filtered := []byte(`{}`)
 	switch m := gjson.GetBytes(body, "model"); {
 	case m.Exists():
 		filtered, _ = sjson.SetBytes(filtered, "model", m.Value())
 	case upstreamModel != "":
 		filtered, _ = sjson.SetBytes(filtered, "model", upstreamModel)
 	case normalizedModel != "":
 		filtered, _ = sjson.SetBytes(filtered, "model", normalizedModel)
 	}
 	// copyRaw transfers the raw JSON found at path, when it exists.
 	copyRaw := func(path string) {
 		if found := gjson.GetBytes(body, path); found.Exists() {
 			filtered, _ = sjson.SetRawBytes(filtered, path, []byte(found.Raw))
 		}
 	}
 	// copyValue transfers the decoded value found at path, when it exists.
 	copyValue := func(path string) {
 		if found := gjson.GetBytes(body, path); found.Exists() {
 			filtered, _ = sjson.SetBytes(filtered, path, found.Value())
 		}
 	}
 	switch toProtocol {
 	case "gemini":
 		copyRaw("generationConfig.thinkingConfig")
 	case "claude":
 		copyRaw("thinking")
 	case "openai":
 		copyValue("reasoning_effort")
 	case "codex":
 		copyValue("reasoning.effort")
 	}
 	return filtered
 }
 func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 	cleanup := registerCoreModels(t)
 	defer cleanup()
 	models := []string{
 		"gpt-5",             // supports levels (low/medium/high)
 		"gemini-2.5-pro",    // supports numeric budget
 		"qwen3-coder-flash", // no thinking support
 	}
 	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
 	toProtocols := []string{"gemini", "claude", "openai", "codex"}
 	type scenario struct {
 		name        string
 		modelSuffix string
 		expectFn    func(info *registry.ModelInfo) (present bool, budget int64)
 	}
 	buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) {
 		return func(info *registry.ModelInfo) (bool, int64) {
 			if info == nil || info.Thinking == nil {
 				return false, 0
 			}
 			return true, int64(util.NormalizeThinkingBudget(info.ID, raw))
 		}
 	}
 	levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) {
 		return func(info *registry.ModelInfo) (bool, int64) {
 			if info == nil || info.Thinking == nil {
 				return false, 0
 			}
 			if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok {
 				return true, int64(b)
 			}
 			return false, 0
 		}
 	}
 	for _, model := range models {
 		info := registry.GetGlobalRegistry().GetModelInfo(model)
 		min, max := 0, 0
 		if info != nil && info.Thinking != nil {
 			min = info.Thinking.Min
 			max = info.Thinking.Max
 		}
 		for _, from := range fromProtocols {
 			// Scenario selection follows protocol semantics:
 			// - OpenAI-style protocols (openai/openai-response) express thinking as levels.
 			// - Claude/Gemini-style protocols express thinking as numeric budgets.
 			cases := []scenario{
 				{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
 			}
 			if from == "openai" || from == "openai-response" {
 				cases = append(cases,
 					scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
 					scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
 					scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
 				)
 			} else { // claude or gemini
 				if util.ModelUsesThinkingLevels(model) {
 					// Numeric budgets for level-based models are mapped into levels when needed.
 					cases = append(cases,
 						scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
 						scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)},
 						scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
 						scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)},
 						scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
 						scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)},
 						scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
 					)
 				} else {
 					cases = append(cases,
 						scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
 						scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
 					)
 				}
 			}
 			for _, to := range toProtocols {
 				if from == to {
 					continue
 				}
 				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
 				t.Logf("  %s -> %s | model: %s", from, to, model)
 				t.Logf("─────────────────────────────────────────────────────────────────────────────────")
 				for _, cs := range cases {
 					from := from
 					to := to
 					cs := cs
 					testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name)
 					t.Run(testName, func(t *testing.T) {
 						normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix)
 						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
 							switch to {
 							case "gemini":
 								budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata)
 								if !ok || !util.ModelSupportsThinking(normalizedModel) {
 									return false, "", false
 								}
 								if include != nil && !*include {
 									return false, "", false
 								}
 								if budget == nil {
 									return false, "", false
 								}
 								norm := util.NormalizeThinkingBudget(normalizedModel, *budget)
 								return true, fmt.Sprintf("%d", norm), false
 							case "claude":
 								if !util.ModelSupportsThinking(normalizedModel) {
 									return false, "", false
 								}
 								budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata)
 								if !ok || budget == nil {
 									return false, "", false
 								}
 								return true, fmt.Sprintf("%d", *budget), false
 							case "openai":
 								if !util.ModelSupportsThinking(normalizedModel) {
 									return false, "", false
 								}
 								if !util.ModelUsesThinkingLevels(normalizedModel) {
 									// Non-levels models don't support effort strings in openai
 									return false, "", false
 								}
 								effort, ok := util.ReasoningEffortFromMetadata(metadata)
 								if !ok || strings.TrimSpace(effort) == "" {
 									if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
 										if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap {
 											effort = mapped
 											ok = true
 										}
 									}
 								}
 								if !ok || strings.TrimSpace(effort) == "" {
 									return false, "", false
 								}
 								effort = strings.ToLower(strings.TrimSpace(effort))
 								if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
 									return true, normalized, false
 								}
 								return false, "", true // validation would fail
 							case "codex":
 								if !util.ModelSupportsThinking(normalizedModel) {
 									return false, "", false
 								}
 								if !util.ModelUsesThinkingLevels(normalizedModel) {
 									// Non-levels models don't support effort strings in codex
 									if from != "openai-response" {
 										return false, "", false
 									}
 									return false, "", false
 								}
 								effort, ok := util.ReasoningEffortFromMetadata(metadata)
 								if ok && strings.TrimSpace(effort) != "" {
 									effort = strings.ToLower(strings.TrimSpace(effort))
 									if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel {
 										return true, normalized, false
 									}
 									return false, "", true
 								}
 								if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
 									if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
 										mapped = strings.ToLower(strings.TrimSpace(mapped))
 										if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel {
 											return true, normalized, false
 										}
 										return false, "", true
 									}
 								}
 								if from != "openai-response" {
 									// Codex translators default reasoning.effort to "medium" when
 									// no explicit thinking suffix/metadata is provided.
 									return true, "medium", false
 								}
 								return false, "", false
 							default:
 								return false, "", false
 							}
 						}()
 						body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix)
 						actualPresent, actualValue := func() (bool, string) {
 							path := ""
 							switch to {
 							case "gemini":
 								path = "generationConfig.thinkingConfig.thinkingBudget"
 							case "claude":
 								path = "thinking.budget_tokens"
 							case "openai":
 								path = "reasoning_effort"
 							case "codex":
 								path = "reasoning.effort"
 							}
 							if path == "" {
 								return false, ""
 							}
 							val := gjson.GetBytes(body, path)
 							if to == "codex" && !val.Exists() {
 								reasoning := gjson.GetBytes(body, "reasoning")
 								if reasoning.Exists() {
 									val = reasoning.Get("effort")
 								}
 							}
 							if !val.Exists() {
 								return false, ""
 							}
 							if val.Type == gjson.Number {
 								return true, fmt.Sprintf("%d", val.Int())
 							}
 							return true, val.String()
 						}()
 						t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
 							from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
 						if expectErr {
 							if err == nil {
 								t.Fatalf("expected validation error but got none, body=%s", string(body))
 							}
 							return
 						}
 						if err != nil {
 							t.Fatalf("unexpected error: %v body=%s", err, string(body))
 						}
 						if expectPresent != actualPresent {
 							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
 						}
 						if expectPresent && expectValue != actualValue {
 							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
 						}
 					})
 				}
 			}
 		}
 	}
 }
 // TestOpenAIThinkingBudgetToEffortRanges is a table-driven check of
 // util.OpenAIThinkingBudgetToEffort. Each row pairs a model and a numeric
 // thinking budget with the effort string and ok flag the helper is expected
 // to return. The rows pin the boundaries between "none", "low", "medium" and
 // "high", budgets above the top boundary (expected to land on the highest
 // level: "high" for gpt-5, "xhigh" for gpt-5.2), and a negative budget
 // (expected to be rejected with ok == false).
 func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
 	// Register the core models for the duration of the test; the returned
 	// cleanup function runs when this test function returns.
 	defer registerCoreModels(t)()

 	type effortCase struct {
 		name   string // subtest name
 		model  string // model ID passed to the helper
 		budget int    // numeric thinking budget passed to the helper
 		want   string // expected effort level
 		ok     bool   // expected success flag
 	}

 	table := []effortCase{
 		{"zero-none", "gpt-5", 0, "none", true},
 		{"low-min", "gpt-5", 1, "low", true},
 		{"low-max", "gpt-5", 1024, "low", true},
 		{"medium-min", "gpt-5", 1025, "medium", true},
 		{"medium-max", "gpt-5", 8192, "medium", true},
 		{"high-min", "gpt-5", 8193, "high", true},
 		{"high-max", "gpt-5", 24576, "high", true},
 		{"over-max-clamps-to-highest", "gpt-5", 24577, "high", true},
 		{"over-max-xhigh-model", "gpt-5.2", 50000, "xhigh", true},
 		{"negative-unsupported", "gpt-5", -5, "", false},
 	}

 	for i := range table {
 		// Copy the row so the subtest closure owns its own value.
 		tc := table[i]
 		t.Run(tc.name, func(t *testing.T) {
 			effort, matched := util.OpenAIThinkingBudgetToEffort(tc.model, tc.budget)
 			// The ok flag is checked first; the value is only compared when
 			// the flag already agrees with the expectation.
 			switch {
 			case matched != tc.ok:
 				t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", tc.model, tc.budget, tc.ok, matched)
 			case effort != tc.want:
 				t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", tc.model, tc.budget, tc.want, effort)
 			}
 		})
 	}
 }