diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index 4558b319..ada0af39 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -322,7 +322,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
 	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
 	payload = util.ConvertThinkingLevelToBudget(payload)
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
@@ -384,8 +384,16 @@ func ensureColonSpacedJSON(payload []byte) []byte {
 
 	for i := 0; i < len(indented); i++ {
 		ch := indented[i]
-		if ch == '"' && (i == 0 || indented[i-1] != '\\') {
-			inString = !inString
+		if ch == '"' {
+			// A quote is escaped only when preceded by an odd number of consecutive backslashes.
+			// For example, in "\\\"" the quote after three backslashes stays inside the string, while in "\\\\" the quote after four backslashes closes it.
+			backslashes := 0
+			for j := i - 1; j >= 0 && indented[j] == '\\'; j-- {
+				backslashes++
+			}
+			if backslashes%2 == 0 {
+				inString = !inString
+			}
 		}
 
 		if !inString {
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 108fee27..4caf05c4 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -54,9 +54,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -152,9 +152,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 
 	modelForCounting := req.Model
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index 8dd3dc3b..f211ba62 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -83,7 +83,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
@@ -178,7 +178,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
 	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
@@ -290,7 +290,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	translatedReq = applyThinkingMetadata(translatedReq, req.Metadata, req.Model)
+	translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, req.Model)
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index 03df1be0..ad0b4d2a 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,13 +57,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -148,13 +148,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 1bbd0c8e..1c57c9b7 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -60,13 +60,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
+	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
-	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+	translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
+	if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 
@@ -156,13 +156,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
-	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
+	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" && modelOverride == "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
-	translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat)
-	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+	translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
+	if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 6e352c51..b0eafbb7 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -11,9 +11,9 @@ import (
 	"github.com/tidwall/sjson"
 )
 
-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
+// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
-func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
+func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
@@ -45,10 +45,10 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
 // Metadata values take precedence over any existing field when the model supports thinking, intentionally
 // overwriting caller-provided values to honor suffix/default metadata priority.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
+func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
@@ -73,6 +73,12 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
 	if util.ModelUsesThinkingLevels(baseModel) || allowCompat {
 		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
 			if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" {
+				if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) {
+					if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported {
+						return StripThinkingFields(payload, false)
+					}
+				}
+
 				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 					return updated
 				}
@@ -232,12 +238,12 @@ func matchModelPattern(pattern, model string) bool {
 	return pi == len(pattern)
 }
 
-// normalizeThinkingConfig normalizes thinking-related fields in the payload
+// NormalizeThinkingConfig normalizes thinking-related fields in the payload
 // based on model capabilities. For models without thinking support, it strips
 // reasoning fields. For models with level-based thinking, it validates and
 // normalizes the reasoning effort level. For models with numeric budget thinking,
 // it strips the effort string fields.
-func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
+func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}
@@ -246,22 +252,22 @@ func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []b
 		if allowCompat {
 			return payload
 		}
-		return stripThinkingFields(payload, false)
+		return StripThinkingFields(payload, false)
 	}
 
 	if util.ModelUsesThinkingLevels(model) {
-		return normalizeReasoningEffortLevel(payload, model)
+		return NormalizeReasoningEffortLevel(payload, model)
 	}
 
 	// Model supports thinking but uses numeric budgets, not levels.
 	// Strip effort string fields since they are not applicable.
-	return stripThinkingFields(payload, true)
+	return StripThinkingFields(payload, true)
 }
 
-// stripThinkingFields removes thinking-related fields from the payload for
+// StripThinkingFields removes thinking-related fields from the payload for
 // models that do not support thinking. If effortOnly is true, only removes
 // effort string fields (for models using numeric budgets).
-func stripThinkingFields(payload []byte, effortOnly bool) []byte {
+func StripThinkingFields(payload []byte, effortOnly bool) []byte {
 	fieldsToRemove := []string{
 		"reasoning_effort",
 		"reasoning.effort",
@@ -278,9 +284,9 @@ func stripThinkingFields(payload []byte, effortOnly bool) []byte {
 	return out
 }
 
-// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
+// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort
 // or reasoning.effort field for level-based thinking models.
-func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
+func NormalizeReasoningEffortLevel(payload []byte, model string) []byte {
 	out := payload
 
 	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
@@ -298,10 +304,10 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
 	return out
 }
 
-// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
+// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models.
 // Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
 // downgrading requests.
-func validateThinkingConfig(payload []byte, model string) error {
+func ValidateThinkingConfig(payload []byte, model string) error {
 	if len(payload) == 0 || model == "" {
 		return nil
 	}
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index 3bd61021..1d4ef52d 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -51,13 +51,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -131,13 +131,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
+	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
-	body = normalizeThinkingConfig(body, upstreamModel, false)
-	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+	body = NormalizeThinkingConfig(body, upstreamModel, false)
+	if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
 	}
 	toolsResult := gjson.GetBytes(body, "tools")
diff --git a/internal/translator/antigravity/gemini/antigravity_gemini_request.go b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
index 30bc3830..e694b790 100644
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -122,6 +122,38 @@ type FunctionCallGroup struct {
 	ResponsesNeeded int
 }
 
+// parseFunctionResponse decodes a function response part into a generic map via json.Unmarshal.
+// On failure it logs at debug level and rebuilds a functionResponse map from gjson lookups instead.
+func parseFunctionResponse(response gjson.Result) map[string]interface{} {
+	var responseMap map[string]interface{}
+	err := json.Unmarshal([]byte(response.Raw), &responseMap)
+	if err == nil {
+		return responseMap
+	}
+
+	log.Debugf("unmarshal function response failed, using fallback: %v", err)
+	funcResp := response.Get("functionResponse") // fallback: read individual fields through gjson
+	if funcResp.Exists() {
+		fr := map[string]interface{}{
+			"name": funcResp.Get("name").String(),
+			"response": map[string]interface{}{
+				"result": funcResp.Get("response").String(), // response is carried as a string under "result"
+			},
+		}
+		if id := funcResp.Get("id").String(); id != "" { // id is copied only when non-empty
+			fr["id"] = id
+		}
+		return map[string]interface{}{"functionResponse": fr}
+	}
+
+	return map[string]interface{}{ // no functionResponse field: wrap the whole part under the name "unknown"
+		"functionResponse": map[string]interface{}{
+			"name":     "unknown",
+			"response": map[string]interface{}{"result": response.String()},
+		},
+	}
+}
+
 // fixCLIToolResponse performs sophisticated tool response format conversion and grouping.
 // This function transforms the CLI tool response format by intelligently grouping function calls
 // with their corresponding responses, ensuring proper conversation flow and API compatibility.
@@ -180,13 +212,7 @@ func fixCLIToolResponse(input string) (string, error) {
 					// Create merged function response content
 					var responseParts []interface{}
 					for _, response := range groupResponses {
-						var responseMap map[string]interface{}
-						errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
-						if errUnmarshal != nil {
-							log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
-							continue
-						}
-						responseParts = append(responseParts, responseMap)
+						responseParts = append(responseParts, parseFunctionResponse(response))
 					}
 
 					if len(responseParts) > 0 {
@@ -265,13 +291,7 @@ func fixCLIToolResponse(input string) (string, error) {
 
 			var responseParts []interface{}
 			for _, response := range groupResponses {
-				var responseMap map[string]interface{}
-				errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
-				if errUnmarshal != nil {
-					log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
-					continue
-				}
-				responseParts = append(responseParts, responseMap)
+				responseParts = append(responseParts, parseFunctionResponse(response))
 			}
 
 			if len(responseParts) > 0 {
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
index 717f88f7..2a4684e2 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -39,31 +39,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
 	}
 
 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go
index 302c7d66..6518947b 100644
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -114,14 +114,16 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 			}
 		}
 		// Include thoughts configuration for reasoning process visibility
-		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() {
-				if includeThoughts.Type == gjson.True {
-					out, _ = sjson.Set(out, "thinking.type", "enabled")
-					if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
-						out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int())
-					}
-				}
+		// Only apply for models that support thinking and use numeric budgets, not discrete levels.
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+			// Check for thinkingBudget first - if present, enable thinking with budget
+			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
+				normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int()))
+				out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget)
+			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				// Fallback to include_thoughts if no budget specified
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
 	}
diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
index b3384ecc..9122b97e 100644
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -16,6 +16,7 @@ import (
 	"strings"
 
 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -65,18 +66,23 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 
 	root := gjson.ParseBytes(rawJSON)
 
-	if v := root.Get("reasoning_effort"); v.Exists() {
-		out, _ = sjson.Set(out, "thinking.type", "enabled")
-
-		switch v.String() {
-		case "none":
-			out, _ = sjson.Set(out, "thinking.type", "disabled")
-		case "low":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
-		case "medium":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
-		case "high":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+	if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				}
+			}
 		}
 	}
 
diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
index 764bb5c9..b3654ca0 100644
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -52,20 +53,23 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
 
 	root := gjson.ParseBytes(rawJSON)
 
-	if v := root.Get("reasoning.effort"); v.Exists() {
-		out, _ = sjson.Set(out, "thinking.type", "enabled")
-
-		switch v.String() {
-		case "none":
-			out, _ = sjson.Set(out, "thinking.type", "disabled")
-		case "minimal":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
-		case "low":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 4096)
-		case "medium":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
-		case "high":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+	if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		effort := strings.ToLower(strings.TrimSpace(v.String()))
+		if effort != "" {
+			budget, ok := util.ThinkingEffortToBudget(modelName, effort)
+			if ok {
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+				case -1:
+					out, _ = sjson.Set(out, "thinking.type", "enabled")
+				default:
+					if budget > 0 {
+						out, _ = sjson.Set(out, "thinking.type", "enabled")
+						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
+					}
+				}
+			}
 		}
 	}
 
diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go
index 3c86e3cf..414efa89 100644
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -214,7 +215,22 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 
 	// Add additional configuration parameters for the Codex API.
 	template, _ = sjson.Set(template, "parallel_tool_calls", true)
-	template, _ = sjson.Set(template, "reasoning.effort", "medium")
+
+	// Convert thinking.budget_tokens to reasoning.effort for level-based models
+	reasoningEffort := "medium" // default
+	if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+		if thinking.Get("type").String() == "enabled" {
+			if util.ModelUsesThinkingLevels(modelName) {
+				if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+					budget := int(budgetTokens.Int())
+					if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+						reasoningEffort = effort
+					}
+				}
+			}
+		}
+	}
+	template, _ = sjson.Set(template, "reasoning.effort", reasoningEffort)
 	template, _ = sjson.Set(template, "reasoning.summary", "auto")
 	template, _ = sjson.Set(template, "stream", true)
 	template, _ = sjson.Set(template, "store", false)
diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go
index 427fd9ad..c2dacd3e 100644
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -245,7 +245,22 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 
 	// Fixed flags aligning with Codex expectations
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)
-	out, _ = sjson.Set(out, "reasoning.effort", "medium")
+
+	// Convert thinkingBudget to reasoning.effort for level-based models
+	reasoningEffort := "medium" // default
+	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+			if util.ModelUsesThinkingLevels(modelName) {
+				if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+					budget := int(thinkingBudget.Int())
+					if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+						reasoningEffort = effort
+					}
+				}
+			}
+		}
+	}
+	out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort)
 	out, _ = sjson.Set(out, "reasoning.summary", "auto")
 	out, _ = sjson.Set(out, "stream", true)
 	out, _ = sjson.Set(out, "store", false)
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index b52bf224..dc5cf935 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -39,31 +39,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGeminiCLI(out, re.String())
 	}
 
 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go
index 45a5a88f..f626a581 100644
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -154,7 +154,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}
 
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
-	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
index 8c48a5b3..54843f0d 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -37,33 +37,17 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 
 	// Reasoning effort -> thinkingBudget/include_thoughts
 	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
+	// Only convert for models that use numeric budgets (not discrete levels) to avoid
+	// incorrectly applying thinkingBudget for level-based models like gpt-5.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
 	hasOfficialThinking := re.Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
-		switch re.String() {
-		case "none":
-			out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		}
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
+		out = util.ApplyReasoningEffortToGemini(out, re.String())
 	}
 
 	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index bdf59785..1bf67e7f 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -389,36 +389,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	}
 
 	// OpenAI official reasoning fields take precedence
+	// Only convert for models that use numeric budgets (not discrete levels).
 	hasOfficialThinking := root.Get("reasoning.effort").Exists()
-	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		reasoningEffort := root.Get("reasoning.effort")
-		switch reasoningEffort.String() {
-		case "none":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-		case "auto":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		default:
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		}
+		out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String()))
 	}
 
 	// Cherry Studio extension (applies only when official fields are missing)
-	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+	// Only apply for models that use numeric budgets, not discrete levels.
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
 			var budget int
diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go
index 3521b2e5..0ee8c225 100644
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -10,6 +10,7 @@ import (
 	"encoding/json"
 	"strings"
 
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -60,6 +61,18 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	// Stream
 	out, _ = sjson.Set(out, "stream", stream)
 
+	// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
+	if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() {
+		if thinkingType := thinking.Get("type"); thinkingType.Exists() && thinkingType.String() == "enabled" {
+			if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() {
+				budget := int(budgetTokens.Int())
+				if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					out, _ = sjson.Set(out, "reasoning_effort", effort)
+				}
+			}
+		}
+	}
+
 	// Process messages and system
 	var messagesJSON = "[]"
 
diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go
index deedf96a..cca6ebf7 100644
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -13,6 +13,7 @@ import (
 	"math/big"
 	"strings"
 
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -76,6 +77,17 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 				out, _ = sjson.Set(out, "stop", stops)
 			}
 		}
+
+		// Convert thinkingBudget to reasoning_effort
+		// Always perform conversion to support allowCompat models that may not be in registry
+		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
+			if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				budget := int(thinkingBudget.Int())
+				if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" {
+					out, _ = sjson.Set(out, "reasoning_effort", effort)
+				}
+			}
+		}
 	}
 
 	// Stream parameter
diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_request.go b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
index 1b1f071f..f8bcb7b1 100644
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -2,6 +2,7 @@ package responses
 
 import (
 	"bytes"
+	"strings"
 
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -189,23 +190,9 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 	}
 
 	if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
-		switch reasoningEffort.String() {
-		case "none":
-			out, _ = sjson.Set(out, "reasoning_effort", "none")
-		case "auto":
-			out, _ = sjson.Set(out, "reasoning_effort", "auto")
-		case "minimal":
-			out, _ = sjson.Set(out, "reasoning_effort", "low")
-		case "low":
-			out, _ = sjson.Set(out, "reasoning_effort", "low")
-		case "medium":
-			out, _ = sjson.Set(out, "reasoning_effort", "medium")
-		case "high":
-			out, _ = sjson.Set(out, "reasoning_effort", "high")
-		case "xhigh":
-			out, _ = sjson.Set(out, "reasoning_effort", "xhigh")
-		default:
-			out, _ = sjson.Set(out, "reasoning_effort", "auto")
+		effort := strings.ToLower(strings.TrimSpace(reasoningEffort.String()))
+		if effort != "" {
+			out, _ = sjson.Set(out, "reasoning_effort", effort)
 		}
 	}
 
diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go
index a89aba26..9e349d99 100644
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -152,6 +152,71 @@ func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
 	return updated
 }
 
+// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level.
+var ReasoningEffortBudgetMapping = map[string]int{
+	"none":    0,
+	"auto":    -1,
+	"minimal": 512,
+	"low":     1024,
+	"medium":  8192,
+	"high":    24576,
+	"xhigh":   32768,
+}
+
+// ApplyReasoningEffortToGemini applies an OpenAI reasoning_effort to the standard Gemini API
+// body (generationConfig.thinkingConfig). "none" deletes thinkingConfig; other mapped levels set
+// thinkingBudget and include_thoughts=true. Empty or unmapped efforts return body unchanged.
+func ApplyReasoningEffortToGemini(body []byte, effort string) []byte {
+	normalized := strings.ToLower(strings.TrimSpace(effort))
+	if normalized == "" {
+		return body
+	}
+
+	budgetPath := "generationConfig.thinkingConfig.thinkingBudget"
+	includePath := "generationConfig.thinkingConfig.include_thoughts"
+
+	if normalized == "none" {
+		body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig")
+		return body
+	}
+
+	budget, ok := ReasoningEffortBudgetMapping[normalized]
+	if !ok {
+		return body
+	}
+
+	body, _ = sjson.SetBytes(body, budgetPath, budget)
+	body, _ = sjson.SetBytes(body, includePath, true)
+	return body
+}
+
+// ApplyReasoningEffortToGeminiCLI applies an OpenAI reasoning_effort to a Gemini CLI API body
+// (request.generationConfig.thinkingConfig). "none" deletes thinkingConfig; other mapped levels
+// set thinkingBudget and include_thoughts=true. Empty or unmapped efforts return body unchanged.
+func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
+	normalized := strings.ToLower(strings.TrimSpace(effort))
+	if normalized == "" {
+		return body
+	}
+
+	budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
+	includePath := "request.generationConfig.thinkingConfig.include_thoughts"
+
+	if normalized == "none" {
+		body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
+		return body
+	}
+
+	budget, ok := ReasoningEffortBudgetMapping[normalized]
+	if !ok {
+		return body
+	}
+
+	body, _ = sjson.SetBytes(body, budgetPath, budget)
+	body, _ = sjson.SetBytes(body, includePath, true)
+	return body
+}
+
 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
 // and converts it to "thinkingBudget".
 // "high" -> 32768
diff --git a/internal/util/openai_thinking.go b/internal/util/openai_thinking.go
index 4dda38f6..5ce7e6bf 100644
--- a/internal/util/openai_thinking.go
+++ b/internal/util/openai_thinking.go
@@ -5,15 +5,18 @@ package util
 //
 // Ranges:
 //   - 0            -> "none"
+//   - -1           -> "auto"
 //   - 1..1024      -> "low"
 //   - 1025..8192   -> "medium"
 //   - 8193..24576  -> "high"
 //   - 24577..      -> highest supported level for the model (defaults to "xhigh")
 //
-// Negative values (except the dynamic -1 handled elsewhere) are treated as unsupported.
+// Negative values other than -1 are treated as unsupported.
 func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) {
 	switch {
-	case budget < 0:
+	case budget == -1:
+		return "auto", true
+	case budget < -1:
 		return "", false
 	case budget == 0:
 		return "none", true
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index c2f4aa8d..6d156954 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -2,7 +2,6 @@ package test
 
 import (
 	"fmt"
-	"net/http"
 	"strings"
 	"testing"
 	"time"
@@ -10,20 +9,20 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 
-// statusErr mirrors executor.statusErr to keep validation behavior aligned.
-type statusErr struct {
-	code int
-	msg  string
+// isOpenAICompatModel reports whether model is the "openai-compat" test fixture, the only
+// model these tests treat as OpenAI-compatible and pass reasoning effort through for.
+// This simulates the allowCompat behavior from OpenAICompatExecutor.
+func isOpenAICompatModel(model string) bool {
+	return model == "openai-compat"
 }
 
-func (e statusErr) Error() string { return e.msg }
-
 // registerCoreModels loads representative models across providers into the registry
 // so NormalizeThinkingBudget and level validation use real ranges.
 func registerCoreModels(t *testing.T) func() {
@@ -34,14 +33,62 @@ func registerCoreModels(t *testing.T) func() {
 	reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels())
 	reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels())
 	reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels())
+	// Custom openai-compatible model with forced thinking suffix passthrough.
+	// No Thinking field - simulates an external model added via openai-compat
+	// where the registry has no knowledge of its thinking capabilities.
+	// The allowCompat flag should preserve reasoning effort for such models.
+	customOpenAIModels := []*registry.ModelInfo{
+		{
+			ID:          "openai-compat",
+			Object:      "model",
+			Created:     1700000000,
+			OwnedBy:     "custom-provider",
+			Type:        "openai",
+			DisplayName: "OpenAI Compatible Model",
+			Description: "OpenAI-compatible model with forced thinking suffix support",
+		},
+	}
+	reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels)
 	return func() {
 		reg.UnregisterClient(uid + "-gemini")
 		reg.UnregisterClient(uid + "-claude")
 		reg.UnregisterClient(uid + "-openai")
 		reg.UnregisterClient(uid + "-qwen")
+		reg.UnregisterClient(uid + "-custom-openai")
 	}
 }
 
+var (
+	thinkingTestModels = []string{
+		"gpt-5",           // level-based thinking model
+		"gemini-2.5-pro",  // numeric-budget thinking model
+		"qwen3-code-plus", // no thinking support
+		"openai-compat",   // allowCompat=true (OpenAI-compatible channel)
+	}
+	thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"}
+	thinkingTestToProtocols   = []string{"gemini", "claude", "openai", "codex"}
+
+	// Numeric budgets and their level equivalents:
+	// -1 -> auto
+	// 0 -> none
+	// 1..1024 -> low
+	// 1025..8192 -> medium
+	// 8193..24576 -> high
+	// >24576 -> model highest level (right-most in Levels)
+	thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000}
+
+	// Levels and their numeric equivalents:
+	// auto -> -1
+	// none -> 0
+	// minimal -> 512
+	// low -> 1024
+	// medium -> 8192
+	// high -> 24576
+	// xhigh -> 32768
+	// invalid -> invalid (no mapping)
+	thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"}
+)
+
 func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
 	switch fromProtocol {
 	case "gemini":
@@ -53,141 +100,10 @@ func buildRawPayload(fromProtocol, modelWithSuffix string) []byte {
 	}
 }
 
-// applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata.
-func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
-	if !ok || (budgetOverride == nil && includeOverride == nil) {
-		return payload
-	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
-	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
-		budgetOverride = &norm
-	}
-	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
-}
-
-// applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata.
-func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte {
-	if len(metadata) == 0 {
-		return payload
-	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
-	if field == "" {
-		return payload
-	}
-	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-			return updated
-		}
-	}
-	if util.ModelUsesThinkingLevels(model) {
-		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
-			if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
-				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-					return updated
-				}
-			}
-		}
-	}
-	return payload
-}
-
-// normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig.
-func normalizeThinkingConfigLocal(payload []byte, model string) []byte {
-	if len(payload) == 0 || model == "" {
-		return payload
-	}
-
-	if !util.ModelSupportsThinking(model) {
-		return stripThinkingFieldsLocal(payload, false)
-	}
-
-	if util.ModelUsesThinkingLevels(model) {
-		return normalizeReasoningEffortLevelLocal(payload, model)
-	}
-
-	// Model supports thinking but uses numeric budgets, not levels.
-	// Strip effort string fields since they are not applicable.
-	return stripThinkingFieldsLocal(payload, true)
-}
-
-// stripThinkingFieldsLocal mirrors executor.stripThinkingFields.
-func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte {
-	fieldsToRemove := []string{
-		"reasoning_effort",
-		"reasoning.effort",
-	}
-	if !effortOnly {
-		fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...)
-	}
-	out := payload
-	for _, field := range fieldsToRemove {
-		if gjson.GetBytes(out, field).Exists() {
-			out, _ = sjson.DeleteBytes(out, field)
-		}
-	}
-	return out
-}
-
-// normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel.
-func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte {
-	out := payload
-
-	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
-		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
-			out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
-		}
-	}
-
-	if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
-		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
-			out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
-		}
-	}
-
-	return out
-}
-
-// validateThinkingConfigLocal mirrors executor.validateThinkingConfig.
-func validateThinkingConfigLocal(payload []byte, model string) error {
-	if len(payload) == 0 || model == "" {
-		return nil
-	}
-	if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
-		return nil
-	}
-
-	levels := util.GetModelThinkingLevels(model)
-	checkField := func(path string) error {
-		if effort := gjson.GetBytes(payload, path); effort.Exists() {
-			if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
-				return statusErr{
-					code: http.StatusBadRequest,
-					msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
-				}
-			}
-		}
-		return nil
-	}
-
-	if err := checkField("reasoning_effort"); err != nil {
-		return err
-	}
-	if err := checkField("reasoning.effort"); err != nil {
-		return err
-	}
-	return nil
-}
-
 // normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
-func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) {
-	body = normalizeThinkingConfigLocal(body, upstreamModel)
-	if err := validateThinkingConfigLocal(body, upstreamModel); err != nil {
+func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) {
+	body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat)
+	if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil {
 		return body, err
 	}
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
@@ -214,9 +130,10 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi
 	)
 
 	var err error
+	allowCompat := isOpenAICompatModel(normalizedModel)
 	switch toProtocol {
 	case "gemini":
-		body = applyThinkingMetadataLocal(body, metadata, normalizedModel)
+		body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel)
 		body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body)
 		body = util.NormalizeGeminiThinkingBudget(normalizedModel, body)
 		body = util.StripThinkingConfigIfUnsupported(normalizedModel, body)
@@ -225,13 +142,14 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi
 			body = util.ApplyClaudeThinkingConfig(body, budget)
 		}
 	case "openai":
-		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort")
-		body = normalizeThinkingConfigLocal(body, upstreamModel)
-		err = validateThinkingConfigLocal(body, upstreamModel)
+		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat)
+		body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat)
+		err = executor.ValidateThinkingConfig(body, upstreamModel)
 	case "codex": // OpenAI responses / codex
-		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort")
+		// Codex does not support allowCompat; always use false.
+		body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false)
 		// Mirror CodexExecutor final normalization and model override so tests log the final body.
-		body, err = normalizeCodexPayload(body, upstreamModel)
+		body, err = normalizeCodexPayload(body, upstreamModel, false)
 	default:
 	}
 
@@ -287,83 +205,46 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 	cleanup := registerCoreModels(t)
 	defer cleanup()
 
-	models := []string{
-		"gpt-5",             // supports levels (low/medium/high)
-		"gemini-2.5-pro",    // supports numeric budget
-		"qwen3-coder-flash", // no thinking support
-	}
-	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
-	toProtocols := []string{"gemini", "claude", "openai", "codex"}
-
 	type scenario struct {
 		name        string
 		modelSuffix string
-		expectFn    func(info *registry.ModelInfo) (present bool, budget int64)
 	}
 
-	buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) {
-		return func(info *registry.ModelInfo) (bool, int64) {
-			if info == nil || info.Thinking == nil {
-				return false, 0
-			}
-			return true, int64(util.NormalizeThinkingBudget(info.ID, raw))
+	numericName := func(budget int) string {
+		if budget < 0 {
+			return "numeric-neg1"
 		}
+		return fmt.Sprintf("numeric-%d", budget)
 	}
 
-	levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) {
-		return func(info *registry.ModelInfo) (bool, int64) {
-			if info == nil || info.Thinking == nil {
-				return false, 0
-			}
-			if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok {
-				return true, int64(b)
-			}
-			return false, 0
-		}
-	}
+	for _, model := range thinkingTestModels {
+		_ = registry.GetGlobalRegistry().GetModelInfo(model)
 
-	for _, model := range models {
-		info := registry.GetGlobalRegistry().GetModelInfo(model)
-		min, max := 0, 0
-		if info != nil && info.Thinking != nil {
-			min = info.Thinking.Min
-			max = info.Thinking.Max
-		}
-
-		for _, from := range fromProtocols {
+		for _, from := range thinkingTestFromProtocols {
 			// Scenario selection follows protocol semantics:
 			// - OpenAI-style protocols (openai/openai-response) express thinking as levels.
 			// - Claude/Gemini-style protocols express thinking as numeric budgets.
 			cases := []scenario{
-				{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }},
+				{name: "no-suffix", modelSuffix: model},
 			}
 			if from == "openai" || from == "openai-response" {
-				cases = append(cases,
-					scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")},
-					scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")},
-					scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")},
-				)
+				for _, lvl := range thinkingLevelSamples {
+					cases = append(cases, scenario{
+						name:        "level-" + lvl,
+						modelSuffix: fmt.Sprintf("%s(%s)", model, lvl),
+					})
+				}
 			} else { // claude or gemini
-				if util.ModelUsesThinkingLevels(model) {
-					// Numeric budgets for level-based models are mapped into levels when needed.
-					cases = append(cases,
-						scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)},
-						scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)},
-						scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)},
-						scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)},
-						scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)},
-						scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)},
-						scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)},
-					)
-				} else {
-					cases = append(cases,
-						scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)},
-						scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)},
-					)
+				for _, budget := range thinkingNumericSamples {
+					budget := budget
+					cases = append(cases, scenario{
+						name:        numericName(budget),
+						modelSuffix: fmt.Sprintf("%s(%d)", model, budget),
+					})
 				}
 			}
 
-			for _, to := range toProtocols {
+			for _, to := range thinkingTestToProtocols {
 				if from == to {
 					continue
 				}
@@ -402,7 +283,22 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 								}
 								return true, fmt.Sprintf("%d", *budget), false
 							case "openai":
-								if !util.ModelSupportsThinking(normalizedModel) {
+								allowCompat := isOpenAICompatModel(normalizedModel)
+								if !util.ModelSupportsThinking(normalizedModel) && !allowCompat {
+									return false, "", false
+								}
+								// For allowCompat models, pass through effort directly without validation
+								if allowCompat {
+									effort, ok := util.ReasoningEffortFromMetadata(metadata)
+									if ok && strings.TrimSpace(effort) != "" {
+										return true, strings.ToLower(strings.TrimSpace(effort)), false
+									}
+									// Check numeric budget fallback for allowCompat
+									if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+										if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
+											return true, mapped, false
+										}
+									}
 									return false, "", false
 								}
 								if !util.ModelUsesThinkingLevels(normalizedModel) {
@@ -427,14 +323,8 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 								}
 								return false, "", true // validation would fail
 							case "codex":
-								if !util.ModelSupportsThinking(normalizedModel) {
-									return false, "", false
-								}
-								if !util.ModelUsesThinkingLevels(normalizedModel) {
-									// Non-levels models don't support effort strings in codex
-									if from != "openai-response" {
-										return false, "", false
-									}
+								// Codex does not support allowCompat; require thinking-capable level models.
+								if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) {
 									return false, "", false
 								}
 								effort, ok := util.ReasoningEffortFromMetadata(metadata)
@@ -523,6 +413,314 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 	}
 }
 
+// buildRawPayloadWithThinking builds a minimal fromProtocol request for model with thinking parameters already in the body.
+// thinkingParam is applied only when it is an int budget (gemini, claude) or a non-empty string effort (openai, openai-response); otherwise the payload carries no thinking fields. This tests the path where thinking comes from the raw payload, not the model suffix.
+func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte {
+	switch fromProtocol {
+	case "gemini":
+		base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model)
+		if budget, ok := thinkingParam.(int); ok {
+			base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget)
+		}
+		return []byte(base)
+	case "openai-response":
+		base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning.effort", effort)
+		}
+		return []byte(base)
+	case "openai":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning_effort", effort)
+		}
+		return []byte(base)
+	case "claude":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if budget, ok := thinkingParam.(int); ok {
+			base, _ = sjson.Set(base, "thinking.type", "enabled") // budget_tokens is always written together with type "enabled"
+			base, _ = sjson.Set(base, "thinking.budget_tokens", budget)
+		}
+		return []byte(base)
+	default: // any other protocol: messages payload without thinking fields
+		return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model))
+	}
+}
+
+// buildBodyForProtocolWithRawThinking builds a raw fromProtocol payload carrying thinkingParam, translates it to toProtocol, applies the target-specific thinking post-processing, and returns the filtered body together with any error from the openai validation or codex normalization step.
+func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) {
+	t.Helper()
+	raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam)
+	stream := fromProtocol != toProtocol
+
+	body := sdktranslator.TranslateRequest(
+		sdktranslator.FromString(fromProtocol),
+		sdktranslator.FromString(toProtocol),
+		model,
+		raw,
+		stream,
+	)
+
+	var err error // only assigned by the openai and codex branches below
+	allowCompat := isOpenAICompatModel(model)
+	switch toProtocol {
+	case "gemini":
+		body = util.ApplyDefaultThinkingIfNeeded(model, body)
+		body = util.NormalizeGeminiThinkingBudget(model, body)
+		body = util.StripThinkingConfigIfUnsupported(model, body)
+	case "claude":
+		// For a raw payload, Claude thinking is passed through by the translator.
+		// No additional processing is needed because thinking is already in the body.
+	case "openai":
+		body = executor.NormalizeThinkingConfig(body, model, allowCompat)
+		err = executor.ValidateThinkingConfig(body, model)
+	case "codex":
+		// Codex does not support allowCompat; always use false.
+		body, err = normalizeCodexPayload(body, model, false)
+	}
+
+	body, _ = sjson.SetBytes(body, "model", model)
+	body = filterThinkingBody(toProtocol, body, model, model)
+	return body, err
+}
+
+func TestRawPayloadThinkingConversions(t *testing.T) {
+	cleanup := registerCoreModels(t)
+	defer cleanup()
+
+	type scenario struct {
+		name          string
+		thinkingParam any // int for budget, string for effort level, nil for no thinking parameter
+	}
+
+	numericName := func(budget int) string { // subtest name for a budget; every negative budget is named "budget-neg1"
+		if budget < 0 {
+			return "budget-neg1"
+		}
+		return fmt.Sprintf("budget-%d", budget)
+	}
+
+	for _, model := range thinkingTestModels {
+		supportsThinking := util.ModelSupportsThinking(model)
+		usesLevels := util.ModelUsesThinkingLevels(model)
+		allowCompat := isOpenAICompatModel(model)
+
+		for _, from := range thinkingTestFromProtocols {
+			var cases []scenario
+			switch from {
+			case "openai", "openai-response":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+				}
+				for _, lvl := range thinkingLevelSamples {
+					cases = append(cases, scenario{
+						name:          "effort-" + lvl,
+						thinkingParam: lvl,
+					})
+				}
+			case "gemini", "claude":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+				}
+				for _, budget := range thinkingNumericSamples {
+					budget := budget
+					cases = append(cases, scenario{
+						name:          numericName(budget),
+						thinkingParam: budget,
+					})
+				}
+			}
+
+			for _, to := range thinkingTestToProtocols {
+				if from == to {
+					continue
+				}
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+				t.Logf("  RAW PAYLOAD: %s -> %s | model: %s", from, to, model)
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+
+				for _, cs := range cases {
+					from := from // shadow the loop variables captured by the subtest closure
+					to := to
+					cs := cs
+					testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name)
+					t.Run(testName, func(t *testing.T) {
+						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
+							if cs.thinkingParam == nil {
+								if to == "codex" && from != "openai-response" && supportsThinking && usesLevels {
+									// With no thinking parameter, codex translators default reasoning.effort to "medium" for thinking-capable level models (sources other than openai-response)
+									return true, "medium", false
+								}
+								return false, "", false
+							}
+
+							switch to {
+							case "gemini":
+								if !supportsThinking || usesLevels {
+									return false, "", false
+								}
+								// Gemini expects numeric budget (only for non-level models)
+								if budget, ok := cs.thinkingParam.(int); ok {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								// Convert effort level to budget for non-level models only
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									// "none" disables thinking - no thinkingBudget in output
+									if strings.ToLower(effort) == "none" {
+										return false, "", false
+									}
+									if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+										// ThinkingEffortToBudget already returns normalized budget
+										return true, fmt.Sprintf("%d", budget), false
+									}
+									// Invalid effort does not map to a budget
+									return false, "", false
+								}
+								return false, "", false
+							case "claude":
+								if !supportsThinking || usesLevels {
+									return false, "", false
+								}
+								// Claude expects a positive numeric budget (only for non-level models)
+								if budget, ok := cs.thinkingParam.(int); ok && budget > 0 {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								// Convert effort level to budget for non-level models only
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									// "none" and "auto" don't produce budget_tokens
+									lower := strings.ToLower(effort)
+									if lower == "none" || lower == "auto" {
+										return false, "", false
+									}
+									if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+										// ThinkingEffortToBudget already returns normalized budget
+										return true, fmt.Sprintf("%d", budget), false
+									}
+									// Invalid effort - claude sets thinking.type:enabled but no budget_tokens
+									return false, "", false
+								}
+								return false, "", false
+							case "openai":
+								if allowCompat {
+									if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" {
+										normalized := strings.ToLower(strings.TrimSpace(effort))
+										return true, normalized, false
+									}
+									if budget, ok := cs.thinkingParam.(int); ok {
+										if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+											return true, mapped, false
+										}
+									}
+									return false, "", false
+								}
+								if !supportsThinking || !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true // invalid level: expect a validation error
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										// Check if the mapped effort is valid for this model
+										if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+											return true, mapped, true // expect validation error
+										}
+										return true, mapped, false
+									}
+								}
+								return false, "", false
+							case "codex":
+								// Codex does not support allowCompat; require thinking-capable level models.
+								if !supportsThinking || !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true // invalid level: expect a validation error
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										// Check if the mapped effort is valid for this model
+										if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel {
+											return true, mapped, true // expect validation error
+										}
+										return true, mapped, false
+									}
+								}
+								if from != "openai-response" {
+									// No effort or budget mapping applied: codex translators default reasoning.effort to "medium" for thinking-capable level models
+									return true, "medium", false
+								}
+								return false, "", false
+							}
+							return false, "", false
+						}()
+
+						body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam)
+						actualPresent, actualValue := func() (bool, string) {
+							path := ""
+							switch to {
+							case "gemini":
+								path = "generationConfig.thinkingConfig.thinkingBudget"
+							case "claude":
+								path = "thinking.budget_tokens"
+							case "openai":
+								path = "reasoning_effort"
+							case "codex":
+								path = "reasoning.effort"
+							}
+							if path == "" {
+								return false, ""
+							}
+							val := gjson.GetBytes(body, path)
+							if to == "codex" && !val.Exists() { // fall back to reading "effort" from the "reasoning" value
+								reasoning := gjson.GetBytes(body, "reasoning")
+								if reasoning.Exists() {
+									val = reasoning.Get("effort")
+								}
+							}
+							if !val.Exists() {
+								return false, ""
+							}
+							if val.Type == gjson.Number { // format numbers as integers so they match the %d-formatted expectValue
+								return true, fmt.Sprintf("%d", val.Int())
+							}
+							return true, val.String()
+						}()
+
+						t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
+							from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
+
+						if expectErr {
+							if err == nil {
+								t.Fatalf("expected validation error but got none, body=%s", string(body))
+							}
+							return // presence and value are not compared when an error is expected
+						}
+						if err != nil {
+							t.Fatalf("unexpected error: %v body=%s", err, string(body))
+						}
+
+						if expectPresent != actualPresent {
+							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
+						}
+						if expectPresent && expectValue != actualValue {
+							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
+						}
+					})
+				}
+			}
+		}
+	}
+}
+
 func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
 	cleanup := registerCoreModels(t)
 	defer cleanup()
@@ -534,6 +732,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
 		want   string
 		ok     bool
 	}{
+		{name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true},
 		{name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true},
 		{name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true},
 		{name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true},
@@ -541,7 +740,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
 		{name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true},
 		{name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true},
 		{name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true},
-		{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true},
+		{name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true},
 		{name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true},
 		{name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false},
 	}