From 1215c635a0e7ba72a8e94864d83f4def3480be73 Mon Sep 17 00:00:00 2001 From: teeverc <72298507+teeverc@users.noreply.github.com> Date: Fri, 12 Dec 2025 00:14:19 -0800 Subject: [PATCH 1/4] fix: flush Claude SSE chunks immediately to match OpenAI behavior - Write each SSE chunk directly to c.Writer and flush immediately - Remove buffered writer and ticker-based flushing that caused delayed output - Add 500ms timeout case for consistency with OpenAI/Gemini handlers - Clean up unused bufio import This fixes the 'not streaming' issue where small responses were held in the buffer until timeout/threshold was reached. Amp-Thread-ID: https://ampcode.com/threads/T-019b1186-164e-740c-96ab-856f64ee6bee Co-authored-by: Amp --- sdk/api/handlers/claude/code_handlers.go | 51 ++++++------------------ 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/sdk/api/handlers/claude/code_handlers.go b/sdk/api/handlers/claude/code_handlers.go index 8a57a0cc..5cca651a 100644 --- a/sdk/api/handlers/claude/code_handlers.go +++ b/sdk/api/handlers/claude/code_handlers.go @@ -7,7 +7,6 @@ package claude import ( - "bufio" "bytes" "compress/gzip" "context" @@ -219,52 +218,24 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [ } func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) { - // v6.1: Intelligent Buffered Streamer strategy - // Enhanced buffering with larger buffer size (16KB) and longer flush interval (120ms). - // Smart flush only when buffer is sufficiently filled (≥50%), dramatically reducing - // flush frequency from ~12.5Hz to ~5-8Hz while maintaining low latency. - writer := bufio.NewWriterSize(c.Writer, 16*1024) // 4KB → 16KB - ticker := time.NewTicker(120 * time.Millisecond) // 80ms → 120ms - defer ticker.Stop() - - var chunkIdx int - + // OpenAI-style stream forwarding: write each SSE chunk and flush immediately. + // This guarantees clients see incremental output even for small responses. for { select { case <-c.Request.Context().Done(): - // Context cancelled, flush any remaining data before exit - _ = writer.Flush() cancel(c.Request.Context().Err()) return - case <-ticker.C: - // Smart flush: only flush when buffer has sufficient data (≥50% full) - // This reduces flush frequency while ensuring data flows naturally - buffered := writer.Buffered() - if buffered >= 8*1024 { // At least 8KB (50% of 16KB buffer) - if err := writer.Flush(); err != nil { - // Error flushing, cancel and return - cancel(err) - return - } - flusher.Flush() // Also flush the underlying http.ResponseWriter - } - case chunk, ok := <-data: if !ok { - // Stream ended, flush remaining data - _ = writer.Flush() + flusher.Flush() cancel(nil) return } - - // Forward the complete SSE event block directly (already formatted by the translator). - // The translator returns a complete SSE-compliant event block, including event:, data:, and separators. - // The handler just needs to forward it without reassembly. if len(chunk) > 0 { - _, _ = writer.Write(chunk) + _, _ = c.Writer.Write(chunk) + flusher.Flush() } - chunkIdx++ case errMsg, ok := <-errs: if !ok { @@ -276,21 +247,23 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http. status = errMsg.StatusCode } c.Status(status) + // An error occurred: emit as a proper SSE error event errorBytes, _ := json.Marshal(h.toClaudeError(errMsg)) - _, _ = writer.WriteString("event: error\n") - _, _ = writer.WriteString("data: ") - _, _ = writer.Write(errorBytes) - _, _ = writer.WriteString("\n\n") - _ = writer.Flush() + _, _ = c.Writer.Write([]byte("event: error\n")) + _, _ = c.Writer.Write([]byte("data: ")) + _, _ = c.Writer.Write(errorBytes) + _, _ = c.Writer.Write([]byte("\n\n")) flusher.Flush() } + var execErr error if errMsg != nil { execErr = errMsg.Error } cancel(execErr) return + case <-time.After(500 * time.Millisecond): } } } From 5ab30323354a4cff00a7ab9c0c1acacfe96dbb8d Mon Sep 17 00:00:00 2001 From: teeverc <72298507+teeverc@users.noreply.github.com> Date: Fri, 12 Dec 2025 00:26:01 -0800 Subject: [PATCH 2/4] Update sdk/api/handlers/claude/code_handlers.go thank you gemini Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- sdk/api/handlers/claude/code_handlers.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sdk/api/handlers/claude/code_handlers.go b/sdk/api/handlers/claude/code_handlers.go index 5cca651a..8a4c4806 100644 --- a/sdk/api/handlers/claude/code_handlers.go +++ b/sdk/api/handlers/claude/code_handlers.go @@ -250,10 +250,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http. // An error occurred: emit as a proper SSE error event errorBytes, _ := json.Marshal(h.toClaudeError(errMsg)) - _, _ = c.Writer.Write([]byte("event: error\n")) - _, _ = c.Writer.Write([]byte("data: ")) - _, _ = c.Writer.Write(errorBytes) - _, _ = c.Writer.Write([]byte("\n\n")) + _, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes) flusher.Flush() } From 374faa264028530a7c813378d9a7889bdb358e0e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Fri, 12 Dec 2025 21:33:20 +0800 Subject: [PATCH 3/4] fix(thinking): map budgets to effort levels Ensure thinking settings translate correctly across providers: - Only apply reasoning_effort to level-based models and derive it from numeric budget suffixes when present - Strip effort string fields for budget-based models and skip Claude/Gemini budget resolution for level-based or unsupported models - Default Gemini include_thoughts when a nonzero budget override is set - Add cross-protocol conversion and budget range tests --- internal/runtime/executor/payload_helpers.go | 34 +- internal/util/claude_thinking.go | 3 + internal/util/gemini_thinking.go | 20 +- internal/util/openai_thinking.go | 34 ++ internal/util/thinking_suffix.go | 5 + test/thinking_conversion_test.go | 561 +++++++++++++++++++ 6 files changed, 645 insertions(+), 12 deletions(-) create mode 100644 internal/util/openai_thinking.go create mode 100644 test/thinking_conversion_test.go diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9bc82f1f..222c6e37 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -59,8 +59,20 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated + if util.ModelUsesThinkingLevels(model) { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { + return updated + } + } + } + // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. + if util.ModelUsesThinkingLevels(model) { + if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { + if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { + return updated + } + } } } return payload @@ -219,31 +231,37 @@ func matchModelPattern(pattern, model string) bool { // normalizeThinkingConfig normalizes thinking-related fields in the payload // based on model capabilities. For models without thinking support, it strips // reasoning fields. For models with level-based thinking, it validates and -// normalizes the reasoning effort level. +// normalizes the reasoning effort level. For models with numeric budget thinking, +// it strips the effort string fields. func normalizeThinkingConfig(payload []byte, model string) []byte { if len(payload) == 0 || model == "" { return payload } if !util.ModelSupportsThinking(model) { - return stripThinkingFields(payload) + return stripThinkingFields(payload, false) } if util.ModelUsesThinkingLevels(model) { return normalizeReasoningEffortLevel(payload, model) } - return payload + // Model supports thinking but uses numeric budgets, not levels. + // Strip effort string fields since they are not applicable. + return stripThinkingFields(payload, true) } // stripThinkingFields removes thinking-related fields from the payload for -// models that do not support thinking. -func stripThinkingFields(payload []byte) []byte { +// models that do not support thinking. If effortOnly is true, only removes +// effort string fields (for models using numeric budgets). +func stripThinkingFields(payload []byte, effortOnly bool) []byte { fieldsToRemove := []string{ - "reasoning", "reasoning_effort", "reasoning.effort", } + if !effortOnly { + fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...) + } out := payload for _, field := range fieldsToRemove { if gjson.GetBytes(out, field).Exists() { diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go index b0c5a0a2..6176f57d 100644 --- a/internal/util/claude_thinking.go +++ b/internal/util/claude_thinking.go @@ -28,6 +28,9 @@ func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte { // It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget. // Returns the normalized budget (nil if thinking should not be enabled) and whether it matched. func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) { + if !ModelSupportsThinking(modelName) { + return nil, false + } budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata) if !matched { return nil, false diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index f33928b0..a89aba26 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -25,9 +25,15 @@ func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) updated = rewritten } } - if includeThoughts != nil { + // Default to including thoughts when a budget override is present but no explicit include flag is provided. + incl := includeThoughts + if incl == nil && budget != nil && *budget != 0 { + defaultInclude := true + incl = &defaultInclude + } + if incl != nil { valuePath := "generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts) + rewritten, err := sjson.SetBytes(updated, valuePath, *incl) if err == nil { updated = rewritten } @@ -47,9 +53,15 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo updated = rewritten } } - if includeThoughts != nil { + // Default to including thoughts when a budget override is present but no explicit include flag is provided. + incl := includeThoughts + if incl == nil && budget != nil && *budget != 0 { + defaultInclude := true + incl = &defaultInclude + } + if incl != nil { valuePath := "request.generationConfig.thinkingConfig.include_thoughts" - rewritten, err := sjson.SetBytes(updated, valuePath, *includeThoughts) + rewritten, err := sjson.SetBytes(updated, valuePath, *incl) if err == nil { updated = rewritten } diff --git a/internal/util/openai_thinking.go b/internal/util/openai_thinking.go new file mode 100644 index 00000000..4dda38f6 --- /dev/null +++ b/internal/util/openai_thinking.go @@ -0,0 +1,34 @@ +package util + +// OpenAIThinkingBudgetToEffort maps a numeric thinking budget (tokens) +// into an OpenAI-style reasoning effort level for level-based models. +// +// Ranges: +// - 0 -> "none" +// - 1..1024 -> "low" +// - 1025..8192 -> "medium" +// - 8193..24576 -> "high" +// - 24577.. -> highest supported level for the model (defaults to "xhigh") +// +// Negative values (except the dynamic -1 handled elsewhere) are treated as unsupported. +func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) { + switch { + case budget < 0: + return "", false + case budget == 0: + return "none", true + case budget > 0 && budget <= 1024: + return "low", true + case budget <= 8192: + return "medium", true + case budget <= 24576: + return "high", true + case budget > 24576: + if levels := GetModelThinkingLevels(model); len(levels) > 0 { + return levels[len(levels)-1], true + } + return "xhigh", true + default: + return "", false + } +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 7851c580..b877e109 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -163,6 +163,11 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (* if !matched { return nil, nil, false } + // Level-based models (OpenAI-style) do not accept numeric thinking budgets in + // Claude/Gemini-style protocols, so we don't derive budgets for them here. + if ModelUsesThinkingLevels(model) { + return nil, nil, false + } if budget == nil && effort != nil { if derived, ok := ThinkingEffortToBudget(model, *effort); ok { diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go new file mode 100644 index 00000000..c2f4aa8d --- /dev/null +++ b/test/thinking_conversion_test.go @@ -0,0 +1,561 @@ +package test + +import ( + "fmt" + "net/http" + "strings" + "testing" + "time" + + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// statusErr mirrors executor.statusErr to keep validation behavior aligned. +type statusErr struct { + code int + msg string +} + +func (e statusErr) Error() string { return e.msg } + +// registerCoreModels loads representative models across providers into the registry +// so NormalizeThinkingBudget and level validation use real ranges. +func registerCoreModels(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano()) + reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) + reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels()) + reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels()) + reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels()) + return func() { + reg.UnregisterClient(uid + "-gemini") + reg.UnregisterClient(uid + "-claude") + reg.UnregisterClient(uid + "-openai") + reg.UnregisterClient(uid + "-qwen") + } +} + +func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { + switch fromProtocol { + case "gemini": + return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix)) + case "openai-response": + return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix)) + default: // openai / claude and other chat-style payloads + return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix)) + } +} + +// applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata. +func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte { + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) + if !ok || (budgetOverride == nil && includeOverride == nil) { + return payload + } + if !util.ModelSupportsThinking(model) { + return payload + } + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(model, *budgetOverride) + budgetOverride = &norm + } + return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) +} + +// applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata. +func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte { + if len(metadata) == 0 { + return payload + } + if !util.ModelSupportsThinking(model) { + return payload + } + if field == "" { + return payload + } + if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { + return updated + } + } + if util.ModelUsesThinkingLevels(model) { + if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { + if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { + return updated + } + } + } + } + return payload +} + +// normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig. +func normalizeThinkingConfigLocal(payload []byte, model string) []byte { + if len(payload) == 0 || model == "" { + return payload + } + + if !util.ModelSupportsThinking(model) { + return stripThinkingFieldsLocal(payload, false) + } + + if util.ModelUsesThinkingLevels(model) { + return normalizeReasoningEffortLevelLocal(payload, model) + } + + // Model supports thinking but uses numeric budgets, not levels. + // Strip effort string fields since they are not applicable. + return stripThinkingFieldsLocal(payload, true) +} + +// stripThinkingFieldsLocal mirrors executor.stripThinkingFields. +func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte { + fieldsToRemove := []string{ + "reasoning_effort", + "reasoning.effort", + } + if !effortOnly { + fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...) + } + out := payload + for _, field := range fieldsToRemove { + if gjson.GetBytes(out, field).Exists() { + out, _ = sjson.DeleteBytes(out, field) + } + } + return out +} + +// normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel. +func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte { + out := payload + + if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) + } + } + + if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) + } + } + + return out +} + +// validateThinkingConfigLocal mirrors executor.validateThinkingConfig. +func validateThinkingConfigLocal(payload []byte, model string) error { + if len(payload) == 0 || model == "" { + return nil + } + if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { + return nil + } + + levels := util.GetModelThinkingLevels(model) + checkField := func(path string) error { + if effort := gjson.GetBytes(payload, path); effort.Exists() { + if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { + return statusErr{ + code: http.StatusBadRequest, + msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), + } + } + } + return nil + } + + if err := checkField("reasoning_effort"); err != nil { + return err + } + if err := checkField("reasoning.effort"); err != nil { + return err + } + return nil +} + +// normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks. +func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) { + body = normalizeThinkingConfigLocal(body, upstreamModel) + if err := validateThinkingConfigLocal(body, upstreamModel); err != nil { + return body, err + } + body, _ = sjson.SetBytes(body, "model", upstreamModel) + body, _ = sjson.SetBytes(body, "stream", true) + body, _ = sjson.DeleteBytes(body, "previous_response_id") + return body, nil +} + +// buildBodyForProtocol runs a minimal request through the same translation and +// thinking pipeline used in executors for the given target protocol. +func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) { + t.Helper() + normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix) + upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata) + raw := buildRawPayload(fromProtocol, modelWithSuffix) + stream := fromProtocol != toProtocol + + body := sdktranslator.TranslateRequest( + sdktranslator.FromString(fromProtocol), + sdktranslator.FromString(toProtocol), + normalizedModel, + raw, + stream, + ) + + var err error + switch toProtocol { + case "gemini": + body = applyThinkingMetadataLocal(body, metadata, normalizedModel) + body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body) + body = util.NormalizeGeminiThinkingBudget(normalizedModel, body) + body = util.StripThinkingConfigIfUnsupported(normalizedModel, body) + case "claude": + if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok { + body = util.ApplyClaudeThinkingConfig(body, budget) + } + case "openai": + body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort") + body = normalizeThinkingConfigLocal(body, upstreamModel) + err = validateThinkingConfigLocal(body, upstreamModel) + case "codex": // OpenAI responses / codex + body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort") + // Mirror CodexExecutor final normalization and model override so tests log the final body. + body, err = normalizeCodexPayload(body, upstreamModel) + default: + } + + // Mirror executor behavior: final payload uses the upstream (base) model name. + if upstreamModel != "" { + body, _ = sjson.SetBytes(body, "model", upstreamModel) + } + + // For tests we only keep model + thinking-related fields to avoid noise. + body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel) + return body, err +} + +// filterThinkingBody projects the translated payload down to only model and +// thinking-related fields for the given target protocol. +func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte { + if len(body) == 0 { + return body + } + out := []byte(`{}`) + + // Preserve model if present, otherwise fall back to upstream/normalized model. + if m := gjson.GetBytes(body, "model"); m.Exists() { + out, _ = sjson.SetBytes(out, "model", m.Value()) + } else if upstreamModel != "" { + out, _ = sjson.SetBytes(out, "model", upstreamModel) + } else if normalizedModel != "" { + out, _ = sjson.SetBytes(out, "model", normalizedModel) + } + + switch toProtocol { + case "gemini": + if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() { + out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw)) + } + case "claude": + if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() { + out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw)) + } + case "openai": + if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() { + out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value()) + } + case "codex": + if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() { + out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value()) + } + } + return out +} + +func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { + cleanup := registerCoreModels(t) + defer cleanup() + + models := []string{ + "gpt-5", // supports levels (low/medium/high) + "gemini-2.5-pro", // supports numeric budget + "qwen3-coder-flash", // no thinking support + } + fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} + toProtocols := []string{"gemini", "claude", "openai", "codex"} + + type scenario struct { + name string + modelSuffix string + expectFn func(info *registry.ModelInfo) (present bool, budget int64) + } + + buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) { + return func(info *registry.ModelInfo) (bool, int64) { + if info == nil || info.Thinking == nil { + return false, 0 + } + return true, int64(util.NormalizeThinkingBudget(info.ID, raw)) + } + } + + levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) { + return func(info *registry.ModelInfo) (bool, int64) { + if info == nil || info.Thinking == nil { + return false, 0 + } + if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok { + return true, int64(b) + } + return false, 0 + } + } + + for _, model := range models { + info := registry.GetGlobalRegistry().GetModelInfo(model) + min, max := 0, 0 + if info != nil && info.Thinking != nil { + min = info.Thinking.Min + max = info.Thinking.Max + } + + for _, from := range fromProtocols { + // Scenario selection follows protocol semantics: + // - OpenAI-style protocols (openai/openai-response) express thinking as levels. + // - Claude/Gemini-style protocols express thinking as numeric budgets. + cases := []scenario{ + {name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }}, + } + if from == "openai" || from == "openai-response" { + cases = append(cases, + scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")}, + scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")}, + scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")}, + ) + } else { // claude or gemini + if util.ModelUsesThinkingLevels(model) { + // Numeric budgets for level-based models are mapped into levels when needed. + cases = append(cases, + scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)}, + scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)}, + scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)}, + scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)}, + scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)}, + scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)}, + scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)}, + ) + } else { + cases = append(cases, + scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)}, + scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)}, + ) + } + } + + for _, to := range toProtocols { + if from == to { + continue + } + t.Logf("─────────────────────────────────────────────────────────────────────────────────") + t.Logf(" %s -> %s | model: %s", from, to, model) + t.Logf("─────────────────────────────────────────────────────────────────────────────────") + for _, cs := range cases { + from := from + to := to + cs := cs + testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name) + t.Run(testName, func(t *testing.T) { + normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix) + expectPresent, expectValue, expectErr := func() (bool, string, bool) { + switch to { + case "gemini": + budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata) + if !ok || !util.ModelSupportsThinking(normalizedModel) { + return false, "", false + } + if include != nil && !*include { + return false, "", false + } + if budget == nil { + return false, "", false + } + norm := util.NormalizeThinkingBudget(normalizedModel, *budget) + return true, fmt.Sprintf("%d", norm), false + case "claude": + if !util.ModelSupportsThinking(normalizedModel) { + return false, "", false + } + budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata) + if !ok || budget == nil { + return false, "", false + } + return true, fmt.Sprintf("%d", *budget), false + case "openai": + if !util.ModelSupportsThinking(normalizedModel) { + return false, "", false + } + if !util.ModelUsesThinkingLevels(normalizedModel) { + // Non-levels models don't support effort strings in openai + return false, "", false + } + effort, ok := util.ReasoningEffortFromMetadata(metadata) + if !ok || strings.TrimSpace(effort) == "" { + if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { + if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap { + effort = mapped + ok = true + } + } + } + if !ok || strings.TrimSpace(effort) == "" { + return false, "", false + } + effort = strings.ToLower(strings.TrimSpace(effort)) + if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { + return true, normalized, false + } + return false, "", true // validation would fail + case "codex": + if !util.ModelSupportsThinking(normalizedModel) { + return false, "", false + } + if !util.ModelUsesThinkingLevels(normalizedModel) { + // Non-levels models don't support effort strings in codex + if from != "openai-response" { + return false, "", false + } + return false, "", false + } + effort, ok := util.ReasoningEffortFromMetadata(metadata) + if ok && strings.TrimSpace(effort) != "" { + effort = strings.ToLower(strings.TrimSpace(effort)) + if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { + return true, normalized, false + } + return false, "", true + } + if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { + if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { + mapped = strings.ToLower(strings.TrimSpace(mapped)) + if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel { + return true, normalized, false + } + return false, "", true + } + } + if from != "openai-response" { + // Codex translators default reasoning.effort to "medium" when + // no explicit thinking suffix/metadata is provided. + return true, "medium", false + } + return false, "", false + default: + return false, "", false + } + }() + + body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix) + actualPresent, actualValue := func() (bool, string) { + path := "" + switch to { + case "gemini": + path = "generationConfig.thinkingConfig.thinkingBudget" + case "claude": + path = "thinking.budget_tokens" + case "openai": + path = "reasoning_effort" + case "codex": + path = "reasoning.effort" + } + if path == "" { + return false, "" + } + val := gjson.GetBytes(body, path) + if to == "codex" && !val.Exists() { + reasoning := gjson.GetBytes(body, "reasoning") + if reasoning.Exists() { + val = reasoning.Get("effort") + } + } + if !val.Exists() { + return false, "" + } + if val.Type == gjson.Number { + return true, fmt.Sprintf("%d", val.Int()) + } + return true, val.String() + }() + + t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", + from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) + + if expectErr { + if err == nil { + t.Fatalf("expected validation error but got none, body=%s", string(body)) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v body=%s", err, string(body)) + } + + if expectPresent != actualPresent { + t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) + } + if expectPresent && expectValue != actualValue { + t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) + } + }) + } + } + } + } +} + +func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { + cleanup := registerCoreModels(t) + defer cleanup() + + cases := []struct { + name string + model string + budget int + want string + ok bool + }{ + {name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true}, + {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true}, + {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true}, + {name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true}, + {name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true}, + {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true}, + {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true}, + {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true}, + {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true}, + {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false}, + } + + for _, cs := range cases { + cs := cs + t.Run(cs.name, func(t *testing.T) { + got, ok := util.OpenAIThinkingBudgetToEffort(cs.model, cs.budget) + if ok != cs.ok { + t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok) + } + if got != cs.want { + t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got) + } + }) + } +} From 660aabc437585b42825522fd1250f403a0f24516 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 13 Dec 2025 04:06:02 +0800 Subject: [PATCH 4/4] fix(executor): add `allowCompat` support for reasoning effort normalization Introduced `allowCompat` parameter to improve compatibility handling for reasoning effort in payloads across OpenAI and similar models. --- internal/runtime/executor/codex_executor.go | 10 +++--- internal/runtime/executor/iflow_executor.go | 8 ++--- .../executor/openai_compat_executor.go | 31 ++++++++++++++++--- internal/runtime/executor/payload_helpers.go | 14 ++++----- internal/runtime/executor/qwen_executor.go | 8 ++--- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 866f1bdc..2e966237 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,8 +54,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") - body = normalizeThinkingConfig(body, upstreamModel) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -152,8 +152,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") - body = normalizeThinkingConfig(body, upstreamModel) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 4424ea0f..03df1be0 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,12 +57,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -148,12 +148,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index d4b0afcb..1bbd0c8e 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -59,12 +59,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + allowCompat := e.allowCompatReasoningEffort(req.Model, auth) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel) + translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { return resp, errValidate } @@ -154,12 +155,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + allowCompat := e.allowCompatReasoningEffort(req.Model, auth) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel) + translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { return nil, errValidate } @@ -325,6 +327,27 @@ func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxy return "" } +func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool { + trimmed := strings.TrimSpace(model) + if trimmed == "" || e == nil || e.cfg == nil { + return false + } + compat := e.resolveCompatConfig(auth) + if compat == nil || len(compat.Models) == 0 { + return false + } + for i := range compat.Models { + entry := compat.Models[i] + if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) { + return true + } + if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) { + return true + } + } + return false +} + func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index b791dac7..6e352c51 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -48,7 +48,7 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. // Metadata values take precedence over any existing field when the model supports thinking, intentionally // overwriting caller-provided values to honor suffix/default metadata priority. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { +func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { if len(metadata) == 0 { return payload } @@ -59,20 +59,20 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model if baseModel == "" { baseModel = model } - if !util.ModelSupportsThinking(baseModel) && !util.IsOpenAICompatibilityModel(baseModel) { + if !util.ModelSupportsThinking(baseModel) && !allowCompat { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } } // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { + if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } @@ -237,13 +237,13 @@ func matchModelPattern(pattern, model string) bool { // reasoning fields. For models with level-based thinking, it validates and // normalizes the reasoning effort level. For models with numeric budget thinking, // it strips the effort string fields. -func normalizeThinkingConfig(payload []byte, model string) []byte { +func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { if len(payload) == 0 || model == "" { return payload } if !util.ModelSupportsThinking(model) { - if util.IsOpenAICompatibilityModel(model) { + if allowCompat { return payload } return stripThinkingFields(payload, false) diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index c8e2974a..3bd61021 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,12 +51,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } @@ -131,12 +131,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel) + body = normalizeThinkingConfig(body, upstreamModel, false) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate }