From 8496cc24446d2ac51e9ce2d938a3768f978a5835 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Sun, 14 Dec 2025 13:00:34 +0800
Subject: [PATCH] test(thinking): cover openai-compat reasoning passthrough

---
 test/thinking_conversion_test.go | 166 ++++++++++++++++++++++---------
 1 file changed, 117 insertions(+), 49 deletions(-)

diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index a1462611..60f4a02e 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -24,6 +24,13 @@ type statusErr struct {
 
 func (e statusErr) Error() string { return e.msg }
 
+// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible
+// model that should have reasoning effort passed through even if not in registry.
+// This simulates the allowCompat behavior from OpenAICompatExecutor.
+func isOpenAICompatModel(model string) bool {
+	return model == "custom-thinking-model"
+}
+
 // registerCoreModels loads representative models across providers into the registry
 // so NormalizeThinkingBudget and level validation use real ranges.
 func registerCoreModels(t *testing.T) func() {
@@ -34,11 +41,28 @@ func registerCoreModels(t *testing.T) func() {
 	reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels())
 	reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels())
 	reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels())
+	// Custom openai-compatible model with forced thinking suffix passthrough.
+	// No Thinking field - simulates an external model added via openai-compat
+	// where the registry has no knowledge of its thinking capabilities.
+	// The allowCompat flag should preserve reasoning effort for such models.
+	customOpenAIModels := []*registry.ModelInfo{
+		{
+			ID:          "custom-thinking-model",
+			Object:      "model",
+			Created:     1700000000,
+			OwnedBy:     "custom-provider",
+			Type:        "openai",
+			DisplayName: "Custom Thinking Model",
+			Description: "OpenAI-compatible model with forced thinking suffix support",
+		},
+	}
+	reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels)
 	return func() {
 		reg.UnregisterClient(uid + "-gemini")
 		reg.UnregisterClient(uid + "-claude")
 		reg.UnregisterClient(uid + "-openai")
 		reg.UnregisterClient(uid + "-qwen")
+		reg.UnregisterClient(uid + "-custom-openai")
 	}
 }
 
@@ -70,24 +94,24 @@ func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model s
 }
 
 // applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata.
-func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte {
+func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
 	if field == "" {
 		return payload
 	}
+	if !util.ModelSupportsThinking(model) && !allowCompat {
+		return payload
+	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if util.ModelUsesThinkingLevels(model) {
+		if util.ModelUsesThinkingLevels(model) || allowCompat {
 			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 				return updated
 			}
 		}
 	}
-	if util.ModelUsesThinkingLevels(model) {
+	if util.ModelUsesThinkingLevels(model) || allowCompat {
 		if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
 			if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" {
 				if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
@@ -100,12 +124,17 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any,
 }
 
 // normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig.
-func normalizeThinkingConfigLocal(payload []byte, model string) []byte {
+// When allowCompat is true, reasoning fields are preserved even for models
+// without thinking support (simulating openai-compat passthrough behavior).
+func normalizeThinkingConfigLocal(payload []byte, model string, allowCompat bool) []byte {
 	if len(payload) == 0 || model == "" {
 		return payload
 	}
 
 	if !util.ModelSupportsThinking(model) {
+		if allowCompat {
+			return payload
+		}
 		return stripThinkingFieldsLocal(payload, false)
 	}
 
@@ -187,8 +216,8 @@ func validateThinkingConfigLocal(payload []byte, model string) error {
 }
 
 // normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks.
-func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) {
-	body = normalizeThinkingConfigLocal(body, upstreamModel)
+func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) {
+	body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat)
 	if err := validateThinkingConfigLocal(body, upstreamModel); err != nil {
 		return body, err
 	}
@@ -216,6 +245,7 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi
 	)
 
 	var err error
+	allowCompat := isOpenAICompatModel(normalizedModel)
 	switch toProtocol {
 	case "gemini":
 		body = applyThinkingMetadataLocal(body, metadata, normalizedModel)
@@ -227,13 +257,14 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi
 			body = util.ApplyClaudeThinkingConfig(body, budget)
 		}
 	case "openai":
-		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort")
-		body = normalizeThinkingConfigLocal(body, upstreamModel)
+		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort", allowCompat)
+		body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat)
 		err = validateThinkingConfigLocal(body, upstreamModel)
 	case "codex": // OpenAI responses / codex
-		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort")
+		// Codex does not support allowCompat; always use false.
+		body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort", false)
 		// Mirror CodexExecutor final normalization and model override so tests log the final body.
-		body, err = normalizeCodexPayload(body, upstreamModel)
+		body, err = normalizeCodexPayload(body, upstreamModel, false)
 	default:
 	}
 
@@ -290,9 +321,10 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 	defer cleanup()
 
 	models := []string{
-		"gpt-5",             // supports levels (low/medium/high)
-		"gemini-2.5-pro",    // supports numeric budget
-		"qwen3-coder-flash", // no thinking support
+		"gpt-5",                 // supports levels (low/medium/high)
+		"gemini-2.5-pro",        // supports numeric budget
+		"qwen3-coder-flash",     // no thinking support
+		"custom-thinking-model", // openai-compatible model with forced thinking suffix
 	}
 	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
 	toProtocols := []string{"gemini", "claude", "openai", "codex"}
@@ -404,7 +436,22 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 			}
 			return true, fmt.Sprintf("%d", *budget), false
 		case "openai":
-			if !util.ModelSupportsThinking(normalizedModel) {
+			allowCompat := isOpenAICompatModel(normalizedModel)
+			if !util.ModelSupportsThinking(normalizedModel) && !allowCompat {
+				return false, "", false
+			}
+			// For allowCompat models, pass through effort directly without validation
+			if allowCompat {
+				effort, ok := util.ReasoningEffortFromMetadata(metadata)
+				if ok && strings.TrimSpace(effort) != "" {
+					return true, strings.ToLower(strings.TrimSpace(effort)), false
+				}
+				// Check numeric budget fallback for allowCompat
+				if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil {
+					if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" {
+						return true, mapped, false
+					}
+				}
 				return false, "", false
 			}
 			if !util.ModelUsesThinkingLevels(normalizedModel) {
@@ -429,14 +476,8 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 			}
 			return false, "", true // validation would fail
 		case "codex":
-			if !util.ModelSupportsThinking(normalizedModel) {
-				return false, "", false
-			}
-			if !util.ModelUsesThinkingLevels(normalizedModel) {
-				// Non-levels models don't support effort strings in codex
-				if from != "openai-response" {
-					return false, "", false
-				}
+			// Codex does not support allowCompat; require thinking-capable level models.
+			if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) {
 				return false, "", false
 			}
 			effort, ok := util.ReasoningEffortFromMetadata(metadata)
@@ -574,6 +615,7 @@ func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol,
 	)
 
 	var err error
+	allowCompat := isOpenAICompatModel(model)
 	switch toProtocol {
 	case "gemini":
 		body = util.ApplyDefaultThinkingIfNeeded(model, body)
@@ -583,10 +625,11 @@ func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol,
 		// For raw payload, Claude thinking is passed through by translator
 		// No additional processing needed as thinking is already in body
 	case "openai":
-		body = normalizeThinkingConfigLocal(body, model)
+		body = normalizeThinkingConfigLocal(body, model, allowCompat)
 		err = validateThinkingConfigLocal(body, model)
 	case "codex":
-		body, err = normalizeCodexPayload(body, model)
+		// Codex does not support allowCompat; always use false.
+		body, err = normalizeCodexPayload(body, model, false)
 	}
 
 	body, _ = sjson.SetBytes(body, "model", model)
@@ -599,9 +642,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 	defer cleanup()
 
 	models := []string{
-		"gpt-5",             // supports levels (low/medium/high)
-		"gemini-2.5-pro",    // supports numeric budget
-		"qwen3-coder-flash", // no thinking support
+		"gpt-5",                 // supports levels (low/medium/high)
+		"gemini-2.5-pro",        // supports numeric budget
+		"qwen3-coder-flash",     // no thinking support
+		"custom-thinking-model", // openai-compatible model with forced thinking suffix
 	}
 	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
 	toProtocols := []string{"gemini", "claude", "openai", "codex"}
@@ -614,6 +658,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 	for _, model := range models {
 		supportsThinking := util.ModelSupportsThinking(model)
 		usesLevels := util.ModelUsesThinkingLevels(model)
+		allowCompat := isOpenAICompatModel(model)
 
 		for _, from := range fromProtocols {
 			var cases []scenario
@@ -624,7 +669,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 					{name: "effort-low", thinkingParam: "low"},
 					{name: "effort-medium", thinkingParam: "medium"},
 					{name: "effort-high", thinkingParam: "high"},
-					{name: "effort-invalid-xhigh", thinkingParam: "xhigh"},
+					{name: "effort-xhigh", thinkingParam: "xhigh"},
 					{name: "effort-invalid-foo", thinkingParam: "foo"},
 				}
 			case "gemini":
@@ -659,46 +704,65 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 			t.Run(testName, func(t *testing.T) {
 				expectPresent, expectValue, expectErr := func() (bool, string, bool) {
 					if cs.thinkingParam == nil {
-						// No thinking param provided
-						if to == "codex" && from != "openai-response" {
-							// Codex translators default to medium
-							if supportsThinking && usesLevels {
-								return true, "medium", false
-							}
+						if to == "codex" && from != "openai-response" && supportsThinking && usesLevels {
+							// Codex translators default reasoning.effort to "medium" for thinking-capable level models
+							return true, "medium", false
 						}
 						return false, "", false
 					}
-					if !supportsThinking {
-						return false, "", false
-					}
 
 					switch to {
 					case "gemini":
-						// Gemini expects numeric budget
+						if !supportsThinking || usesLevels {
+							return false, "", false
+						}
+						// Gemini expects numeric budget (only for non-level models)
 						if budget, ok := cs.thinkingParam.(int); ok {
							norm := util.NormalizeThinkingBudget(model, budget)
 							return true, fmt.Sprintf("%d", norm), false
 						}
 
+						// Convert effort level to budget for non-level models only
 						if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
-							if b, okB := util.ThinkingEffortToBudget(model, effort); okB {
-								return true, fmt.Sprintf("%d", b), false
+							if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+								// ThinkingEffortToBudget already returns normalized budget
+								return true, fmt.Sprintf("%d", budget), false
 							}
+							// Invalid effort maps to default/fallback
+							return true, fmt.Sprintf("%d", -1), false
 						}
 						return false, "", false
 					case "claude":
-						// Claude expects numeric budget
+						if !supportsThinking || usesLevels {
+							return false, "", false
+						}
+						// Claude expects numeric budget (only for non-level models)
 						if budget, ok := cs.thinkingParam.(int); ok && budget > 0 {
 							norm := util.NormalizeThinkingBudget(model, budget)
 							return true, fmt.Sprintf("%d", norm), false
 						}
 
+						// Convert effort level to budget for non-level models only
 						if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
-							if b, okB := util.ThinkingEffortToBudget(model, effort); okB && b > 0 {
-								return true, fmt.Sprintf("%d", b), false
+							if budget, okB := util.ThinkingEffortToBudget(model, effort); okB {
+								// ThinkingEffortToBudget already returns normalized budget
+								return true, fmt.Sprintf("%d", budget), false
 							}
+							// Invalid effort - claude may still set thinking with type:enabled
+							return true, "", false
 						}
 						return false, "", false
 					case "openai":
-						if !usesLevels {
+						if allowCompat {
+							if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" {
+								return true, strings.ToLower(strings.TrimSpace(effort)), false
+							}
+							if budget, ok := cs.thinkingParam.(int); ok {
+								if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+									return true, mapped, false
+								}
+							}
+							return false, "", false
+						}
+						if !supportsThinking || !usesLevels {
 							return false, "", false
 						}
 						if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
@@ -714,7 +778,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 						}
 						return false, "", false
 					case "codex":
-						if !usesLevels {
+						// Codex does not support allowCompat; require thinking-capable level models.
+						if !supportsThinking || !usesLevels {
 							return false, "", false
 						}
 						if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
@@ -728,7 +793,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) {
 								return true, mapped, false
 							}
 						}
-						// thinkingParam was non-nil but couldn't map - no default medium
+						if from != "openai-response" {
+							// Codex translators default reasoning.effort to "medium" for thinking-capable models
+							return true, "medium", false
+						}
 						return false, "", false
 					}
 					return false, "", false