From facfe7c518cb528426dcb82c7f927e4f151bea33 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:17:28 +0800 Subject: [PATCH] refactor(thinking): use bracket tags for thinking meta Align thinking suffix handling on a single bracket-style marker. NormalizeThinkingModel strips a terminal `[value]` segment from model identifiers and turns it into either a thinking budget (for numeric values) or a reasoning effort hint (for strings). Emission of `ThinkingIncludeThoughtsMetadataKey` is removed. Executor helpers and the example config are updated so their comments reference the new `[value]` suffix format instead of the legacy dash variants. BREAKING CHANGE: dash-based thinking suffixes (`-thinking`, `-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed for thinking metadata; only `[value]` annotations are recognized. --- config.example.yaml | 2 +- internal/runtime/executor/payload_helpers.go | 4 +- internal/util/thinking_suffix.go | 122 ++++++------------- 3 files changed, 41 insertions(+), 87 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index dfd7454b..31f16973 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -100,7 +100,7 @@ ws-auth: false # excluded-models: # - "claude-opus-4-5-20251101" # exclude specific models (exact match) # - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219) -# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking) +# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking) # - "*haiku*" # wildcard matching substring (e.g. 
claude-3-5-haiku-20241022) # OpenAI compatibility providers diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9c45681a..be249868 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -11,7 +11,7 @@ import ( "github.com/tidwall/sjson" ) -// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192]) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) @@ -28,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } -// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192]) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index ef8302b0..c9a68534 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -14,100 +14,57 @@ const ( ) // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns -// the normalized base model with extracted metadata. 
Supported patterns: -// - "-thinking-<number>" extracts a numeric budget -// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none) -// - "-thinking" maps to a default reasoning effort of "medium" -// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true -// - "-nothinking" maps to budget=0 and include_thoughts=false +// the normalized base model with extracted metadata. Supported pattern: +// - "[<value>]" where value can be: +// - A numeric budget (e.g., "[8192]", "[16384]") +// - A reasoning effort level (e.g., "[high]", "[medium]", "[low]") +// +// Examples: +// - "claude-sonnet-4-5-20250929[16384]" → budget=16384 +// - "gpt-5.1[high]" → reasoning_effort="high" +// - "gemini-2.5-pro[32768]" → budget=32768 +// +// Note: Empty brackets "[]" are not supported and will be ignored. func NormalizeThinkingModel(modelName string) (string, map[string]any) { if modelName == "" { return modelName, nil } - lower := strings.ToLower(modelName) baseModel := modelName var ( budgetOverride *int - includeThoughts *bool reasoningEffort *string matched bool ) - switch { - case strings.HasSuffix(lower, "-nothinking"): - baseModel = modelName[:len(modelName)-len("-nothinking")] - budget := 0 - include := false - budgetOverride = &budget - includeThoughts = &include - matched = true - case strings.HasSuffix(lower, "-reasoning"): - baseModel = modelName[:len(modelName)-len("-reasoning")] - budget := -1 - include := true - budgetOverride = &budget - includeThoughts = &include - matched = true - default: - if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { - // Skip stripping if the original model is a registered thinking model. - // This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed. 
- if ModelSupportsThinking(modelName) { - break - } - value := modelName[idx+len("-thinking-"):] - if value != "" { - if parsed, ok := parseIntPrefix(value); ok { - candidateBase := modelName[:idx] - if ModelUsesThinkingLevels(candidateBase) { - baseModel = candidateBase - // Numeric suffix on level-aware models should still surface as reasoning effort metadata. - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - } - matched = true - } else { - baseModel = candidateBase - budgetOverride = &parsed - matched = true - } - } else { - baseModel = modelName[:idx] - if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { - reasoningEffort = &normalized - matched = true - } else if !ModelUsesThinkingLevels(baseModel) { - // Keep unknown effort tokens so callers can honor user intent even without normalization. - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } else { - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } - } - } - } else if strings.HasSuffix(lower, "-thinking") { - candidateBase := modelName[:len(modelName)-len("-thinking")] - // Only strip the suffix if the original model is NOT a registered thinking model. - // This prevents stripping "-thinking" from models like "kimi-k2-thinking" where - // the suffix is part of the model's actual name. 
- if !ModelSupportsThinking(modelName) { - baseModel = candidateBase - effort := "medium" - reasoningEffort = &effort + // Match "[value]" pattern at the end of the model name + if idx := strings.LastIndex(modelName, "["); idx != -1 { + if !strings.HasSuffix(modelName, "]") { + // Incomplete bracket, ignore + return baseModel, nil + } + + value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ] + if value == "" { + // Empty brackets not supported + return baseModel, nil + } + + candidateBase := modelName[:idx] + + // Auto-detect: pure numeric → budget, string → reasoning effort level + if parsed, ok := parseIntPrefix(value); ok { + // Numeric value: treat as thinking budget + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } else { + // String value: treat as reasoning effort level + baseModel = candidateBase + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw matched = true } } @@ -123,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { if budgetOverride != nil { metadata[ThinkingBudgetMetadataKey] = *budgetOverride } - if includeThoughts != nil { - metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts - } if reasoningEffort != nil { metadata[ReasoningEffortMetadataKey] = *reasoningEffort }