Merge pull request #19 from router-for-me/plus

v6.6.1
This commit is contained in:
Luis Pater
2025-12-11 21:35:16 +08:00
committed by GitHub
22 changed files with 416 additions and 215 deletions

View File

@@ -105,7 +105,7 @@ ws-auth: false
# excluded-models:
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
# Kiro (AWS CodeWhisperer) configuration

View File

@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.5 Haiku",
ContextLength: 200000,
MaxCompletionTokens: 64000,
// Thinking: not supported for Haiku models
},
{
ID: "claude-sonnet-4-5-20250929",
@@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.1 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-20250514",
@@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-sonnet-4-20250514",
@@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-3-7-sonnet-20250219",
@@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 3.7 Sonnet",
ContextLength: 128000,
MaxCompletionTokens: 8192,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-3-5-haiku-20241022",
@@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 3.5 Haiku",
ContextLength: 128000,
MaxCompletionTokens: 8192,
// Thinking: not supported for Haiku models
},
}
}
@@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gpt-5-codex",
@@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5-codex-mini",
@@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1",
@@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex",
@@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-mini",
@@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-max",
@@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
}
}
@@ -610,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
DisplayName string
Description string
Created int64
Thinking *ThinkingSupport
}{
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
@@ -619,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
}
models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries {
@@ -641,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
Type: "iflow",
DisplayName: entry.DisplayName,
Description: entry.Description,
Thinking: entry.Thinking,
})
}
return models

View File

@@ -63,6 +63,9 @@ type ThinkingSupport struct {
ZeroAllowed bool `json:"zero_allowed,omitempty"`
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
// When set, the model uses level-based reasoning instead of token budgets.
Levels []string `json:"levels,omitempty"`
}
// ModelRegistration tracks a model's availability

View File

@@ -39,7 +39,7 @@ const (
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second
streamScannerBuffer int = 20_971_520
streamScannerBuffer int = 52_428_800 // 50MB
)
var (

View File

@@ -254,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// If from == to (Claude → Claude), directly forward the SSE stream without translation
if from == to {
scanner := bufio.NewScanner(decodedBody)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
for scanner.Scan() {
line := scanner.Bytes()
appendAPIResponseChunk(ctx, e.cfg, line)
@@ -277,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// For other formats, use translation
scanner := bufio.NewScanner(decodedBody)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
return betas, body
}
// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
// Only inject if thinking config is not already present
if gjson.GetBytes(body, "thinking").Exists() {
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
if !ok {
return body
}
budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
if !ok || budgetTokens <= 0 {
return body
}
body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
return body
}
func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
if matched {
if include != nil && !*include {
return 0, false
}
if budget != nil {
normalized := util.NormalizeThinkingBudget(modelName, *budget)
if normalized > 0 {
return normalized, true
}
return 0, false
}
if effort != nil {
if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
return derived, true
}
}
}
return claudeBudgetFromSuffix(modelName)
}
func claudeBudgetFromSuffix(modelName string) (int, bool) {
lower := strings.ToLower(strings.TrimSpace(modelName))
switch {
case strings.HasSuffix(lower, "-thinking-low"):
return 1024, true
case strings.HasSuffix(lower, "-thinking-medium"):
return 8192, true
case strings.HasSuffix(lower, "-thinking-high"):
return 24576, true
case strings.HasSuffix(lower, "-thinking"):
return 8192, true
default:
return 0, false
}
return util.ApplyClaudeThinkingConfig(body, budget)
}
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.

View File

@@ -54,7 +54,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.SetBytes(body, "stream", true)
@@ -148,7 +152,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "model", upstreamModel)
@@ -208,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -246,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
modelForCounting := req.Model
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "stream", false)

View File

@@ -309,7 +309,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
}()
if opts.Alt == "" {
scanner := bufio.NewScanner(resp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -249,7 +249,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -579,7 +579,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -696,7 +696,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -57,10 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -143,10 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
toolsResult := gjson.GetBytes(body, "tools")
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -209,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -58,10 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
translated = normalizeThinkingConfig(translated, upstreamModel)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return resp, errValidate
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -147,10 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
translated = normalizeThinkingConfig(translated, upstreamModel)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return nil, errValidate
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -214,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -1,6 +1,8 @@
package executor
import (
"fmt"
"net/http"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -9,7 +11,7 @@ import (
"github.com/tidwall/sjson"
)
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -26,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -43,40 +45,21 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
// overwriting caller-provided values to honor suffix/default metadata priority.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
if field == "" {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
return updated
}
}
return payload
}
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
@@ -232,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
}
return pi == len(pattern)
}
// normalizeThinkingConfig normalizes thinking-related fields in the payload
// based on model capabilities. For models without thinking support, it strips
// reasoning fields. For models with level-based thinking, it validates and
// normalizes the reasoning effort level.
func normalizeThinkingConfig(payload []byte, model string) []byte {
if len(payload) == 0 || model == "" {
return payload
}
if !util.ModelSupportsThinking(model) {
return stripThinkingFields(payload)
}
if util.ModelUsesThinkingLevels(model) {
return normalizeReasoningEffortLevel(payload, model)
}
return payload
}
// stripThinkingFields removes thinking-related fields from the payload for
// models that do not support thinking.
func stripThinkingFields(payload []byte) []byte {
fieldsToRemove := []string{
"reasoning",
"reasoning_effort",
"reasoning.effort",
}
out := payload
for _, field := range fieldsToRemove {
if gjson.GetBytes(out, field).Exists() {
out, _ = sjson.DeleteBytes(out, field)
}
}
return out
}
// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
// or reasoning.effort field for level-based thinking models.
func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
out := payload
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
}
}
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
}
}
return out
}
// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
// downgrading requests.
func validateThinkingConfig(payload []byte, model string) error {
if len(payload) == 0 || model == "" {
return nil
}
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
return nil
}
levels := util.GetModelThinkingLevels(model)
checkField := func(path string) error {
if effort := gjson.GetBytes(payload, path); effort.Exists() {
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
return statusErr{
code: http.StatusBadRequest,
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
}
}
}
return nil
}
if err := checkField("reasoning_effort"); err != nil {
return err
}
if err := checkField("reasoning.effort"); err != nil {
return err
}
return nil
}

View File

@@ -51,10 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -126,10 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.
@@ -190,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -35,6 +35,7 @@ type Params struct {
TotalTokenCount int64 // Cached total token count from usage metadata
HasSentFinalEvents bool // Indicates if final content/message events have been sent
HasToolUse bool // Indicates if tool use was observed in the stream
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -69,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
if bytes.Equal(rawJSON, []byte("[DONE]")) {
output := ""
appendFinalEvents(params, &output, true)
return []string{
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send final events if we have actually output content
if params.HasContent {
appendFinalEvents(params, &output, true)
return []string{
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
output := ""
@@ -119,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
@@ -146,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.ResponseType = 2 // Set state to thinking
params.HasContent = true
}
} else {
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
@@ -156,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
@@ -179,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.ResponseType = 1 // Set state to content
params.HasContent = true
}
}
}
@@ -230,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
params.ResponseType = 3
params.HasContent = true
}
}
}
@@ -269,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
return
}
// Only send final events if we have actually output content
if !params.HasContent {
return
}
if params.ResponseType != 0 {
*output = *output + "event: content_block_stop\n"
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)

View File

@@ -331,9 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
streamingEvents := make([][]byte, 0)
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
// Use a smaller initial buffer (64KB) that can grow up to 20MB if needed
// This prevents allocating 20MB for every request regardless of size
scanner.Buffer(make([]byte, 64*1024), 20_971_520)
buffer := make([]byte, 52_428_800) // 50MB
scanner.Buffer(buffer, 52_428_800)
for scanner.Scan() {
line := scanner.Bytes()
// log.Debug(string(line))

View File

@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
// Use a simple scanner to iterate through raw bytes
// Note: extremely large responses may require increasing the buffer
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
buf := make([]byte, 20_971_520)
scanner.Buffer(buf, 20_971_520)
buf := make([]byte, 52_428_800) // 50MB
scanner.Buffer(buf, 52_428_800)
for scanner.Scan() {
line := scanner.Bytes()
if !bytes.HasPrefix(line, dataTag) {

View File

@@ -26,6 +26,7 @@ type Params struct {
HasFirstResponse bool // Indicates if the initial message_start event has been sent
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
ResponseIndex int // Index counter for content blocks in the streaming response
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -57,9 +58,13 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
}
if bytes.Equal(rawJSON, []byte("[DONE]")) {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send message_stop if we have actually output content
if (*param).(*Params).HasContent {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
// Track whether tools are being used in this response chunk
@@ -107,7 +112,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
if (*param).(*Params).ResponseType == 2 {
sb.WriteString("event: content_block_delta\n")
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
@@ -126,6 +132,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
(*param).(*Params).ResponseType = 2 // Set state to thinking
(*param).(*Params).HasContent = true
}
} else {
// Process regular text content (user-visible output)
@@ -133,7 +140,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
if (*param).(*Params).ResponseType == 1 {
sb.WriteString("event: content_block_delta\n")
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
@@ -152,6 +160,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
(*param).(*Params).ResponseType = 1 // Set state to content
(*param).(*Params).HasContent = true
}
}
} else if functionCallResult.Exists() {
@@ -194,6 +203,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
}
(*param).(*Params).ResponseType = 3
(*param).(*Params).HasContent = true
}
}
}
@@ -202,28 +212,31 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
// Process usage metadata and finish reason when present in the response
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
// Close the final content block
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
// Only send final events if we have actually output content
if (*param).(*Params).HasContent {
// Close the final content block
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
// Send the final message delta with usage information and stop reason
sb.WriteString("event: message_delta\n")
sb.WriteString(`data: `)
// Send the final message delta with usage information and stop reason
output = output + "event: message_delta\n"
output = output + `data: `
// Create the message delta template with appropriate stop reason
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Set tool_use stop reason if tools were used in this response
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Create the message delta template with appropriate stop reason
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Set tool_use stop reason if tools were used in this response
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
}
// Include thinking tokens in output token count if present
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
// Include thinking tokens in output token count if present
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
sb.WriteString(template + "\n\n\n")
}
}

View File

@@ -25,6 +25,7 @@ type Params struct {
HasFirstResponse bool
ResponseType int
ResponseIndex int
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -57,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
}
if bytes.Equal(rawJSON, []byte("[DONE]")) {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send message_stop if we have actually output content
if (*param).(*Params).HasContent {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
// Track whether tools are being used in this response chunk
@@ -108,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
@@ -131,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 2 // Set state to thinking
(*param).(*Params).HasContent = true
}
} else {
// Process regular text content (user-visible output)
@@ -139,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
@@ -162,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 1 // Set state to content
(*param).(*Params).HasContent = true
}
}
} else if functionCallResult.Exists() {
@@ -211,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
(*param).(*Params).ResponseType = 3
(*param).(*Params).HasContent = true
}
}
}
@@ -218,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
// Only send final events if we have actually output content
if (*param).(*Params).HasContent {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
output = output + "event: message_delta\n"
output = output + `data: `
output = output + "event: message_delta\n"
output = output + `data: `
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
}
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
}

View File

@@ -0,0 +1,46 @@
package util
import (
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
if budget == nil {
return body
}
if gjson.GetBytes(body, "thinking").Exists() {
return body
}
if *budget <= 0 {
return body
}
updated := body
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
return updated
}
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
if !matched {
return nil, false
}
if include != nil && !*include {
return nil, true
}
if budget == nil {
return nil, true
}
normalized := NormalizeThinkingBudget(modelName, *budget)
if normalized <= 0 {
return nil, true
}
return &normalized, true
}

View File

@@ -1,6 +1,8 @@
package util
import (
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
// Returns nil if the model has no thinking support or no levels defined.
func GetModelThinkingLevels(model string) []string {
if model == "" {
return nil
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return nil
}
return info.Thinking.Levels
}
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
// effort levels instead of numeric budgets.
func ModelUsesThinkingLevels(model string) bool {
levels := GetModelThinkingLevels(model)
return len(levels) > 0
}
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
// level for the given model. Returns false when the level is not supported.
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
levels := GetModelThinkingLevels(model)
if len(levels) == 0 {
return "", false
}
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
for _, lvl := range levels {
if strings.ToLower(lvl) == loweredEffort {
return lvl, true
}
}
return "", false
}

View File

@@ -14,61 +14,59 @@ const (
)
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
// the normalized base model with extracted metadata. Supported patterns:
// - "-thinking-<number>" extracts a numeric budget
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
// - "-thinking" maps to a default reasoning effort of "medium"
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
// - "-nothinking" maps to budget=0 and include_thoughts=false
// the normalized base model with extracted metadata. Supported pattern:
// - "(<value>)" where value can be:
// - A numeric budget (e.g., "(8192)", "(16384)")
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
//
// Examples:
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
// - "gpt-5.1(high)" → reasoning_effort="high"
// - "gemini-2.5-pro(32768)" → budget=32768
//
// Note: Empty parentheses "()" are not supported and will be ignored.
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if modelName == "" {
return modelName, nil
}
lower := strings.ToLower(modelName)
baseModel := modelName
var (
budgetOverride *int
includeThoughts *bool
reasoningEffort *string
matched bool
)
switch {
case strings.HasSuffix(lower, "-nothinking"):
baseModel = modelName[:len(modelName)-len("-nothinking")]
budget := 0
include := false
budgetOverride = &budget
includeThoughts = &include
matched = true
case strings.HasSuffix(lower, "-reasoning"):
baseModel = modelName[:len(modelName)-len("-reasoning")]
budget := -1
include := true
budgetOverride = &budget
includeThoughts = &include
matched = true
default:
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
value := modelName[idx+len("-thinking-"):]
if value != "" {
if parsed, ok := parseIntPrefix(value); ok {
baseModel = modelName[:idx]
budgetOverride = &parsed
matched = true
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
baseModel = modelName[:idx]
reasoningEffort = &effort
matched = true
}
}
} else if strings.HasSuffix(lower, "-thinking") {
baseModel = modelName[:len(modelName)-len("-thinking")]
effort := "medium"
reasoningEffort = &effort
// Match "(<value>)" pattern at the end of the model name
if idx := strings.LastIndex(modelName, "("); idx != -1 {
if !strings.HasSuffix(modelName, ")") {
// Incomplete parenthesis, ignore
return baseModel, nil
}
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
if value == "" {
// Empty parentheses not supported
return baseModel, nil
}
candidateBase := modelName[:idx]
// Auto-detect: pure numeric → budget, string → reasoning effort level
if parsed, ok := parseIntPrefix(value); ok {
// Numeric value: treat as thinking budget
baseModel = candidateBase
budgetOverride = &parsed
matched = true
} else {
// String value: treat as reasoning effort level
baseModel = candidateBase
raw := strings.ToLower(strings.TrimSpace(value))
if raw != "" {
reasoningEffort = &raw
matched = true
}
}
}
@@ -82,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if budgetOverride != nil {
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
}
if includeThoughts != nil {
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
}
if reasoningEffort != nil {
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
}
@@ -185,7 +180,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
return "", false
}
if effort != nil && *effort != "" {
return *effort, true
return strings.ToLower(strings.TrimSpace(*effort)), true
}
if budget != nil {
switch *budget {
@@ -207,7 +202,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
if effort == "" {
return 0, false
}
switch strings.ToLower(effort) {
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
if !ok {
normalized = strings.ToLower(strings.TrimSpace(effort))
}
switch normalized {
case "none":
return 0, true
case "auto":
@@ -312,16 +311,3 @@ func parseNumberToInt(raw any) (int, bool) {
}
return 0, false
}
func normalizeReasoningEffort(value string) (string, bool) {
if value == "" {
return "", false
}
effort := strings.ToLower(strings.TrimSpace(value))
switch effort {
case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
return effort, true
default:
return "", false
}
}

View File

@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
continue
}
if errMsg != nil {
status := http.StatusInternalServerError
if errMsg.StatusCode > 0 {
status = errMsg.StatusCode
}
c.Status(status)
// An error occurred: emit as a proper SSE error event
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
_, _ = writer.WriteString("event: error\n")
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
_, _ = writer.Write(errorBytes)
_, _ = writer.WriteString("\n\n")
_ = writer.Flush()
flusher.Flush()
}
var execErr error
if errMsg != nil {