Compare commits

..

27 Commits

Author SHA1 Message Date
Luis Pater
28469576bf Merge branch 'router-for-me:main' into main 2025-12-12 02:41:05 +08:00
Luis Pater
ef0edbfe69 refactor(claude): replace strings.Builder with simpler output string concatenation 2025-12-11 22:34:06 +08:00
Luis Pater
bb6312b4fc Merge pull request #488 from router-for-me/gemini
Unify the Gemini executor style
2025-12-11 22:14:17 +08:00
hkfires
3c315551b0 refactor(executor): relocate gemini token counters 2025-12-11 21:56:44 +08:00
hkfires
27c9c5c4da refactor(executor): clarify executor comments and oauth names 2025-12-11 21:56:44 +08:00
hkfires
fc9f6c974a refactor(executor): clarify providers and streams
Add package and constructor documentation for AI Studio, Antigravity,
Gemini CLI, Gemini API, and Vertex executors to describe their roles and
inputs.

Introduce a shared stream scanner buffer constant in the Gemini API
executor and reuse it in Gemini CLI and Vertex streaming code so stream
handling uses a consistent configuration.

Update Refresh implementations for AI Studio, Gemini CLI, Gemini API
(API key), and Vertex executors to short‑circuit and simply return the
incoming auth object, while keeping Antigravity token renewal as the
only executor that performs OAuth refresh.

Remove OAuth2-based token refresh logic and related dependencies from
the Gemini API executor, since it now operates strictly with API key
credentials.
2025-12-11 21:56:43 +08:00
Luis Pater
242b4d5754 Merge pull request #19 from router-for-me/plus
v6.6.1
2025-12-11 21:35:16 +08:00
Luis Pater
4ce7c61a17 Merge branch 'main' into plus 2025-12-11 21:33:49 +08:00
Luis Pater
a74ee3f319 Merge pull request #481 from sususu98/fix/increase-buffer-size
fix: increase buffer size for stream scanners to 50MB across multiple executors
2025-12-11 21:20:54 +08:00
Luis Pater
564bcbaa54 Merge pull request #487 from router-for-me/amp
fix(amp): set status on claude stream errors
2025-12-11 21:18:19 +08:00
hkfires
88bdd25f06 fix(amp): set status on claude stream errors 2025-12-11 20:12:06 +08:00
hkfires
e79f65fd8e refactor(thinking): use parentheses for metadata suffix 2025-12-11 18:39:07 +08:00
Luis Pater
2760989401 Merge pull request #485 from router-for-me/think
Think
2025-12-11 18:27:00 +08:00
hkfires
facfe7c518 refactor(thinking): use bracket tags for thinking meta
Align thinking suffix handling on a single bracket-style marker.

NormalizeThinkingModel strips a terminal `[value]` segment from
model identifiers and turns it into either a thinking budget (for
numeric values) or a reasoning effort hint (for strings). Emission
of `ThinkingIncludeThoughtsMetadataKey` is removed.

Executor helpers and the example config are updated so their
comments reference the new `[value]` suffix format instead of the
legacy dash variants.

BREAKING CHANGE: dash-based thinking suffixes (`-thinking`,
`-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed
for thinking metadata; only `[value]` annotations are recognized.
2025-12-11 18:17:28 +08:00
hkfires
6285459c08 fix(runtime): unify claude thinking config resolution 2025-12-11 17:20:44 +08:00
hkfires
21bbceca0c docs(runtime): document reasoning effort precedence 2025-12-11 16:35:36 +08:00
hkfires
f6300c72b7 fix(runtime): validate thinking config in iflow and qwen 2025-12-11 16:21:50 +08:00
hkfires
007572b58e fix(util): do not strip thinking suffix on registered models
NormalizeThinkingModel now checks ModelSupportsThinking before removing
"-thinking" or "-thinking-<ver>", avoiding accidental parsing of model
names where the suffix is part of the official id (e.g., kimi-k2-thinking,
qwen3-235b-a22b-thinking-2507).

The registry adds ThinkingSupport metadata for several models and
propagates it via ModelInfo (e.g., kimi-k2-thinking, deepseek-r1,
qwen3-235b-a22b-thinking-2507, minimax-m2), enabling accurate detection
of thinking-capable models and correcting base model inference.
2025-12-11 15:52:14 +08:00
hkfires
3a81ab22fd fix(runtime): unify reasoning effort metadata overrides 2025-12-11 14:35:05 +08:00
hkfires
519da2e042 fix(runtime): validate reasoning effort levels 2025-12-11 12:36:54 +08:00
hkfires
169f4295d0 fix(util): align reasoning effort handling with registry 2025-12-11 12:20:12 +08:00
hkfires
d06d0eab2f fix(util): centralize reasoning effort normalization 2025-12-11 12:14:51 +08:00
hkfires
3ffd120ae9 feat(runtime): add thinking config normalization 2025-12-11 11:51:33 +08:00
hkfires
a03d514095 feat(registry): add thinking metadata for models 2025-12-11 11:28:44 +08:00
Luis Pater
1da03bfe15 Merge pull request #479 from router-for-me/claude
fix(claude): prevent final events when no content streamed
2025-12-11 08:18:59 +08:00
sususu
76c563d161 fix(executor): increase buffer size for stream scanners to 50MB across multiple executors 2025-12-10 23:20:04 +08:00
hkfires
a89514951f fix(claude): prevent final events when no content streamed 2025-12-10 22:19:55 +08:00
23 changed files with 736 additions and 570 deletions

View File

@@ -105,7 +105,7 @@ ws-auth: false
# excluded-models:
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
# Kiro (AWS CodeWhisperer) configuration

View File

@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.5 Haiku",
ContextLength: 200000,
MaxCompletionTokens: 64000,
// Thinking: not supported for Haiku models
},
{
ID: "claude-sonnet-4-5-20250929",
@@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.1 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-20250514",
@@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-sonnet-4-20250514",
@@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-3-7-sonnet-20250219",
@@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 3.7 Sonnet",
ContextLength: 128000,
MaxCompletionTokens: 8192,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-3-5-haiku-20241022",
@@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 3.5 Haiku",
ContextLength: 128000,
MaxCompletionTokens: 8192,
// Thinking: not supported for Haiku models
},
}
}
@@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gpt-5-codex",
@@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5-codex-mini",
@@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1",
@@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex",
@@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-mini",
@@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-max",
@@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
}
}
@@ -610,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
DisplayName string
Description string
Created int64
Thinking *ThinkingSupport
}{
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
@@ -619,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
}
models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries {
@@ -641,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
Type: "iflow",
DisplayName: entry.DisplayName,
Description: entry.Description,
Thinking: entry.Thinking,
})
}
return models

View File

@@ -63,6 +63,9 @@ type ThinkingSupport struct {
ZeroAllowed bool `json:"zero_allowed,omitempty"`
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
// When set, the model uses level-based reasoning instead of token budgets.
Levels []string `json:"levels,omitempty"`
}
// ModelRegistration tracks a model's availability

View File

@@ -1,3 +1,6 @@
// Package executor provides runtime execution capabilities for various AI service providers.
// This file implements the AI Studio executor that routes requests through a websocket-backed
// transport for the AI Studio provider.
package executor
import (
@@ -26,19 +29,28 @@ type AIStudioExecutor struct {
cfg *config.Config
}
// NewAIStudioExecutor constructs a websocket executor for the provider name.
// NewAIStudioExecutor creates a new AI Studio executor instance.
//
// Parameters:
// - cfg: The application configuration
// - provider: The provider name
// - relay: The websocket relay manager
//
// Returns:
// - *AIStudioExecutor: A new AI Studio executor instance
func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
}
// Identifier returns the logical provider key for routing.
// Identifier returns the executor identifier.
func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
// PrepareRequest is a no-op because websocket transport already injects headers.
// PrepareRequest prepares the HTTP request for execution (no-op for AI Studio).
func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
return nil
}
// Execute performs a non-streaming request to the AI Studio API.
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -92,6 +104,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
return resp, nil
}
// ExecuteStream performs a streaming request to the AI Studio API.
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -239,6 +252,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
return stream, nil
}
// CountTokens counts tokens for the given request using the AI Studio API.
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
_, body, err := e.translateRequest(req, opts, false)
if err != nil {
@@ -293,8 +307,8 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
}
func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
_ = ctx
// Refresh refreshes the authentication credentials (no-op for AI Studio).
func (e *AIStudioExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
return auth, nil
}

View File

@@ -1,3 +1,6 @@
// Package executor provides runtime execution capabilities for various AI service providers.
// This file implements the Antigravity executor that proxies requests to the antigravity
// upstream using OAuth credentials.
package executor
import (
@@ -30,16 +33,15 @@ import (
const (
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
// antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
antigravityStreamPath = "/v1internal:streamGenerateContent"
antigravityGeneratePath = "/v1internal:generateContent"
antigravityModelsPath = "/v1internal:fetchAvailableModels"
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
antigravityClientSecret = "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"
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second
streamScannerBuffer int = 20_971_520
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
antigravityStreamPath = "/v1internal:streamGenerateContent"
antigravityGeneratePath = "/v1internal:generateContent"
antigravityModelsPath = "/v1internal:fetchAvailableModels"
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
antigravityClientSecret = "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"
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second
)
var (
@@ -52,18 +54,24 @@ type AntigravityExecutor struct {
cfg *config.Config
}
// NewAntigravityExecutor constructs a new executor instance.
// NewAntigravityExecutor creates a new Antigravity executor instance.
//
// Parameters:
// - cfg: The application configuration
//
// Returns:
// - *AntigravityExecutor: A new Antigravity executor instance
func NewAntigravityExecutor(cfg *config.Config) *AntigravityExecutor {
return &AntigravityExecutor{cfg: cfg}
}
// Identifier implements ProviderExecutor.
// Identifier returns the executor identifier.
func (e *AntigravityExecutor) Identifier() string { return antigravityAuthType }
// PrepareRequest implements ProviderExecutor.
// PrepareRequest prepares the HTTP request for execution (no-op for Antigravity).
func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
// Execute handles non-streaming requests via the antigravity generate endpoint.
// Execute performs a non-streaming request to the Antigravity API.
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
if errToken != nil {
@@ -156,7 +164,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
return resp, err
}
// ExecuteStream handles streaming requests via the antigravity upstream.
// ExecuteStream performs a streaming request to the Antigravity API.
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
ctx = context.WithValue(ctx, "alt", "")
@@ -296,7 +304,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
return nil, err
}
// Refresh refreshes the OAuth token using the refresh token.
// Refresh refreshes the authentication credentials using the refresh token.
func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
if auth == nil {
return auth, nil
@@ -308,7 +316,7 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
return updated, nil
}
// CountTokens is not supported for the antigravity provider.
// CountTokens counts tokens for the given request (not supported for Antigravity).
func (e *AntigravityExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported"}
}

View File

@@ -254,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// If from == to (Claude → Claude), directly forward the SSE stream without translation
if from == to {
scanner := bufio.NewScanner(decodedBody)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
for scanner.Scan() {
line := scanner.Bytes()
appendAPIResponseChunk(ctx, e.cfg, line)
@@ -277,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// For other formats, use translation
scanner := bufio.NewScanner(decodedBody)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
return betas, body
}
// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
// Only inject if thinking config is not already present
if gjson.GetBytes(body, "thinking").Exists() {
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
if !ok {
return body
}
budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
if !ok || budgetTokens <= 0 {
return body
}
body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
return body
}
func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
if matched {
if include != nil && !*include {
return 0, false
}
if budget != nil {
normalized := util.NormalizeThinkingBudget(modelName, *budget)
if normalized > 0 {
return normalized, true
}
return 0, false
}
if effort != nil {
if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
return derived, true
}
}
}
return claudeBudgetFromSuffix(modelName)
}
func claudeBudgetFromSuffix(modelName string) (int, bool) {
lower := strings.ToLower(strings.TrimSpace(modelName))
switch {
case strings.HasSuffix(lower, "-thinking-low"):
return 1024, true
case strings.HasSuffix(lower, "-thinking-medium"):
return 8192, true
case strings.HasSuffix(lower, "-thinking-high"):
return 24576, true
case strings.HasSuffix(lower, "-thinking"):
return 8192, true
default:
return 0, false
}
return util.ApplyClaudeThinkingConfig(body, budget)
}
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.

View File

@@ -54,7 +54,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.SetBytes(body, "stream", true)
@@ -148,7 +152,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "model", upstreamModel)
@@ -208,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -246,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
modelForCounting := req.Model
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "stream", false)

View File

@@ -1,3 +1,6 @@
// Package executor provides runtime execution capabilities for various AI service providers.
// This file implements the Gemini CLI executor that talks to Cloud Code Assist endpoints
// using OAuth credentials from auth metadata.
package executor
import (
@@ -29,11 +32,11 @@ import (
const (
codeAssistEndpoint = "https://cloudcode-pa.googleapis.com"
codeAssistVersion = "v1internal"
geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
geminiOauthClientSecret = "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"
geminiOAuthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
geminiOAuthClientSecret = "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"
)
var geminiOauthScopes = []string{
var geminiOAuthScopes = []string{
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/userinfo.profile",
@@ -44,14 +47,24 @@ type GeminiCLIExecutor struct {
cfg *config.Config
}
// NewGeminiCLIExecutor creates a new Gemini CLI executor instance.
//
// Parameters:
// - cfg: The application configuration
//
// Returns:
// - *GeminiCLIExecutor: A new Gemini CLI executor instance
func NewGeminiCLIExecutor(cfg *config.Config) *GeminiCLIExecutor {
return &GeminiCLIExecutor{cfg: cfg}
}
// Identifier returns the executor identifier.
func (e *GeminiCLIExecutor) Identifier() string { return "gemini-cli" }
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini CLI).
func (e *GeminiCLIExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
// Execute performs a non-streaming request to the Gemini CLI API.
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
if err != nil {
@@ -189,6 +202,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
return resp, err
}
// ExecuteStream performs a streaming request to the Gemini CLI API.
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
if err != nil {
@@ -309,7 +323,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
}()
if opts.Alt == "" {
scanner := bufio.NewScanner(resp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, streamScannerBuffer)
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -371,6 +385,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
return nil, err
}
// CountTokens counts tokens for the given request using the Gemini CLI API.
func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
if err != nil {
@@ -471,9 +486,8 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
}
func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
log.Debugf("gemini cli executor: refresh called")
_ = ctx
// Refresh refreshes the authentication credentials (no-op for Gemini CLI).
func (e *GeminiCLIExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
return auth, nil
}
@@ -515,9 +529,9 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth *
}
conf := &oauth2.Config{
ClientID: geminiOauthClientID,
ClientSecret: geminiOauthClientSecret,
Scopes: geminiOauthScopes,
ClientID: geminiOAuthClientID,
ClientSecret: geminiOAuthClientSecret,
Scopes: geminiOAuthScopes,
Endpoint: google.Endpoint,
}

View File

@@ -11,7 +11,6 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -21,8 +20,6 @@ import (
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
)
const (
@@ -31,6 +28,9 @@ const (
// glAPIVersion is the API version used for Gemini requests.
glAPIVersion = "v1beta"
// streamScannerBuffer is the buffer size for SSE stream scanning.
streamScannerBuffer = 52_428_800
)
// GeminiExecutor is a stateless executor for the official Gemini API using API keys.
@@ -48,9 +48,11 @@ type GeminiExecutor struct {
//
// Returns:
// - *GeminiExecutor: A new Gemini executor instance
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor { return &GeminiExecutor{cfg: cfg} }
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor {
return &GeminiExecutor{cfg: cfg}
}
// Identifier returns the executor identifier for Gemini.
// Identifier returns the executor identifier.
func (e *GeminiExecutor) Identifier() string { return "gemini" }
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini).
@@ -164,6 +166,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
return resp, nil
}
// ExecuteStream performs a streaming request to the Gemini API.
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
apiKey, bearer := geminiCreds(auth)
@@ -249,7 +252,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, streamScannerBuffer)
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -280,6 +283,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
return stream, nil
}
// CountTokens counts tokens for the given request using the Gemini API.
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
apiKey, bearer := geminiCreds(auth)
@@ -353,106 +357,8 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
}
func (e *GeminiExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
log.Debugf("gemini executor: refresh called")
// OAuth bearer token refresh for official Gemini API.
if auth == nil {
return nil, fmt.Errorf("gemini executor: auth is nil")
}
if auth.Metadata == nil {
return auth, nil
}
// Token data is typically nested under "token" map in Gemini files.
tokenMap, _ := auth.Metadata["token"].(map[string]any)
var refreshToken, accessToken, clientID, clientSecret, tokenURI, expiryStr string
if tokenMap != nil {
if v, ok := tokenMap["refresh_token"].(string); ok {
refreshToken = v
}
if v, ok := tokenMap["access_token"].(string); ok {
accessToken = v
}
if v, ok := tokenMap["client_id"].(string); ok {
clientID = v
}
if v, ok := tokenMap["client_secret"].(string); ok {
clientSecret = v
}
if v, ok := tokenMap["token_uri"].(string); ok {
tokenURI = v
}
if v, ok := tokenMap["expiry"].(string); ok {
expiryStr = v
}
} else {
// Fallback to top-level keys if present
if v, ok := auth.Metadata["refresh_token"].(string); ok {
refreshToken = v
}
if v, ok := auth.Metadata["access_token"].(string); ok {
accessToken = v
}
if v, ok := auth.Metadata["client_id"].(string); ok {
clientID = v
}
if v, ok := auth.Metadata["client_secret"].(string); ok {
clientSecret = v
}
if v, ok := auth.Metadata["token_uri"].(string); ok {
tokenURI = v
}
if v, ok := auth.Metadata["expiry"].(string); ok {
expiryStr = v
}
}
if refreshToken == "" {
// Nothing to do for API key or cookie based entries
return auth, nil
}
// Prepare oauth2 config; default to Google endpoints
endpoint := google.Endpoint
if tokenURI != "" {
endpoint.TokenURL = tokenURI
}
conf := &oauth2.Config{ClientID: clientID, ClientSecret: clientSecret, Endpoint: endpoint}
// Ensure proxy-aware HTTP client for token refresh
httpClient := util.SetProxy(&e.cfg.SDKConfig, &http.Client{})
ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient)
// Build base token
tok := &oauth2.Token{AccessToken: accessToken, RefreshToken: refreshToken}
if t, err := time.Parse(time.RFC3339, expiryStr); err == nil {
tok.Expiry = t
}
newTok, err := conf.TokenSource(ctx, tok).Token()
if err != nil {
return nil, err
}
// Persist back to metadata; prefer nested token map if present
if tokenMap == nil {
tokenMap = make(map[string]any)
}
tokenMap["access_token"] = newTok.AccessToken
tokenMap["refresh_token"] = newTok.RefreshToken
tokenMap["expiry"] = newTok.Expiry.Format(time.RFC3339)
if clientID != "" {
tokenMap["client_id"] = clientID
}
if clientSecret != "" {
tokenMap["client_secret"] = clientSecret
}
if tokenURI != "" {
tokenMap["token_uri"] = tokenURI
}
auth.Metadata["token"] = tokenMap
// Also mirror top-level access_token for compatibility if previously present
if _, ok := auth.Metadata["access_token"]; ok {
auth.Metadata["access_token"] = newTok.AccessToken
}
// Refresh refreshes the authentication credentials (no-op for Gemini API key).
func (e *GeminiExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
return auth, nil
}

View File

@@ -1,6 +1,6 @@
// Package executor contains provider executors. This file implements the Vertex AI
// Gemini executor that talks to Google Vertex AI endpoints using service account
// credentials imported by the CLI.
// Package executor provides runtime execution capabilities for various AI service providers.
// This file implements the Vertex AI Gemini executor that talks to Google Vertex AI
// endpoints using service account credentials or API keys.
package executor
import (
@@ -36,20 +36,26 @@ type GeminiVertexExecutor struct {
cfg *config.Config
}
// NewGeminiVertexExecutor constructs the Vertex executor.
// NewGeminiVertexExecutor creates a new Vertex AI Gemini executor instance.
//
// Parameters:
// - cfg: The application configuration
//
// Returns:
// - *GeminiVertexExecutor: A new Vertex AI Gemini executor instance
func NewGeminiVertexExecutor(cfg *config.Config) *GeminiVertexExecutor {
return &GeminiVertexExecutor{cfg: cfg}
}
// Identifier returns provider key for manager routing.
// Identifier returns the executor identifier.
func (e *GeminiVertexExecutor) Identifier() string { return "vertex" }
// PrepareRequest is a no-op for Vertex.
// PrepareRequest prepares the HTTP request for execution (no-op for Vertex).
func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
return nil
}
// Execute handles non-streaming requests.
// Execute performs a non-streaming request to the Vertex AI API.
func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
// Try API key authentication first
apiKey, baseURL := vertexAPICreds(auth)
@@ -67,7 +73,7 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
return e.executeWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
}
// ExecuteStream handles SSE streaming for Vertex.
// ExecuteStream performs a streaming request to the Vertex AI API.
func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
// Try API key authentication first
apiKey, baseURL := vertexAPICreds(auth)
@@ -85,7 +91,7 @@ func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return e.executeStreamWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
}
// CountTokens calls Vertex countTokens endpoint.
// CountTokens counts tokens for the given request using the Vertex AI API.
func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
// Try API key authentication first
apiKey, baseURL := vertexAPICreds(auth)
@@ -103,185 +109,7 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
return e.countTokensWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
}
// countTokensWithServiceAccount handles token counting using service account credentials.
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
baseURL := vertexBaseURL(location)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
if errNewReq != nil {
return cliproxyexecutor.Response{}, errNewReq
}
httpReq.Header.Set("Content-Type", "application/json")
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
httpReq.Header.Set("Authorization", "Bearer "+token)
} else if errTok != nil {
log.Errorf("vertex executor: access token error: %v", errTok)
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
}
applyGeminiHeaders(httpReq, auth)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
}
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: url,
Method: http.MethodPost,
Headers: httpReq.Header.Clone(),
Body: translatedReq,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpResp, errDo := httpClient.Do(httpReq)
if errDo != nil {
recordAPIResponseError(ctx, e.cfg, errDo)
return cliproxyexecutor.Response{}, errDo
}
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("vertex executor: close response body error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
}
data, errRead := io.ReadAll(httpResp.Body)
if errRead != nil {
recordAPIResponseError(ctx, e.cfg, errRead)
return cliproxyexecutor.Response{}, errRead
}
appendAPIResponseChunk(ctx, e.cfg, data)
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
}
count := gjson.GetBytes(data, "totalTokens").Int()
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
}
// countTokensWithAPIKey handles token counting using API key credentials.
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
// For API key auth, use simpler URL format without project/location
if baseURL == "" {
baseURL = "https://generativelanguage.googleapis.com"
}
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
if errNewReq != nil {
return cliproxyexecutor.Response{}, errNewReq
}
httpReq.Header.Set("Content-Type", "application/json")
if apiKey != "" {
httpReq.Header.Set("x-goog-api-key", apiKey)
}
applyGeminiHeaders(httpReq, auth)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
}
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: url,
Method: http.MethodPost,
Headers: httpReq.Header.Clone(),
Body: translatedReq,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpResp, errDo := httpClient.Do(httpReq)
if errDo != nil {
recordAPIResponseError(ctx, e.cfg, errDo)
return cliproxyexecutor.Response{}, errDo
}
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("vertex executor: close response body error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
}
data, errRead := io.ReadAll(httpResp.Body)
if errRead != nil {
recordAPIResponseError(ctx, e.cfg, errRead)
return cliproxyexecutor.Response{}, errRead
}
appendAPIResponseChunk(ctx, e.cfg, data)
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
}
count := gjson.GetBytes(data, "totalTokens").Int()
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
}
// Refresh is a no-op for service account based credentials.
// Refresh refreshes the authentication credentials (no-op for Vertex).
func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
return auth, nil
}
@@ -579,7 +407,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, streamScannerBuffer)
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -696,7 +524,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, streamScannerBuffer)
var param any
for scanner.Scan() {
line := scanner.Bytes()
@@ -722,6 +550,184 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
return stream, nil
}
// countTokensWithServiceAccount counts tokens using service account credentials.
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
baseURL := vertexBaseURL(location)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
if errNewReq != nil {
return cliproxyexecutor.Response{}, errNewReq
}
httpReq.Header.Set("Content-Type", "application/json")
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
httpReq.Header.Set("Authorization", "Bearer "+token)
} else if errTok != nil {
log.Errorf("vertex executor: access token error: %v", errTok)
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
}
applyGeminiHeaders(httpReq, auth)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
}
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: url,
Method: http.MethodPost,
Headers: httpReq.Header.Clone(),
Body: translatedReq,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpResp, errDo := httpClient.Do(httpReq)
if errDo != nil {
recordAPIResponseError(ctx, e.cfg, errDo)
return cliproxyexecutor.Response{}, errDo
}
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("vertex executor: close response body error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
}
data, errRead := io.ReadAll(httpResp.Body)
if errRead != nil {
recordAPIResponseError(ctx, e.cfg, errRead)
return cliproxyexecutor.Response{}, errRead
}
appendAPIResponseChunk(ctx, e.cfg, data)
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
}
count := gjson.GetBytes(data, "totalTokens").Int()
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
}
// countTokensWithAPIKey handles token counting using API key credentials.
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
// For API key auth, use simpler URL format without project/location
if baseURL == "" {
baseURL = "https://generativelanguage.googleapis.com"
}
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
if errNewReq != nil {
return cliproxyexecutor.Response{}, errNewReq
}
httpReq.Header.Set("Content-Type", "application/json")
if apiKey != "" {
httpReq.Header.Set("x-goog-api-key", apiKey)
}
applyGeminiHeaders(httpReq, auth)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
}
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: url,
Method: http.MethodPost,
Headers: httpReq.Header.Clone(),
Body: translatedReq,
Provider: e.Identifier(),
AuthID: authID,
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpResp, errDo := httpClient.Do(httpReq)
if errDo != nil {
recordAPIResponseError(ctx, e.cfg, errDo)
return cliproxyexecutor.Response{}, errDo
}
defer func() {
if errClose := httpResp.Body.Close(); errClose != nil {
log.Errorf("vertex executor: close response body error: %v", errClose)
}
}()
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
appendAPIResponseChunk(ctx, e.cfg, b)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
}
data, errRead := io.ReadAll(httpResp.Body)
if errRead != nil {
recordAPIResponseError(ctx, e.cfg, errRead)
return cliproxyexecutor.Response{}, errRead
}
appendAPIResponseChunk(ctx, e.cfg, data)
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
}
count := gjson.GetBytes(data, "totalTokens").Int()
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
}
// vertexCreds extracts project, location and raw service account JSON from auth metadata.
func vertexCreds(a *cliproxyauth.Auth) (projectID, location string, serviceAccountJSON []byte, err error) {
if a == nil || a.Metadata == nil {

View File

@@ -57,10 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -143,10 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
toolsResult := gjson.GetBytes(body, "tools")
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -209,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -58,10 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
translated = normalizeThinkingConfig(translated, upstreamModel)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return resp, errValidate
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -147,10 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
translated = normalizeThinkingConfig(translated, upstreamModel)
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
return nil, errValidate
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -214,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -1,6 +1,8 @@
package executor
import (
"fmt"
"net/http"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -9,7 +11,7 @@ import (
"github.com/tidwall/sjson"
)
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -26,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -43,40 +45,21 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
// overwriting caller-provided values to honor suffix/default metadata priority.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
if field == "" {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
return updated
}
}
return payload
}
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
return updated
}
}
@@ -232,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
}
return pi == len(pattern)
}
// normalizeThinkingConfig normalizes thinking-related fields in the payload
// based on model capabilities. For models without thinking support, it strips
// reasoning fields. For models with level-based thinking, it validates and
// normalizes the reasoning effort level.
func normalizeThinkingConfig(payload []byte, model string) []byte {
if len(payload) == 0 || model == "" {
return payload
}
if !util.ModelSupportsThinking(model) {
return stripThinkingFields(payload)
}
if util.ModelUsesThinkingLevels(model) {
return normalizeReasoningEffortLevel(payload, model)
}
return payload
}
// stripThinkingFields removes thinking-related fields from the payload for
// models that do not support thinking.
func stripThinkingFields(payload []byte) []byte {
fieldsToRemove := []string{
"reasoning",
"reasoning_effort",
"reasoning.effort",
}
out := payload
for _, field := range fieldsToRemove {
if gjson.GetBytes(out, field).Exists() {
out, _ = sjson.DeleteBytes(out, field)
}
}
return out
}
// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
// or reasoning.effort field for level-based thinking models.
func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
out := payload
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
}
}
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
}
}
return out
}
// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
// downgrading requests.
func validateThinkingConfig(payload []byte, model string) error {
if len(payload) == 0 || model == "" {
return nil
}
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
return nil
}
levels := util.GetModelThinkingLevels(model)
checkField := func(path string) error {
if effort := gjson.GetBytes(payload, path); effort.Exists() {
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
return statusErr{
code: http.StatusBadRequest,
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
}
}
}
return nil
}
if err := checkField("reasoning_effort"); err != nil {
return err
}
if err := checkField("reasoning.effort"); err != nil {
return err
}
return nil
}

View File

@@ -51,10 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -126,10 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = normalizeThinkingConfig(body, upstreamModel)
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
return nil, errValidate
}
toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.
@@ -190,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
}
}()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 20_971_520)
scanner.Buffer(nil, 52_428_800) // 50MB
var param any
for scanner.Scan() {
line := scanner.Bytes()

View File

@@ -35,6 +35,7 @@ type Params struct {
TotalTokenCount int64 // Cached total token count from usage metadata
HasSentFinalEvents bool // Indicates if final content/message events have been sent
HasToolUse bool // Indicates if tool use was observed in the stream
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -69,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
if bytes.Equal(rawJSON, []byte("[DONE]")) {
output := ""
appendFinalEvents(params, &output, true)
return []string{
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send final events if we have actually output content
if params.HasContent {
appendFinalEvents(params, &output, true)
return []string{
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
output := ""
@@ -119,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
@@ -146,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.ResponseType = 2 // Set state to thinking
params.HasContent = true
}
} else {
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
@@ -156,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
@@ -179,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
params.ResponseType = 1 // Set state to content
params.HasContent = true
}
}
}
@@ -230,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
params.ResponseType = 3
params.HasContent = true
}
}
}
@@ -269,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
return
}
// Only send final events if we have actually output content
if !params.HasContent {
return
}
if params.ResponseType != 0 {
*output = *output + "event: content_block_stop\n"
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)

View File

@@ -331,9 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
streamingEvents := make([][]byte, 0)
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
// Use a smaller initial buffer (64KB) that can grow up to 20MB if needed
// This prevents allocating 20MB for every request regardless of size
scanner.Buffer(make([]byte, 64*1024), 20_971_520)
buffer := make([]byte, 52_428_800) // 50MB
scanner.Buffer(buffer, 52_428_800)
for scanner.Scan() {
line := scanner.Bytes()
// log.Debug(string(line))

View File

@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
// Use a simple scanner to iterate through raw bytes
// Note: extremely large responses may require increasing the buffer
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
buf := make([]byte, 20_971_520)
scanner.Buffer(buf, 20_971_520)
buf := make([]byte, 52_428_800) // 50MB
scanner.Buffer(buf, 52_428_800)
for scanner.Scan() {
line := scanner.Bytes()
if !bytes.HasPrefix(line, dataTag) {

View File

@@ -26,6 +26,7 @@ type Params struct {
HasFirstResponse bool // Indicates if the initial message_start event has been sent
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
ResponseIndex int // Index counter for content blocks in the streaming response
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -57,19 +58,23 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
}
if bytes.Equal(rawJSON, []byte("[DONE]")) {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send message_stop if we have actually output content
if (*param).(*Params).HasContent {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
// Track whether tools are being used in this response chunk
usedTool := false
var sb strings.Builder
output := ""
// Initialize the streaming session with a message_start event
// This is only sent for the very first response chunk to establish the streaming session
if !(*param).(*Params).HasFirstResponse {
sb.WriteString("event: message_start\n")
output = "event: message_start\n"
// Create the initial message structure with default values according to Claude Code API specification
// This follows the Claude Code API specification for streaming message initialization
@@ -82,7 +87,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
if responseIDResult := gjson.GetBytes(rawJSON, "response.responseId"); responseIDResult.Exists() {
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.id", responseIDResult.String())
}
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", messageStartTemplate))
output = output + fmt.Sprintf("data: %s\n\n\n", messageStartTemplate)
(*param).(*Params).HasFirstResponse = true
}
@@ -105,53 +110,67 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
if partResult.Get("thought").Bool() {
// Continue existing thinking block if already in thinking state
if (*param).(*Params).ResponseType == 2 {
sb.WriteString("event: content_block_delta\n")
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
if (*param).(*Params).ResponseType != 0 {
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
if (*param).(*Params).ResponseType == 2 {
// output = output + "event: content_block_delta\n"
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
// output = output + "\n\n\n"
}
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
(*param).(*Params).ResponseIndex++
}
// Start a new thinking content block
sb.WriteString("event: content_block_start\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"thinking","thinking":""}}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
sb.WriteString("event: content_block_delta\n")
output = output + "event: content_block_start\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"thinking","thinking":""}}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 2 // Set state to thinking
(*param).(*Params).HasContent = true
}
} else {
// Process regular text content (user-visible output)
// Continue existing text block if already in content state
if (*param).(*Params).ResponseType == 1 {
sb.WriteString("event: content_block_delta\n")
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
if (*param).(*Params).ResponseType != 0 {
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
if (*param).(*Params).ResponseType == 2 {
// output = output + "event: content_block_delta\n"
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
// output = output + "\n\n\n"
}
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
(*param).(*Params).ResponseIndex++
}
// Start a new text content block
sb.WriteString("event: content_block_start\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
sb.WriteString("event: content_block_delta\n")
output = output + "event: content_block_start\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 1 // Set state to content
(*param).(*Params).HasContent = true
}
}
} else if functionCallResult.Exists() {
@@ -163,37 +182,45 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
// Handle state transitions when switching to function calls
// Close any existing function call block first
if (*param).(*Params).ResponseType == 3 {
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
(*param).(*Params).ResponseIndex++
(*param).(*Params).ResponseType = 0
}
// Special handling for thinking state transition
if (*param).(*Params).ResponseType == 2 {
// output = output + "event: content_block_delta\n"
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
// output = output + "\n\n\n"
}
// Close any other existing content block
if (*param).(*Params).ResponseType != 0 {
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
(*param).(*Params).ResponseIndex++
}
// Start a new tool use content block
// This creates the structure for a function call in Claude Code format
sb.WriteString("event: content_block_start\n")
output = output + "event: content_block_start\n"
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", fcName)
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
sb.WriteString("event: content_block_delta\n")
output = output + "event: content_block_delta\n"
data, _ = sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"input_json_delta","partial_json":""}}`, (*param).(*Params).ResponseIndex), "delta.partial_json", fcArgsResult.Raw)
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
(*param).(*Params).ResponseType = 3
(*param).(*Params).HasContent = true
}
}
}
@@ -202,32 +229,35 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
// Process usage metadata and finish reason when present in the response
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
// Close the final content block
sb.WriteString("event: content_block_stop\n")
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
sb.WriteString("\n\n\n")
// Only send final events if we have actually output content
if (*param).(*Params).HasContent {
// Close the final content block
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
// Send the final message delta with usage information and stop reason
sb.WriteString("event: message_delta\n")
sb.WriteString(`data: `)
// Send the final message delta with usage information and stop reason
output = output + "event: message_delta\n"
output = output + `data: `
// Create the message delta template with appropriate stop reason
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Set tool_use stop reason if tools were used in this response
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Create the message delta template with appropriate stop reason
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
// Set tool_use stop reason if tools were used in this response
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
}
// Include thinking tokens in output token count if present
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
// Include thinking tokens in output token count if present
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
sb.WriteString(template + "\n\n\n")
}
}
return []string{sb.String()}
return []string{output}
}
// ConvertGeminiCLIResponseToClaudeNonStream converts a non-streaming Gemini CLI response to a non-streaming Claude response.

View File

@@ -25,6 +25,7 @@ type Params struct {
HasFirstResponse bool
ResponseType int
ResponseIndex int
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -57,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
}
if bytes.Equal(rawJSON, []byte("[DONE]")) {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
// Only send message_stop if we have actually output content
if (*param).(*Params).HasContent {
return []string{
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
}
}
return []string{}
}
// Track whether tools are being used in this response chunk
@@ -108,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to thinking
// First, close any existing content block
@@ -131,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 2 // Set state to thinking
(*param).(*Params).HasContent = true
}
} else {
// Process regular text content (user-visible output)
@@ -139,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).HasContent = true
} else {
// Transition from another state to text content
// First, close any existing content block
@@ -162,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
(*param).(*Params).ResponseType = 1 // Set state to content
(*param).(*Params).HasContent = true
}
}
} else if functionCallResult.Exists() {
@@ -211,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
(*param).(*Params).ResponseType = 3
(*param).(*Params).HasContent = true
}
}
}
@@ -218,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
// Only send final events if we have actually output content
if (*param).(*Params).HasContent {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
output = output + "\n\n\n"
output = output + "event: message_delta\n"
output = output + `data: `
output = output + "event: message_delta\n"
output = output + `data: `
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
}
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
}

View File

@@ -0,0 +1,46 @@
package util
import (
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
if budget == nil {
return body
}
if gjson.GetBytes(body, "thinking").Exists() {
return body
}
if *budget <= 0 {
return body
}
updated := body
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
return updated
}
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
if !matched {
return nil, false
}
if include != nil && !*include {
return nil, true
}
if budget == nil {
return nil, true
}
normalized := NormalizeThinkingBudget(modelName, *budget)
if normalized <= 0 {
return nil, true
}
return &normalized, true
}

View File

@@ -1,6 +1,8 @@
package util
import (
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
// Returns nil if the model has no thinking support or no levels defined.
func GetModelThinkingLevels(model string) []string {
if model == "" {
return nil
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return nil
}
return info.Thinking.Levels
}
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
// effort levels instead of numeric budgets.
func ModelUsesThinkingLevels(model string) bool {
levels := GetModelThinkingLevels(model)
return len(levels) > 0
}
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
// level for the given model. Returns false when the level is not supported.
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
levels := GetModelThinkingLevels(model)
if len(levels) == 0 {
return "", false
}
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
for _, lvl := range levels {
if strings.ToLower(lvl) == loweredEffort {
return lvl, true
}
}
return "", false
}

View File

@@ -14,61 +14,59 @@ const (
)
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
// the normalized base model with extracted metadata. Supported patterns:
// - "-thinking-<number>" extracts a numeric budget
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
// - "-thinking" maps to a default reasoning effort of "medium"
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
// - "-nothinking" maps to budget=0 and include_thoughts=false
// the normalized base model with extracted metadata. Supported pattern:
// - "(<value>)" where value can be:
// - A numeric budget (e.g., "(8192)", "(16384)")
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
//
// Examples:
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
// - "gpt-5.1(high)" → reasoning_effort="high"
// - "gemini-2.5-pro(32768)" → budget=32768
//
// Note: Empty parentheses "()" are not supported and will be ignored.
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if modelName == "" {
return modelName, nil
}
lower := strings.ToLower(modelName)
baseModel := modelName
var (
budgetOverride *int
includeThoughts *bool
reasoningEffort *string
matched bool
)
switch {
case strings.HasSuffix(lower, "-nothinking"):
baseModel = modelName[:len(modelName)-len("-nothinking")]
budget := 0
include := false
budgetOverride = &budget
includeThoughts = &include
matched = true
case strings.HasSuffix(lower, "-reasoning"):
baseModel = modelName[:len(modelName)-len("-reasoning")]
budget := -1
include := true
budgetOverride = &budget
includeThoughts = &include
matched = true
default:
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
value := modelName[idx+len("-thinking-"):]
if value != "" {
if parsed, ok := parseIntPrefix(value); ok {
baseModel = modelName[:idx]
budgetOverride = &parsed
matched = true
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
baseModel = modelName[:idx]
reasoningEffort = &effort
matched = true
}
}
} else if strings.HasSuffix(lower, "-thinking") {
baseModel = modelName[:len(modelName)-len("-thinking")]
effort := "medium"
reasoningEffort = &effort
// Match "(<value>)" pattern at the end of the model name
if idx := strings.LastIndex(modelName, "("); idx != -1 {
if !strings.HasSuffix(modelName, ")") {
// Incomplete parenthesis, ignore
return baseModel, nil
}
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
if value == "" {
// Empty parentheses not supported
return baseModel, nil
}
candidateBase := modelName[:idx]
// Auto-detect: pure numeric → budget, string → reasoning effort level
if parsed, ok := parseIntPrefix(value); ok {
// Numeric value: treat as thinking budget
baseModel = candidateBase
budgetOverride = &parsed
matched = true
} else {
// String value: treat as reasoning effort level
baseModel = candidateBase
raw := strings.ToLower(strings.TrimSpace(value))
if raw != "" {
reasoningEffort = &raw
matched = true
}
}
}
@@ -82,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if budgetOverride != nil {
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
}
if includeThoughts != nil {
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
}
if reasoningEffort != nil {
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
}
@@ -185,7 +180,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
return "", false
}
if effort != nil && *effort != "" {
return *effort, true
return strings.ToLower(strings.TrimSpace(*effort)), true
}
if budget != nil {
switch *budget {
@@ -207,7 +202,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
if effort == "" {
return 0, false
}
switch strings.ToLower(effort) {
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
if !ok {
normalized = strings.ToLower(strings.TrimSpace(effort))
}
switch normalized {
case "none":
return 0, true
case "auto":
@@ -312,16 +311,3 @@ func parseNumberToInt(raw any) (int, bool) {
}
return 0, false
}
func normalizeReasoningEffort(value string) (string, bool) {
if value == "" {
return "", false
}
effort := strings.ToLower(strings.TrimSpace(value))
switch effort {
case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
return effort, true
default:
return "", false
}
}

View File

@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
continue
}
if errMsg != nil {
status := http.StatusInternalServerError
if errMsg.StatusCode > 0 {
status = errMsg.StatusCode
}
c.Status(status)
// An error occurred: emit as a proper SSE error event
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
_, _ = writer.WriteString("event: error\n")
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
_, _ = writer.Write(errorBytes)
_, _ = writer.WriteString("\n\n")
_ = writer.Flush()
flusher.Flush()
}
var execErr error
if errMsg != nil {