mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-23 19:52:40 +00:00
Compare commits
37 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
242b4d5754 | ||
|
|
4ce7c61a17 | ||
|
|
a74ee3f319 | ||
|
|
564bcbaa54 | ||
|
|
88bdd25f06 | ||
|
|
e79f65fd8e | ||
|
|
2760989401 | ||
|
|
facfe7c518 | ||
|
|
6285459c08 | ||
|
|
21bbceca0c | ||
|
|
f6300c72b7 | ||
|
|
007572b58e | ||
|
|
3a81ab22fd | ||
|
|
519da2e042 | ||
|
|
169f4295d0 | ||
|
|
d06d0eab2f | ||
|
|
3ffd120ae9 | ||
|
|
a03d514095 | ||
|
|
69fccf0015 | ||
|
|
1da03bfe15 | ||
|
|
6133bac226 | ||
|
|
f302be5ce6 | ||
|
|
cd4e84a360 | ||
|
|
4360ed8a7b | ||
|
|
423ce97665 | ||
|
|
b27a175fef | ||
|
|
8d5f89ccfd | ||
|
|
084e2666cb | ||
|
|
2eb2dbb266 | ||
|
|
e717939edb | ||
|
|
7758a86d1e | ||
|
|
76c563d161 | ||
|
|
a89514951f | ||
|
|
1770c491db | ||
|
|
2bf9e08b31 | ||
|
|
f56bfaa689 | ||
|
|
5d716dc796 |
@@ -105,7 +105,7 @@ ws-auth: false
|
|||||||
# excluded-models:
|
# excluded-models:
|
||||||
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
|
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
|
||||||
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
|
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
|
||||||
# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||||
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
|
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
|
||||||
|
|
||||||
# Kiro (AWS CodeWhisperer) configuration
|
# Kiro (AWS CodeWhisperer) configuration
|
||||||
|
|||||||
@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize model (handles Gemini thinking suffixes)
|
// Normalize model (handles dynamic thinking suffixes)
|
||||||
normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
|
normalizedModel, _ := util.NormalizeThinkingModel(modelName)
|
||||||
|
|
||||||
// Track resolved model for logging (may change if mapping is applied)
|
// Track resolved model for logging (may change if mapping is applied)
|
||||||
resolvedModel := normalizedModel
|
resolvedModel := normalizedModel
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 4.5 Haiku",
|
DisplayName: "Claude 4.5 Haiku",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
// Thinking: not supported for Haiku models
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4-5-20250929",
|
ID: "claude-sonnet-4-5-20250929",
|
||||||
@@ -26,60 +27,6 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 4.5 Sonnet",
|
DisplayName: "Claude 4.5 Sonnet",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-sonnet-4-5-thinking",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1759104000, // 2025-09-29
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4.5 Sonnet Thinking",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-opus-4-5-thinking",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1761955200, // 2025-11-01
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4.5 Opus Thinking",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-opus-4-5-thinking-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1761955200, // 2025-11-01
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4.5 Opus Thinking Low",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-opus-4-5-thinking-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1761955200, // 2025-11-01
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4.5 Opus Thinking Medium",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "claude-opus-4-5-thinking-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1761955200, // 2025-11-01
|
|
||||||
OwnedBy: "anthropic",
|
|
||||||
Type: "claude",
|
|
||||||
DisplayName: "Claude 4.5 Opus Thinking High",
|
|
||||||
ContextLength: 200000,
|
|
||||||
MaxCompletionTokens: 64000,
|
|
||||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -92,6 +39,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
Description: "Premium model combining maximum intelligence with practical performance",
|
Description: "Premium model combining maximum intelligence with practical performance",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-opus-4-1-20250805",
|
ID: "claude-opus-4-1-20250805",
|
||||||
@@ -102,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 4.1 Opus",
|
DisplayName: "Claude 4.1 Opus",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 32000,
|
MaxCompletionTokens: 32000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-opus-4-20250514",
|
ID: "claude-opus-4-20250514",
|
||||||
@@ -112,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 4 Opus",
|
DisplayName: "Claude 4 Opus",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 32000,
|
MaxCompletionTokens: 32000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4-20250514",
|
ID: "claude-sonnet-4-20250514",
|
||||||
@@ -122,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 4 Sonnet",
|
DisplayName: "Claude 4 Sonnet",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-3-7-sonnet-20250219",
|
ID: "claude-3-7-sonnet-20250219",
|
||||||
@@ -132,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 3.7 Sonnet",
|
DisplayName: "Claude 3.7 Sonnet",
|
||||||
ContextLength: 128000,
|
ContextLength: 128000,
|
||||||
MaxCompletionTokens: 8192,
|
MaxCompletionTokens: 8192,
|
||||||
|
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-3-5-haiku-20241022",
|
ID: "claude-3-5-haiku-20241022",
|
||||||
@@ -142,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
|
|||||||
DisplayName: "Claude 3.5 Haiku",
|
DisplayName: "Claude 3.5 Haiku",
|
||||||
ContextLength: 128000,
|
ContextLength: 128000,
|
||||||
MaxCompletionTokens: 8192,
|
MaxCompletionTokens: 8192,
|
||||||
|
// Thinking: not supported for Haiku models
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -529,58 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||||
{
|
|
||||||
ID: "gpt-5-minimal",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1754524800,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-08-07",
|
|
||||||
DisplayName: "GPT 5 Minimal",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1754524800,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-08-07",
|
|
||||||
DisplayName: "GPT 5 Low",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1754524800,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-08-07",
|
|
||||||
DisplayName: "GPT 5 Medium",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1754524800,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-08-07",
|
|
||||||
DisplayName: "GPT 5 High",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gpt-5-codex",
|
ID: "gpt-5-codex",
|
||||||
@@ -594,45 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1757894400,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-09-15",
|
|
||||||
DisplayName: "GPT 5 Codex Low",
|
|
||||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1757894400,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-09-15",
|
|
||||||
DisplayName: "GPT 5 Codex Medium",
|
|
||||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1757894400,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-09-15",
|
|
||||||
DisplayName: "GPT 5 Codex High",
|
|
||||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gpt-5-codex-mini",
|
ID: "gpt-5-codex-mini",
|
||||||
@@ -646,32 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-mini-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762473600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-11-07",
|
|
||||||
DisplayName: "GPT 5 Codex Mini Medium",
|
|
||||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5-codex-mini-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762473600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5-2025-11-07",
|
|
||||||
DisplayName: "GPT 5 Codex Mini High",
|
|
||||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gpt-5.1",
|
ID: "gpt-5.1",
|
||||||
@@ -685,58 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||||
{
|
|
||||||
ID: "gpt-5.1-none",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Nothink",
|
|
||||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5 Low",
|
|
||||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Medium",
|
|
||||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 High",
|
|
||||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gpt-5.1-codex",
|
ID: "gpt-5.1-codex",
|
||||||
@@ -750,45 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex Low",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex Medium",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex High",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gpt-5.1-codex-mini",
|
ID: "gpt-5.1-codex-mini",
|
||||||
@@ -802,34 +552,8 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-mini-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex Mini Medium",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-mini-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1762905600,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-2025-11-12",
|
|
||||||
DisplayName: "GPT 5.1 Codex Mini High",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
{
|
||||||
ID: "gpt-5.1-codex-max",
|
ID: "gpt-5.1-codex-max",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
@@ -842,58 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
|
|||||||
ContextLength: 400000,
|
ContextLength: 400000,
|
||||||
MaxCompletionTokens: 128000,
|
MaxCompletionTokens: 128000,
|
||||||
SupportedParameters: []string{"tools"},
|
SupportedParameters: []string{"tools"},
|
||||||
},
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-max-low",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1763424000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-max",
|
|
||||||
DisplayName: "GPT 5.1 Codex Max Low",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Max Low",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-max-medium",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1763424000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-max",
|
|
||||||
DisplayName: "GPT 5.1 Codex Max Medium",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Max Medium",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-max-high",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1763424000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-max",
|
|
||||||
DisplayName: "GPT 5.1 Codex Max High",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Max High",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "gpt-5.1-codex-max-xhigh",
|
|
||||||
Object: "model",
|
|
||||||
Created: 1763424000,
|
|
||||||
OwnedBy: "openai",
|
|
||||||
Type: "openai",
|
|
||||||
Version: "gpt-5.1-max",
|
|
||||||
DisplayName: "GPT 5.1 Codex Max XHigh",
|
|
||||||
Description: "Stable version of GPT 5.1 Codex Max XHigh",
|
|
||||||
ContextLength: 400000,
|
|
||||||
MaxCompletionTokens: 128000,
|
|
||||||
SupportedParameters: []string{"tools"},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -950,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
DisplayName string
|
DisplayName string
|
||||||
Description string
|
Description string
|
||||||
Created int64
|
Created int64
|
||||||
|
Thinking *ThinkingSupport
|
||||||
}{
|
}{
|
||||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||||
@@ -959,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
||||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
|
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
|
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
|
||||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
||||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
||||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
}
|
}
|
||||||
models := make([]*ModelInfo, 0, len(entries))
|
models := make([]*ModelInfo, 0, len(entries))
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
@@ -981,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
Type: "iflow",
|
Type: "iflow",
|
||||||
DisplayName: entry.DisplayName,
|
DisplayName: entry.DisplayName,
|
||||||
Description: entry.Description,
|
Description: entry.Description,
|
||||||
|
Thinking: entry.Thinking,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return models
|
return models
|
||||||
|
|||||||
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
|
|||||||
ZeroAllowed bool `json:"zero_allowed,omitempty"`
|
ZeroAllowed bool `json:"zero_allowed,omitempty"`
|
||||||
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
|
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
|
||||||
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
|
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
|
||||||
|
// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
|
||||||
|
// When set, the model uses level-based reasoning instead of token budgets.
|
||||||
|
Levels []string `json:"levels,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ModelRegistration tracks a model's availability
|
// ModelRegistration tracks a model's availability
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ const (
|
|||||||
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
||||||
antigravityAuthType = "antigravity"
|
antigravityAuthType = "antigravity"
|
||||||
refreshSkew = 3000 * time.Second
|
refreshSkew = 3000 * time.Second
|
||||||
streamScannerBuffer int = 20_971_520
|
streamScannerBuffer int = 52_428_800 // 50MB
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
// Use streaming translation to preserve function calling, except for claude.
|
// Use streaming translation to preserve function calling, except for claude.
|
||||||
stream := from != to
|
stream := from != to
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||||
modelForUpstream := req.Model
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
if upstreamModel == "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
upstreamModel = req.Model
|
||||||
modelForUpstream = modelOverride
|
|
||||||
}
|
}
|
||||||
// Inject thinking config based on model suffix for thinking variants
|
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||||
body = e.injectThinkingConfig(req.Model, body)
|
upstreamModel = modelOverride
|
||||||
|
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||||
|
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||||
|
upstreamModel = modelOverride
|
||||||
|
}
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
// Inject thinking config based on model metadata for thinking variants
|
||||||
|
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||||
|
|
||||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||||
body = checkSystemInstructions(body)
|
body = checkSystemInstructions(body)
|
||||||
}
|
}
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("claude")
|
to := sdktranslator.FromString("claude")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
if upstreamModel == "" {
|
||||||
|
upstreamModel = req.Model
|
||||||
}
|
}
|
||||||
// Inject thinking config based on model suffix for thinking variants
|
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||||
body = e.injectThinkingConfig(req.Model, body)
|
upstreamModel = modelOverride
|
||||||
|
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||||
|
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||||
|
upstreamModel = modelOverride
|
||||||
|
}
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
// Inject thinking config based on model metadata for thinking variants
|
||||||
|
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||||
body = checkSystemInstructions(body)
|
body = checkSystemInstructions(body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
|
||||||
@@ -238,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
// If from == to (Claude → Claude), directly forward the SSE stream without translation
|
// If from == to (Claude → Claude), directly forward the SSE stream without translation
|
||||||
if from == to {
|
if from == to {
|
||||||
scanner := bufio.NewScanner(decodedBody)
|
scanner := bufio.NewScanner(decodedBody)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
appendAPIResponseChunk(ctx, e.cfg, line)
|
appendAPIResponseChunk(ctx, e.cfg, line)
|
||||||
@@ -261,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
|
|
||||||
// For other formats, use translation
|
// For other formats, use translation
|
||||||
scanner := bufio.NewScanner(decodedBody)
|
scanner := bufio.NewScanner(decodedBody)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
// Use streaming translation to preserve function calling, except for claude.
|
// Use streaming translation to preserve function calling, except for claude.
|
||||||
stream := from != to
|
stream := from != to
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||||
modelForUpstream := req.Model
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
if upstreamModel == "" {
|
||||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
upstreamModel = req.Model
|
||||||
modelForUpstream = modelOverride
|
|
||||||
}
|
}
|
||||||
|
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||||
|
upstreamModel = modelOverride
|
||||||
|
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||||
|
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||||
|
upstreamModel = modelOverride
|
||||||
|
}
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||||
body = checkSystemInstructions(body)
|
body = checkSystemInstructions(body)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -427,31 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
|
|||||||
return betas, body
|
return betas, body
|
||||||
}
|
}
|
||||||
|
|
||||||
// injectThinkingConfig adds thinking configuration based on model name suffix
|
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
|
||||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
|
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
|
||||||
// Only inject if thinking config is not already present
|
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
|
||||||
if gjson.GetBytes(body, "thinking").Exists() {
|
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
|
||||||
|
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
|
||||||
|
if !ok {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
return util.ApplyClaudeThinkingConfig(body, budget)
|
||||||
var budgetTokens int
|
|
||||||
switch {
|
|
||||||
case strings.HasSuffix(modelName, "-thinking-low"):
|
|
||||||
budgetTokens = 1024
|
|
||||||
case strings.HasSuffix(modelName, "-thinking-medium"):
|
|
||||||
budgetTokens = 8192
|
|
||||||
case strings.HasSuffix(modelName, "-thinking-high"):
|
|
||||||
budgetTokens = 24576
|
|
||||||
case strings.HasSuffix(modelName, "-thinking"):
|
|
||||||
// Default thinking without suffix uses medium budget
|
|
||||||
budgetTokens = 8192
|
|
||||||
default:
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
|
|
||||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
|
|
||||||
return body
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
||||||
@@ -491,35 +498,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||||
if alias == "" {
|
trimmed := strings.TrimSpace(alias)
|
||||||
|
if trimmed == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
// Hardcoded mappings for thinking models to actual Claude model names
|
|
||||||
switch alias {
|
|
||||||
case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
|
|
||||||
return "claude-opus-4-5-20251101"
|
|
||||||
case "claude-sonnet-4-5-thinking":
|
|
||||||
return "claude-sonnet-4-5-20250929"
|
|
||||||
}
|
|
||||||
entry := e.resolveClaudeConfig(auth)
|
entry := e.resolveClaudeConfig(auth)
|
||||||
if entry == nil {
|
if entry == nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||||
|
|
||||||
|
// Candidate names to match against configured aliases/names.
|
||||||
|
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||||
|
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||||
|
candidates = append(candidates, trimmed)
|
||||||
|
}
|
||||||
|
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||||
|
candidates = append(candidates, original)
|
||||||
|
}
|
||||||
|
|
||||||
for i := range entry.Models {
|
for i := range entry.Models {
|
||||||
model := entry.Models[i]
|
model := entry.Models[i]
|
||||||
name := strings.TrimSpace(model.Name)
|
name := strings.TrimSpace(model.Name)
|
||||||
modelAlias := strings.TrimSpace(model.Alias)
|
modelAlias := strings.TrimSpace(model.Alias)
|
||||||
if modelAlias != "" {
|
|
||||||
if strings.EqualFold(modelAlias, alias) {
|
for _, candidate := range candidates {
|
||||||
|
if candidate == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||||
if name != "" {
|
if name != "" {
|
||||||
return name
|
return name
|
||||||
}
|
}
|
||||||
return alias
|
return candidate
|
||||||
|
}
|
||||||
|
if name != "" && strings.EqualFold(name, candidate) {
|
||||||
|
return name
|
||||||
}
|
}
|
||||||
continue
|
|
||||||
}
|
|
||||||
if name != "" && strings.EqualFold(name, alias) {
|
|
||||||
return name
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -49,14 +49,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("codex")
|
to := sdktranslator.FromString("codex")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return resp, errValidate
|
||||||
|
}
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
body, _ = sjson.SetBytes(body, "stream", true)
|
body, _ = sjson.SetBytes(body, "stream", true)
|
||||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||||
|
|
||||||
@@ -142,13 +146,20 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("codex")
|
to := sdktranslator.FromString("codex")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||||
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return nil, errValidate
|
||||||
|
}
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||||
@@ -205,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
@@ -235,14 +246,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("codex")
|
to := sdktranslator.FromString("codex")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
|
||||||
modelForCounting := req.Model
|
modelForCounting := req.Model
|
||||||
|
|
||||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||||
body, _ = sjson.SetBytes(body, "stream", false)
|
body, _ = sjson.SetBytes(body, "stream", false)
|
||||||
|
|
||||||
@@ -261,83 +274,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
|
|
||||||
if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5-minimal":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
|
|
||||||
case "gpt-5-low":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
|
||||||
case "gpt-5-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5-codex-low":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
|
||||||
case "gpt-5-codex-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5-codex-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5-codex-mini-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5-codex-mini-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5.1-none":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
|
|
||||||
case "gpt-5.1-low":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
|
||||||
case "gpt-5.1-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5.1-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5.1-codex-low":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
|
||||||
case "gpt-5.1-codex-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5.1-codex-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5.1-codex-mini-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5.1-codex-mini-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
}
|
|
||||||
} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
|
|
||||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
|
|
||||||
switch modelName {
|
|
||||||
case "gpt-5.1-codex-max-low":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
|
||||||
case "gpt-5.1-codex-max-medium":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
|
||||||
case "gpt-5.1-codex-max-high":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
|
||||||
case "gpt-5.1-codex-max-xhigh":
|
|
||||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return payload
|
|
||||||
}
|
|
||||||
|
|
||||||
func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
|
func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
|
||||||
sanitized := strings.ToLower(strings.TrimSpace(model))
|
sanitized := strings.ToLower(strings.TrimSpace(model))
|
||||||
switch {
|
switch {
|
||||||
|
|||||||
@@ -309,7 +309,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
}()
|
}()
|
||||||
if opts.Alt == "" {
|
if opts.Alt == "" {
|
||||||
scanner := bufio.NewScanner(resp.Body)
|
scanner := bufio.NewScanner(resp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
@@ -75,6 +75,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
// Official Gemini API via API key or OAuth bearer
|
// Official Gemini API via API key or OAuth bearer
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
@@ -85,6 +87,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
action := "generateContent"
|
action := "generateContent"
|
||||||
if req.Metadata != nil {
|
if req.Metadata != nil {
|
||||||
@@ -93,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
baseURL := resolveGeminiBaseURL(auth)
|
baseURL := resolveGeminiBaseURL(auth)
|
||||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
|
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
|
||||||
if opts.Alt != "" && action != "countTokens" {
|
if opts.Alt != "" && action != "countTokens" {
|
||||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||||
}
|
}
|
||||||
@@ -167,6 +170,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
@@ -176,9 +181,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
baseURL := resolveGeminiBaseURL(auth)
|
baseURL := resolveGeminiBaseURL(auth)
|
||||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
|
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
|
||||||
if opts.Alt == "" {
|
if opts.Alt == "" {
|
||||||
url = url + "?alt=sse"
|
url = url + "?alt=sse"
|
||||||
} else {
|
} else {
|
||||||
@@ -243,7 +249,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
|
|||||||
|
|
||||||
// countTokensWithServiceAccount handles token counting using service account credentials.
|
// countTokensWithServiceAccount handles token counting using service account credentials.
|
||||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
|||||||
}
|
}
|
||||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||||
|
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||||
|
|
||||||
baseURL := vertexBaseURL(location)
|
baseURL := vertexBaseURL(location)
|
||||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
|
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||||
|
|
||||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||||
if errNewReq != nil {
|
if errNewReq != nil {
|
||||||
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
|||||||
|
|
||||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
|
|||||||
}
|
}
|
||||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||||
|
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||||
@@ -286,10 +292,12 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -301,6 +309,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
action := "generateContent"
|
action := "generateContent"
|
||||||
if req.Metadata != nil {
|
if req.Metadata != nil {
|
||||||
@@ -309,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
baseURL := vertexBaseURL(location)
|
baseURL := vertexBaseURL(location)
|
||||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
|
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
|
||||||
if opts.Alt != "" && action != "countTokens" {
|
if opts.Alt != "" && action != "countTokens" {
|
||||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||||
}
|
}
|
||||||
@@ -383,10 +392,12 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -398,6 +409,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
action := "generateContent"
|
action := "generateContent"
|
||||||
if req.Metadata != nil {
|
if req.Metadata != nil {
|
||||||
@@ -410,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
|||||||
if baseURL == "" {
|
if baseURL == "" {
|
||||||
baseURL = "https://generativelanguage.googleapis.com"
|
baseURL = "https://generativelanguage.googleapis.com"
|
||||||
}
|
}
|
||||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
|
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
|
||||||
if opts.Alt != "" && action != "countTokens" {
|
if opts.Alt != "" && action != "countTokens" {
|
||||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||||
}
|
}
|
||||||
@@ -481,10 +493,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -496,9 +510,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
baseURL := vertexBaseURL(location)
|
baseURL := vertexBaseURL(location)
|
||||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
|
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
|
||||||
if opts.Alt == "" {
|
if opts.Alt == "" {
|
||||||
url = url + "?alt=sse"
|
url = url + "?alt=sse"
|
||||||
} else {
|
} else {
|
||||||
@@ -564,7 +579,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
@@ -595,10 +610,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
|||||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||||
defer reporter.trackFailure(ctx, &err)
|
defer reporter.trackFailure(ctx, &err)
|
||||||
|
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
|
||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||||
if budgetOverride != nil {
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
@@ -610,12 +627,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
|||||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
|
||||||
// For API key auth, use simpler URL format without project/location
|
// For API key auth, use simpler URL format without project/location
|
||||||
if baseURL == "" {
|
if baseURL == "" {
|
||||||
baseURL = "https://generativelanguage.googleapis.com"
|
baseURL = "https://generativelanguage.googleapis.com"
|
||||||
}
|
}
|
||||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
|
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
|
||||||
if opts.Alt == "" {
|
if opts.Alt == "" {
|
||||||
url = url + "?alt=sse"
|
url = url + "?alt=sse"
|
||||||
} else {
|
} else {
|
||||||
@@ -678,7 +696,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
@@ -57,6 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return resp, errValidate
|
||||||
|
}
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
|
||||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||||
@@ -139,6 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return nil, errValidate
|
||||||
|
}
|
||||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
toolsResult := gjson.GetBytes(body, "tools")
|
||||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||||
@@ -201,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -58,6 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
|||||||
translated = e.overrideModel(translated, modelOverride)
|
translated = e.overrideModel(translated, modelOverride)
|
||||||
}
|
}
|
||||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||||
|
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||||
|
return resp, errValidate
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||||
@@ -143,6 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
|||||||
translated = e.overrideModel(translated, modelOverride)
|
translated = e.overrideModel(translated, modelOverride)
|
||||||
}
|
}
|
||||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||||
|
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||||
|
return nil, errValidate
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||||
@@ -206,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package executor
|
package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
@@ -9,11 +11,11 @@ import (
|
|||||||
"github.com/tidwall/sjson"
|
"github.com/tidwall/sjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||||
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||||
if !ok {
|
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||||
return payload
|
return payload
|
||||||
}
|
}
|
||||||
if !util.ModelSupportsThinking(model) {
|
if !util.ModelSupportsThinking(model) {
|
||||||
@@ -26,20 +28,44 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
|
|||||||
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||||
}
|
}
|
||||||
|
|
||||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||||
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||||
if !ok {
|
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||||
return payload
|
return payload
|
||||||
}
|
}
|
||||||
if budgetOverride != nil && util.ModelSupportsThinking(model) {
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if budgetOverride != nil {
|
||||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||||
budgetOverride = &norm
|
budgetOverride = &norm
|
||||||
}
|
}
|
||||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
||||||
|
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
||||||
|
// overwriting caller-provided values to honor suffix/default metadata priority.
|
||||||
|
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
|
||||||
|
if len(metadata) == 0 {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if field == "" {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||||
|
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
// applyPayloadConfig applies payload default and override rules from configuration
|
// applyPayloadConfig applies payload default and override rules from configuration
|
||||||
// to the given JSON payload for the specified model.
|
// to the given JSON payload for the specified model.
|
||||||
// Defaults only fill missing fields, while overrides always overwrite existing values.
|
// Defaults only fill missing fields, while overrides always overwrite existing values.
|
||||||
@@ -189,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
|
|||||||
}
|
}
|
||||||
return pi == len(pattern)
|
return pi == len(pattern)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeThinkingConfig normalizes thinking-related fields in the payload
|
||||||
|
// based on model capabilities. For models without thinking support, it strips
|
||||||
|
// reasoning fields. For models with level-based thinking, it validates and
|
||||||
|
// normalizes the reasoning effort level.
|
||||||
|
func normalizeThinkingConfig(payload []byte, model string) []byte {
|
||||||
|
if len(payload) == 0 || model == "" {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return stripThinkingFields(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.ModelUsesThinkingLevels(model) {
|
||||||
|
return normalizeReasoningEffortLevel(payload, model)
|
||||||
|
}
|
||||||
|
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripThinkingFields removes thinking-related fields from the payload for
|
||||||
|
// models that do not support thinking.
|
||||||
|
func stripThinkingFields(payload []byte) []byte {
|
||||||
|
fieldsToRemove := []string{
|
||||||
|
"reasoning",
|
||||||
|
"reasoning_effort",
|
||||||
|
"reasoning.effort",
|
||||||
|
}
|
||||||
|
out := payload
|
||||||
|
for _, field := range fieldsToRemove {
|
||||||
|
if gjson.GetBytes(out, field).Exists() {
|
||||||
|
out, _ = sjson.DeleteBytes(out, field)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
|
||||||
|
// or reasoning.effort field for level-based thinking models.
|
||||||
|
func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
|
||||||
|
out := payload
|
||||||
|
|
||||||
|
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
|
||||||
|
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
|
||||||
|
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||||
|
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
|
||||||
|
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
|
||||||
|
// downgrading requests.
|
||||||
|
func validateThinkingConfig(payload []byte, model string) error {
|
||||||
|
if len(payload) == 0 || model == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
levels := util.GetModelThinkingLevels(model)
|
||||||
|
checkField := func(path string) error {
|
||||||
|
if effort := gjson.GetBytes(payload, path); effort.Exists() {
|
||||||
|
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
|
||||||
|
return statusErr{
|
||||||
|
code: http.StatusBadRequest,
|
||||||
|
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := checkField("reasoning_effort"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := checkField("reasoning.effort"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
|
|
||||||
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
@@ -50,6 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return resp, errValidate
|
||||||
|
}
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
@@ -121,6 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
to := sdktranslator.FromString("openai")
|
to := sdktranslator.FromString("openai")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
|
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||||
|
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||||
|
if upstreamModel != "" {
|
||||||
|
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||||
|
}
|
||||||
|
body = normalizeThinkingConfig(body, upstreamModel)
|
||||||
|
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||||
|
return nil, errValidate
|
||||||
|
}
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
toolsResult := gjson.GetBytes(body, "tools")
|
||||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||||
// This will have no real consequences. It's just to scare Qwen3.
|
// This will have no real consequences. It's just to scare Qwen3.
|
||||||
@@ -181,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
scanner.Buffer(nil, 20_971_520)
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
var param any
|
var param any
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
|
|||||||
@@ -123,6 +123,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
|||||||
functionResponse := client.FunctionResponse{ID: toolCallID, Name: funcName, Response: map[string]interface{}{"result": responseData}}
|
functionResponse := client.FunctionResponse{ID: toolCallID, Name: funcName, Response: map[string]interface{}{"result": responseData}}
|
||||||
clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
|
clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
|
||||||
}
|
}
|
||||||
|
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "image" {
|
||||||
|
sourceResult := contentResult.Get("source")
|
||||||
|
if sourceResult.Get("type").String() == "base64" {
|
||||||
|
inlineData := &client.InlineData{
|
||||||
|
MimeType: sourceResult.Get("media_type").String(),
|
||||||
|
Data: sourceResult.Get("data").String(),
|
||||||
|
}
|
||||||
|
clientContent.Parts = append(clientContent.Parts, client.Part{InlineData: inlineData})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
contents = append(contents, clientContent)
|
contents = append(contents, clientContent)
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ type Params struct {
|
|||||||
TotalTokenCount int64 // Cached total token count from usage metadata
|
TotalTokenCount int64 // Cached total token count from usage metadata
|
||||||
HasSentFinalEvents bool // Indicates if final content/message events have been sent
|
HasSentFinalEvents bool // Indicates if final content/message events have been sent
|
||||||
HasToolUse bool // Indicates if tool use was observed in the stream
|
HasToolUse bool // Indicates if tool use was observed in the stream
|
||||||
|
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||||
}
|
}
|
||||||
|
|
||||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||||
@@ -69,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
|
|
||||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||||
output := ""
|
output := ""
|
||||||
appendFinalEvents(params, &output, true)
|
// Only send final events if we have actually output content
|
||||||
|
if params.HasContent {
|
||||||
return []string{
|
appendFinalEvents(params, &output, true)
|
||||||
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
return []string{
|
||||||
|
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return []string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
output := ""
|
output := ""
|
||||||
@@ -119,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
output = output + "event: content_block_delta\n"
|
output = output + "event: content_block_delta\n"
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
params.HasContent = true
|
||||||
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
|
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
|
||||||
output = output + "event: content_block_delta\n"
|
output = output + "event: content_block_delta\n"
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
params.HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to thinking
|
// Transition from another state to thinking
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -146,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
params.ResponseType = 2 // Set state to thinking
|
params.ResponseType = 2 // Set state to thinking
|
||||||
|
params.HasContent = true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
|
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
|
||||||
@@ -156,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
output = output + "event: content_block_delta\n"
|
output = output + "event: content_block_delta\n"
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
params.HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to text content
|
// Transition from another state to text content
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -179,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
params.ResponseType = 1 // Set state to content
|
params.ResponseType = 1 // Set state to content
|
||||||
|
params.HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -230,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
}
|
}
|
||||||
params.ResponseType = 3
|
params.ResponseType = 3
|
||||||
|
params.HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -269,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only send final events if we have actually output content
|
||||||
|
if !params.HasContent {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if params.ResponseType != 0 {
|
if params.ResponseType != 0 {
|
||||||
*output = *output + "event: content_block_stop\n"
|
*output = *output + "event: content_block_stop\n"
|
||||||
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)
|
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)
|
||||||
|
|||||||
@@ -331,9 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
|
|||||||
streamingEvents := make([][]byte, 0)
|
streamingEvents := make([][]byte, 0)
|
||||||
|
|
||||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||||
// Use a smaller initial buffer (64KB) that can grow up to 20MB if needed
|
buffer := make([]byte, 52_428_800) // 50MB
|
||||||
// This prevents allocating 20MB for every request regardless of size
|
scanner.Buffer(buffer, 52_428_800)
|
||||||
scanner.Buffer(make([]byte, 64*1024), 20_971_520)
|
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
// log.Debug(string(line))
|
// log.Debug(string(line))
|
||||||
|
|||||||
@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
|
|||||||
// Use a simple scanner to iterate through raw bytes
|
// Use a simple scanner to iterate through raw bytes
|
||||||
// Note: extremely large responses may require increasing the buffer
|
// Note: extremely large responses may require increasing the buffer
|
||||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||||
buf := make([]byte, 20_971_520)
|
buf := make([]byte, 52_428_800) // 50MB
|
||||||
scanner.Buffer(buf, 20_971_520)
|
scanner.Buffer(buf, 52_428_800)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Bytes()
|
line := scanner.Bytes()
|
||||||
if !bytes.HasPrefix(line, dataTag) {
|
if !bytes.HasPrefix(line, dataTag) {
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ type Params struct {
|
|||||||
HasFirstResponse bool // Indicates if the initial message_start event has been sent
|
HasFirstResponse bool // Indicates if the initial message_start event has been sent
|
||||||
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
|
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
|
||||||
ResponseIndex int // Index counter for content blocks in the streaming response
|
ResponseIndex int // Index counter for content blocks in the streaming response
|
||||||
|
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||||
}
|
}
|
||||||
|
|
||||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||||
@@ -57,9 +58,13 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
}
|
}
|
||||||
|
|
||||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||||
return []string{
|
// Only send message_stop if we have actually output content
|
||||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
if (*param).(*Params).HasContent {
|
||||||
|
return []string{
|
||||||
|
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return []string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track whether tools are being used in this response chunk
|
// Track whether tools are being used in this response chunk
|
||||||
@@ -107,7 +112,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
if (*param).(*Params).ResponseType == 2 {
|
if (*param).(*Params).ResponseType == 2 {
|
||||||
sb.WriteString("event: content_block_delta\n")
|
sb.WriteString("event: content_block_delta\n")
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to thinking
|
// Transition from another state to thinking
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -126,6 +132,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Process regular text content (user-visible output)
|
// Process regular text content (user-visible output)
|
||||||
@@ -133,7 +140,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
if (*param).(*Params).ResponseType == 1 {
|
if (*param).(*Params).ResponseType == 1 {
|
||||||
sb.WriteString("event: content_block_delta\n")
|
sb.WriteString("event: content_block_delta\n")
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to text content
|
// Transition from another state to text content
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -152,6 +160,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if functionCallResult.Exists() {
|
} else if functionCallResult.Exists() {
|
||||||
@@ -194,6 +203,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||||
}
|
}
|
||||||
(*param).(*Params).ResponseType = 3
|
(*param).(*Params).ResponseType = 3
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -202,28 +212,31 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
|||||||
// Process usage metadata and finish reason when present in the response
|
// Process usage metadata and finish reason when present in the response
|
||||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||||
// Close the final content block
|
// Only send final events if we have actually output content
|
||||||
sb.WriteString("event: content_block_stop\n")
|
if (*param).(*Params).HasContent {
|
||||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
// Close the final content block
|
||||||
sb.WriteString("\n\n\n")
|
output = output + "event: content_block_stop\n"
|
||||||
|
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||||
|
output = output + "\n\n\n"
|
||||||
|
|
||||||
// Send the final message delta with usage information and stop reason
|
// Send the final message delta with usage information and stop reason
|
||||||
sb.WriteString("event: message_delta\n")
|
output = output + "event: message_delta\n"
|
||||||
sb.WriteString(`data: `)
|
output = output + `data: `
|
||||||
|
|
||||||
// Create the message delta template with appropriate stop reason
|
// Create the message delta template with appropriate stop reason
|
||||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
// Set tool_use stop reason if tools were used in this response
|
// Set tool_use stop reason if tools were used in this response
|
||||||
if usedTool {
|
if usedTool {
|
||||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include thinking tokens in output token count if present
|
||||||
|
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||||
|
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||||
|
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||||
|
|
||||||
|
output = output + template + "\n\n\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Include thinking tokens in output token count if present
|
|
||||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
|
||||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
|
||||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
|
||||||
|
|
||||||
sb.WriteString(template + "\n\n\n")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ type Params struct {
|
|||||||
HasFirstResponse bool
|
HasFirstResponse bool
|
||||||
ResponseType int
|
ResponseType int
|
||||||
ResponseIndex int
|
ResponseIndex int
|
||||||
|
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||||
}
|
}
|
||||||
|
|
||||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||||
@@ -57,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
}
|
}
|
||||||
|
|
||||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||||
return []string{
|
// Only send message_stop if we have actually output content
|
||||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
if (*param).(*Params).HasContent {
|
||||||
|
return []string{
|
||||||
|
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return []string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track whether tools are being used in this response chunk
|
// Track whether tools are being used in this response chunk
|
||||||
@@ -108,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
output = output + "event: content_block_delta\n"
|
output = output + "event: content_block_delta\n"
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to thinking
|
// Transition from another state to thinking
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -131,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Process regular text content (user-visible output)
|
// Process regular text content (user-visible output)
|
||||||
@@ -139,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
output = output + "event: content_block_delta\n"
|
output = output + "event: content_block_delta\n"
|
||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
} else {
|
} else {
|
||||||
// Transition from another state to text content
|
// Transition from another state to text content
|
||||||
// First, close any existing content block
|
// First, close any existing content block
|
||||||
@@ -162,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if functionCallResult.Exists() {
|
} else if functionCallResult.Exists() {
|
||||||
@@ -211,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||||
}
|
}
|
||||||
(*param).(*Params).ResponseType = 3
|
(*param).(*Params).ResponseType = 3
|
||||||
|
(*param).(*Params).HasContent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -218,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
|||||||
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
|
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
|
||||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||||
output = output + "event: content_block_stop\n"
|
// Only send final events if we have actually output content
|
||||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
if (*param).(*Params).HasContent {
|
||||||
output = output + "\n\n\n"
|
output = output + "event: content_block_stop\n"
|
||||||
|
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||||
|
output = output + "\n\n\n"
|
||||||
|
|
||||||
output = output + "event: message_delta\n"
|
output = output + "event: message_delta\n"
|
||||||
output = output + `data: `
|
output = output + `data: `
|
||||||
|
|
||||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
if usedTool {
|
if usedTool {
|
||||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
|
}
|
||||||
|
|
||||||
|
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||||
|
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||||
|
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||||
|
|
||||||
|
output = output + template + "\n\n\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
|
||||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
|
||||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
|
||||||
|
|
||||||
output = output + template + "\n\n\n"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
// Package claude provides translation between Kiro and Claude formats.
|
// Package claude provides translation between Kiro and Claude formats.
|
||||||
// Since Kiro uses Claude-compatible format internally, translations are mostly pass-through.
|
// Since Kiro executor generates Claude-compatible SSE format internally (with event: prefix),
|
||||||
|
// translations are pass-through.
|
||||||
package claude
|
package claude
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -14,6 +15,8 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConvertKiroResponseToClaude converts Kiro streaming response to Claude format.
|
// ConvertKiroResponseToClaude converts Kiro streaming response to Claude format.
|
||||||
|
// Kiro executor already generates complete SSE format with "event:" prefix,
|
||||||
|
// so this is a simple pass-through.
|
||||||
func ConvertKiroResponseToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
func ConvertKiroResponseToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
||||||
return []string{string(rawResponse)}
|
return []string{string(rawResponse)}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package chat_completions
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@@ -13,15 +14,58 @@ import (
|
|||||||
// ConvertKiroResponseToOpenAI converts Kiro streaming response to OpenAI SSE format.
|
// ConvertKiroResponseToOpenAI converts Kiro streaming response to OpenAI SSE format.
|
||||||
// Handles Claude SSE events: content_block_start, content_block_delta, input_json_delta,
|
// Handles Claude SSE events: content_block_start, content_block_delta, input_json_delta,
|
||||||
// content_block_stop, message_delta, and message_stop.
|
// content_block_stop, message_delta, and message_stop.
|
||||||
|
// Input may be in SSE format: "event: xxx\ndata: {...}" or raw JSON.
|
||||||
func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
||||||
root := gjson.ParseBytes(rawResponse)
|
raw := string(rawResponse)
|
||||||
|
var results []string
|
||||||
|
|
||||||
|
// Handle SSE format: extract JSON from "data: " lines
|
||||||
|
// Input format: "event: message_start\ndata: {...}"
|
||||||
|
lines := strings.Split(raw, "\n")
|
||||||
|
for _, line := range lines {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if strings.HasPrefix(line, "data: ") {
|
||||||
|
jsonPart := strings.TrimPrefix(line, "data: ")
|
||||||
|
chunks := convertClaudeEventToOpenAI(jsonPart, model)
|
||||||
|
results = append(results, chunks...)
|
||||||
|
} else if strings.HasPrefix(line, "{") {
|
||||||
|
// Raw JSON (backward compatibility)
|
||||||
|
chunks := convertClaudeEventToOpenAI(line, model)
|
||||||
|
results = append(results, chunks...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// convertClaudeEventToOpenAI converts a single Claude JSON event to OpenAI format
|
||||||
|
func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
||||||
|
root := gjson.Parse(jsonStr)
|
||||||
var results []string
|
var results []string
|
||||||
|
|
||||||
eventType := root.Get("type").String()
|
eventType := root.Get("type").String()
|
||||||
|
|
||||||
switch eventType {
|
switch eventType {
|
||||||
case "message_start":
|
case "message_start":
|
||||||
// Initial message event - could emit initial chunk if needed
|
// Initial message event - emit initial chunk with role
|
||||||
|
response := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:24],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"delta": map[string]interface{}{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
},
|
||||||
|
"finish_reason": nil,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, _ := json.Marshal(response)
|
||||||
|
results = append(results, string(result))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
case "content_block_start":
|
case "content_block_start":
|
||||||
@@ -127,7 +171,7 @@ func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequ
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
case "message_delta":
|
case "message_delta":
|
||||||
// Final message delta with stop_reason
|
// Final message delta with stop_reason and usage
|
||||||
stopReason := root.Get("delta.stop_reason").String()
|
stopReason := root.Get("delta.stop_reason").String()
|
||||||
if stopReason != "" {
|
if stopReason != "" {
|
||||||
finishReason := "stop"
|
finishReason := "stop"
|
||||||
@@ -152,6 +196,19 @@ func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequ
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract and include usage information from message_delta event
|
||||||
|
usage := root.Get("usage")
|
||||||
|
if usage.Exists() {
|
||||||
|
inputTokens := usage.Get("input_tokens").Int()
|
||||||
|
outputTokens := usage.Get("output_tokens").Int()
|
||||||
|
response["usage"] = map[string]interface{}{
|
||||||
|
"prompt_tokens": inputTokens,
|
||||||
|
"completion_tokens": outputTokens,
|
||||||
|
"total_tokens": inputTokens + outputTokens,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
result, _ := json.Marshal(response)
|
result, _ := json.Marshal(response)
|
||||||
results = append(results, string(result))
|
results = append(results, string(result))
|
||||||
}
|
}
|
||||||
|
|||||||
46
internal/util/claude_thinking.go
Normal file
46
internal/util/claude_thinking.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
"github.com/tidwall/sjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
|
||||||
|
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
|
||||||
|
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
|
||||||
|
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
|
||||||
|
if budget == nil {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(body, "thinking").Exists() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
if *budget <= 0 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
updated := body
|
||||||
|
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
|
||||||
|
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
|
||||||
|
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
||||||
|
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
||||||
|
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
||||||
|
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
||||||
|
if !matched {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
if include != nil && !*include {
|
||||||
|
return nil, true
|
||||||
|
}
|
||||||
|
if budget == nil {
|
||||||
|
return nil, true
|
||||||
|
}
|
||||||
|
normalized := NormalizeThinkingBudget(modelName, *budget)
|
||||||
|
if normalized <= 0 {
|
||||||
|
return nil, true
|
||||||
|
}
|
||||||
|
return &normalized, true
|
||||||
|
}
|
||||||
@@ -1,8 +1,6 @@
|
|||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
@@ -15,80 +13,6 @@ const (
|
|||||||
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseGeminiThinkingSuffix recognises thinking suffixes on Gemini model names.
// Matching is case-insensitive and only applies to names starting with "gemini-".
//
// Recognised forms:
//   - "<base>-nothinking": budget 0 (128 for names starting with
//     "gemini-2.5-pro") with include-thoughts set to false.
//   - "<base>-reasoning": budget -1 with include-thoughts set to true.
//   - "<base>-thinking-<digits>...": budget taken from the leading digits;
//     include-thoughts is left nil.
//
// It returns the base model, the budget, the include-thoughts flag and whether
// a suffix matched. On no match the model is returned unchanged.
func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
	const (
		noThinkingSuffix = "-nothinking"
		reasoningSuffix  = "-reasoning"
		budgetMarker     = "-thinking-"
	)

	folded := strings.ToLower(model)
	if !strings.HasPrefix(folded, "gemini-") {
		return model, nil, nil, false
	}

	switch {
	case strings.HasSuffix(folded, noThinkingSuffix):
		budget, show := 0, false
		if strings.HasPrefix(folded, "gemini-2.5-pro") {
			budget = 128
		}
		return model[:len(model)-len(noThinkingSuffix)], &budget, &show, true
	case strings.HasSuffix(folded, reasoningSuffix):
		budget, show := -1, true
		return model[:len(model)-len(reasoningSuffix)], &budget, &show, true
	}

	markerAt := strings.LastIndex(folded, budgetMarker)
	if markerAt < 0 {
		return model, nil, nil, false
	}

	// Count the leading decimal digits after the marker; anything after them is ignored.
	tail := model[markerAt+len(budgetMarker):]
	numLen := 0
	for numLen < len(tail) && tail[numLen] >= '0' && tail[numLen] <= '9' {
		numLen++
	}
	if numLen == 0 {
		return model, nil, nil, false
	}

	budget, err := strconv.Atoi(tail[:numLen])
	if err != nil {
		return model, nil, nil, false
	}
	return model[:markerAt], &budget, nil, true
}
|
|
||||||
|
|
||||||
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
|
|
||||||
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
|
|
||||||
if !matched {
|
|
||||||
return baseModel, nil
|
|
||||||
}
|
|
||||||
metadata := map[string]any{
|
|
||||||
GeminiOriginalModelMetadataKey: modelName,
|
|
||||||
}
|
|
||||||
if budget != nil {
|
|
||||||
metadata[GeminiThinkingBudgetMetadataKey] = *budget
|
|
||||||
}
|
|
||||||
if include != nil {
|
|
||||||
metadata[GeminiIncludeThoughtsMetadataKey] = *include
|
|
||||||
}
|
|
||||||
return baseModel, metadata
|
|
||||||
}
|
|
||||||
|
|
||||||
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||||
if budget == nil && includeThoughts == nil {
|
if budget == nil && includeThoughts == nil {
|
||||||
return body
|
return body
|
||||||
@@ -133,80 +57,6 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
|
|||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
|
|
||||||
if len(metadata) == 0 {
|
|
||||||
return nil, nil, false
|
|
||||||
}
|
|
||||||
var (
|
|
||||||
budgetPtr *int
|
|
||||||
includePtr *bool
|
|
||||||
matched bool
|
|
||||||
)
|
|
||||||
if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
|
|
||||||
switch v := rawBudget.(type) {
|
|
||||||
case int:
|
|
||||||
budget := v
|
|
||||||
budgetPtr = &budget
|
|
||||||
matched = true
|
|
||||||
case int32:
|
|
||||||
budget := int(v)
|
|
||||||
budgetPtr = &budget
|
|
||||||
matched = true
|
|
||||||
case int64:
|
|
||||||
budget := int(v)
|
|
||||||
budgetPtr = &budget
|
|
||||||
matched = true
|
|
||||||
case float64:
|
|
||||||
budget := int(v)
|
|
||||||
budgetPtr = &budget
|
|
||||||
matched = true
|
|
||||||
case json.Number:
|
|
||||||
if val, err := v.Int64(); err == nil {
|
|
||||||
budget := int(val)
|
|
||||||
budgetPtr = &budget
|
|
||||||
matched = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
|
|
||||||
switch v := rawInclude.(type) {
|
|
||||||
case bool:
|
|
||||||
include := v
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
case string:
|
|
||||||
if parsed, err := strconv.ParseBool(v); err == nil {
|
|
||||||
include := parsed
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
}
|
|
||||||
case json.Number:
|
|
||||||
if val, err := v.Int64(); err == nil {
|
|
||||||
include := val != 0
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
}
|
|
||||||
case int:
|
|
||||||
include := v != 0
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
case int32:
|
|
||||||
include := v != 0
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
case int64:
|
|
||||||
include := v != 0
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
case float64:
|
|
||||||
include := v != 0
|
|
||||||
includePtr = &include
|
|
||||||
matched = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return budgetPtr, includePtr, matched
|
|
||||||
}
|
|
||||||
|
|
||||||
// modelsWithDefaultThinking lists models that should have thinking enabled by default
|
// modelsWithDefaultThinking lists models that should have thinking enabled by default
|
||||||
// when no explicit thinkingConfig is provided.
|
// when no explicit thinkingConfig is provided.
|
||||||
var modelsWithDefaultThinking = map[string]bool{
|
var modelsWithDefaultThinking = map[string]bool{
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
|
|||||||
}
|
}
|
||||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
|
||||||
|
// Returns nil if the model has no thinking support or no levels defined.
|
||||||
|
func GetModelThinkingLevels(model string) []string {
|
||||||
|
if model == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||||
|
if info == nil || info.Thinking == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return info.Thinking.Levels
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
|
||||||
|
// effort levels instead of numeric budgets.
|
||||||
|
func ModelUsesThinkingLevels(model string) bool {
|
||||||
|
levels := GetModelThinkingLevels(model)
|
||||||
|
return len(levels) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
|
||||||
|
// level for the given model. Returns false when the level is not supported.
|
||||||
|
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
|
||||||
|
levels := GetModelThinkingLevels(model)
|
||||||
|
if len(levels) == 0 {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
|
||||||
|
for _, lvl := range levels {
|
||||||
|
if strings.ToLower(lvl) == loweredEffort {
|
||||||
|
return lvl, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|||||||
313
internal/util/thinking_suffix.go
Normal file
313
internal/util/thinking_suffix.go
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Metadata keys used to carry thinking overrides parsed from a model name.
const (
|
||||||
|
// ThinkingBudgetMetadataKey stores the numeric thinking budget taken from a "(<number>)" suffix.
ThinkingBudgetMetadataKey = "thinking_budget"
|
||||||
|
// ThinkingIncludeThoughtsMetadataKey is read by ThinkingFromMetadata as the include-thoughts flag.
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
|
||||||
|
// ReasoningEffortMetadataKey stores the lower-cased reasoning effort taken from a "(<level>)" suffix.
ReasoningEffortMetadataKey = "reasoning_effort"
|
||||||
|
// ThinkingOriginalModelMetadataKey stores the model name as requested, before the suffix was stripped.
ThinkingOriginalModelMetadataKey = "thinking_original_model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
|
||||||
|
// the normalized base model with extracted metadata. Supported pattern:
|
||||||
|
// - "(<value>)" where value can be:
|
||||||
|
// - A numeric budget (e.g., "(8192)", "(16384)")
|
||||||
|
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
|
||||||
|
//
|
||||||
|
// Examples:
|
||||||
|
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
|
||||||
|
// - "gpt-5.1(high)" → reasoning_effort="high"
|
||||||
|
// - "gemini-2.5-pro(32768)" → budget=32768
|
||||||
|
//
|
||||||
|
// Note: Empty parentheses "()" are not supported and will be ignored.
|
||||||
|
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||||
|
if modelName == "" {
|
||||||
|
return modelName, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
baseModel := modelName
|
||||||
|
|
||||||
|
var (
|
||||||
|
budgetOverride *int
|
||||||
|
reasoningEffort *string
|
||||||
|
matched bool
|
||||||
|
)
|
||||||
|
|
||||||
|
// Match "(<value>)" pattern at the end of the model name
|
||||||
|
if idx := strings.LastIndex(modelName, "("); idx != -1 {
|
||||||
|
if !strings.HasSuffix(modelName, ")") {
|
||||||
|
// Incomplete parenthesis, ignore
|
||||||
|
return baseModel, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
|
||||||
|
if value == "" {
|
||||||
|
// Empty parentheses not supported
|
||||||
|
return baseModel, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
candidateBase := modelName[:idx]
|
||||||
|
|
||||||
|
// Auto-detect: pure numeric → budget, string → reasoning effort level
|
||||||
|
if parsed, ok := parseIntPrefix(value); ok {
|
||||||
|
// Numeric value: treat as thinking budget
|
||||||
|
baseModel = candidateBase
|
||||||
|
budgetOverride = &parsed
|
||||||
|
matched = true
|
||||||
|
} else {
|
||||||
|
// String value: treat as reasoning effort level
|
||||||
|
baseModel = candidateBase
|
||||||
|
raw := strings.ToLower(strings.TrimSpace(value))
|
||||||
|
if raw != "" {
|
||||||
|
reasoningEffort = &raw
|
||||||
|
matched = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !matched {
|
||||||
|
return baseModel, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata := map[string]any{
|
||||||
|
ThinkingOriginalModelMetadataKey: modelName,
|
||||||
|
}
|
||||||
|
if budgetOverride != nil {
|
||||||
|
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
|
||||||
|
}
|
||||||
|
if reasoningEffort != nil {
|
||||||
|
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
|
||||||
|
}
|
||||||
|
return baseModel, metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
|
||||||
|
// It accepts both the new generic keys and legacy Gemini-specific keys.
|
||||||
|
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
|
||||||
|
if len(metadata) == 0 {
|
||||||
|
return nil, nil, nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
budgetPtr *int
|
||||||
|
includePtr *bool
|
||||||
|
effortPtr *string
|
||||||
|
matched bool
|
||||||
|
)
|
||||||
|
|
||||||
|
readBudget := func(key string) {
|
||||||
|
if budgetPtr != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if raw, ok := metadata[key]; ok {
|
||||||
|
if v, okNumber := parseNumberToInt(raw); okNumber {
|
||||||
|
budget := v
|
||||||
|
budgetPtr = &budget
|
||||||
|
matched = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readInclude := func(key string) {
|
||||||
|
if includePtr != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if raw, ok := metadata[key]; ok {
|
||||||
|
switch v := raw.(type) {
|
||||||
|
case bool:
|
||||||
|
val := v
|
||||||
|
includePtr = &val
|
||||||
|
matched = true
|
||||||
|
case *bool:
|
||||||
|
if v != nil {
|
||||||
|
val := *v
|
||||||
|
includePtr = &val
|
||||||
|
matched = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readEffort := func(key string) {
|
||||||
|
if effortPtr != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if raw, ok := metadata[key]; ok {
|
||||||
|
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
|
||||||
|
normalized := strings.ToLower(strings.TrimSpace(val))
|
||||||
|
effortPtr = &normalized
|
||||||
|
matched = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readBudget(ThinkingBudgetMetadataKey)
|
||||||
|
readBudget(GeminiThinkingBudgetMetadataKey)
|
||||||
|
readInclude(ThinkingIncludeThoughtsMetadataKey)
|
||||||
|
readInclude(GeminiIncludeThoughtsMetadataKey)
|
||||||
|
readEffort(ReasoningEffortMetadataKey)
|
||||||
|
readEffort("reasoning.effort")
|
||||||
|
|
||||||
|
return budgetPtr, includePtr, effortPtr, matched
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
|
||||||
|
// converting reasoning effort strings into budgets when possible.
|
||||||
|
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
|
||||||
|
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||||
|
if !matched {
|
||||||
|
return nil, nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
if budget == nil && effort != nil {
|
||||||
|
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
||||||
|
budget = &derived
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return budget, include, budget != nil || include != nil || effort != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
|
||||||
|
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
|
||||||
|
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
|
||||||
|
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||||
|
if !matched {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
if effort != nil && *effort != "" {
|
||||||
|
return strings.ToLower(strings.TrimSpace(*effort)), true
|
||||||
|
}
|
||||||
|
if budget != nil {
|
||||||
|
switch *budget {
|
||||||
|
case -1:
|
||||||
|
return "auto", true
|
||||||
|
case 0:
|
||||||
|
return "none", true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if include != nil && !*include {
|
||||||
|
return "none", true
|
||||||
|
}
|
||||||
|
return "", true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
|
||||||
|
// clamping the result to the model's supported range.
|
||||||
|
func ThinkingEffortToBudget(model, effort string) (int, bool) {
|
||||||
|
if effort == "" {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
|
||||||
|
if !ok {
|
||||||
|
normalized = strings.ToLower(strings.TrimSpace(effort))
|
||||||
|
}
|
||||||
|
switch normalized {
|
||||||
|
case "none":
|
||||||
|
return 0, true
|
||||||
|
case "auto":
|
||||||
|
return NormalizeThinkingBudget(model, -1), true
|
||||||
|
case "minimal":
|
||||||
|
return NormalizeThinkingBudget(model, 512), true
|
||||||
|
case "low":
|
||||||
|
return NormalizeThinkingBudget(model, 1024), true
|
||||||
|
case "medium":
|
||||||
|
return NormalizeThinkingBudget(model, 8192), true
|
||||||
|
case "high":
|
||||||
|
return NormalizeThinkingBudget(model, 24576), true
|
||||||
|
case "xhigh":
|
||||||
|
return NormalizeThinkingBudget(model, 32768), true
|
||||||
|
default:
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResolveOriginalModel returns the original model name stored in metadata (if present),
|
||||||
|
// otherwise falls back to the provided model.
|
||||||
|
func ResolveOriginalModel(model string, metadata map[string]any) string {
|
||||||
|
normalize := func(name string) string {
|
||||||
|
if name == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if base, _ := NormalizeThinkingModel(name); base != "" {
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if metadata != nil {
|
||||||
|
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
|
||||||
|
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||||
|
if base := normalize(s); base != "" {
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
|
||||||
|
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||||
|
if base := normalize(s); base != "" {
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback: try to re-normalize the model name when metadata was dropped.
|
||||||
|
if base := normalize(model); base != "" {
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
return model
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIntPrefix parses the decimal integer at the start of value and ignores
// everything after the first non-digit character (so "12ab" yields 12).
//
// Leading '-' characters are skipped before the digits, and the result is
// negative when at least one was present. This keeps a dynamic budget written
// as "-1" from being read as 1. It returns false when value contains no leading
// digits or the digits do not fit in an int.
func parseIntPrefix(value string) (int, bool) {
	digits := strings.TrimLeft(value, "-")
	negative := len(digits) != len(value)

	end := 0
	for end < len(digits) && digits[end] >= '0' && digits[end] <= '9' {
		end++
	}
	if end == 0 {
		return 0, false
	}

	n, err := strconv.Atoi(digits[:end])
	if err != nil {
		return 0, false
	}
	if negative {
		n = -n
	}
	return n, true
}
|
||||||
|
|
||||||
|
// parseNumberToInt converts a loosely typed metadata value to an int.
//
// It accepts int, int32, int64, float64 (truncated toward zero), json.Number
// values that parse as an integer, and strings holding a base-10 integer with
// optional surrounding whitespace. It returns 0 and false for any other type
// and for values that fail to parse.
func parseNumberToInt(raw any) (int, bool) {
	switch num := raw.(type) {
	case int:
		return num, true
	case int32:
		return int(num), true
	case int64:
		return int(num), true
	case float64:
		return int(num), true
	case json.Number:
		n, err := num.Int64()
		if err != nil {
			return 0, false
		}
		return int(n), true
	case string:
		// Atoi rejects the empty string, so blank input needs no separate check.
		n, err := strconv.Atoi(strings.TrimSpace(num))
		if err != nil {
			return 0, false
		}
		return n, true
	default:
		return 0, false
	}
}
|
||||||
@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if errMsg != nil {
|
if errMsg != nil {
|
||||||
|
status := http.StatusInternalServerError
|
||||||
|
if errMsg.StatusCode > 0 {
|
||||||
|
status = errMsg.StatusCode
|
||||||
|
}
|
||||||
|
c.Status(status)
|
||||||
// An error occurred: emit as a proper SSE error event
|
// An error occurred: emit as a proper SSE error event
|
||||||
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
||||||
_, _ = writer.WriteString("event: error\n")
|
_, _ = writer.WriteString("event: error\n")
|
||||||
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
|||||||
_, _ = writer.Write(errorBytes)
|
_, _ = writer.Write(errorBytes)
|
||||||
_, _ = writer.WriteString("\n\n")
|
_, _ = writer.WriteString("\n\n")
|
||||||
_ = writer.Flush()
|
_ = writer.Flush()
|
||||||
|
flusher.Flush()
|
||||||
}
|
}
|
||||||
var execErr error
|
var execErr error
|
||||||
if errMsg != nil {
|
if errMsg != nil {
|
||||||
|
|||||||
@@ -343,18 +343,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string
|
|||||||
|
|
||||||
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
|
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
|
||||||
|
|
||||||
// First, normalize the model name to handle suffixes like "-thinking-128"
|
targetModelName := resolvedModelName
|
||||||
// This needs to happen before determining the provider for non-dynamic models.
|
if isDynamic {
|
||||||
normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
|
targetModelName = extractedModelName
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
|
||||||
|
normalizedModel, metadata = normalizeModelMetadata(targetModelName)
|
||||||
|
|
||||||
if isDynamic {
|
if isDynamic {
|
||||||
providers = []string{providerName}
|
providers = []string{providerName}
|
||||||
// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
|
|
||||||
// so we use it as the final normalizedModel.
|
|
||||||
normalizedModel = extractedModelName
|
|
||||||
} else {
|
} else {
|
||||||
// For non-dynamic models, use the normalizedModel to get the provider name.
|
// For non-dynamic models, use the normalizedModel to get the provider name.
|
||||||
providers = util.GetProviderName(normalizedModel)
|
providers = util.GetProviderName(normalizedModel)
|
||||||
|
if len(providers) == 0 && metadata != nil {
|
||||||
|
if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
|
||||||
|
if originalModel, okStr := originalRaw.(string); okStr {
|
||||||
|
originalModel = strings.TrimSpace(originalModel)
|
||||||
|
if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
|
||||||
|
if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
|
||||||
|
providers = altProviders
|
||||||
|
normalizedModel = originalModel
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(providers) == 0 {
|
if len(providers) == 0 {
|
||||||
@@ -402,7 +416,7 @@ func cloneBytes(src []byte) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func normalizeModelMetadata(modelName string) (string, map[string]any) {
|
func normalizeModelMetadata(modelName string) (string, map[string]any) {
|
||||||
return util.NormalizeGeminiThinkingModel(modelName)
|
return util.NormalizeThinkingModel(modelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func cloneMetadata(src map[string]any) map[string]any {
|
func cloneMetadata(src map[string]any) map[string]any {
|
||||||
|
|||||||
Reference in New Issue
Block a user