mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-01 10:15:26 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 69fccf0015 | | |
| | 6133bac226 | | |
| | f302be5ce6 | | |
| | cd4e84a360 | | |
| | 4360ed8a7b | | |
| | 423ce97665 | | |
@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
||||
return
|
||||
}
|
||||
|
||||
// Normalize model (handles Gemini thinking suffixes)
|
||||
normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
|
||||
// Normalize model (handles dynamic thinking suffixes)
|
||||
normalizedModel, _ := util.NormalizeThinkingModel(modelName)
|
||||
|
||||
// Track resolved model for logging (may change if mapping is applied)
|
||||
resolvedModel := normalizedModel
|
||||
|
||||
@@ -26,60 +26,6 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.5 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-thinking",
|
||||
Object: "model",
|
||||
Created: 1759104000, // 2025-09-29
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Sonnet Thinking",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-low",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking Low",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-medium",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking Medium",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-high",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking High",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
@@ -92,6 +38,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
Description: "Premium model combining maximum intelligence with practical performance",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-1-20250805",
|
||||
@@ -530,58 +477,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-minimal",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Minimal",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-low",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Low",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-medium",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Medium",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-high",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 High",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex",
|
||||
Object: "model",
|
||||
@@ -595,45 +490,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-low",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex Low",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-medium",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex Medium",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-high",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex High",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini",
|
||||
Object: "model",
|
||||
@@ -647,32 +503,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini-medium",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini Medium",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini-high",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini High",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1",
|
||||
Object: "model",
|
||||
@@ -686,58 +516,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-none",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Nothink",
|
||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-low",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Low",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Medium",
|
||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 High",
|
||||
Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex",
|
||||
Object: "model",
|
||||
@@ -751,45 +529,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-low",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Low",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Medium",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex High",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini",
|
||||
Object: "model",
|
||||
@@ -803,33 +542,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Mini Medium",
|
||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5.1 Codex Mini High",
|
||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
|
||||
{
|
||||
ID: "gpt-5.1-codex-max",
|
||||
Object: "model",
|
||||
@@ -843,58 +555,6 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-low",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max Low",
|
||||
Description: "Stable version of GPT 5.1 Codex Max Low",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-medium",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max Medium",
|
||||
Description: "Stable version of GPT 5.1 Codex Max Medium",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-high",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max High",
|
||||
Description: "Stable version of GPT 5.1 Codex Max High",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-xhigh",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5.1 Codex Max XHigh",
|
||||
Description: "Stable version of GPT 5.1 Codex Max XHigh",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
modelForUpstream := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
modelForUpstream = modelOverride
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
// Inject thinking config based on model suffix for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, body)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
|
||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
// Inject thinking config based on model suffix for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, body)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
body = checkSystemInstructions(body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
modelForUpstream := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
modelForUpstream = modelOverride
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
|
||||
@@ -427,25 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
|
||||
return betas, body
|
||||
}
|
||||
|
||||
// injectThinkingConfig adds thinking configuration based on model name suffix
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
|
||||
// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
|
||||
// Only inject if thinking config is not already present
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
return body
|
||||
}
|
||||
|
||||
var budgetTokens int
|
||||
switch {
|
||||
case strings.HasSuffix(modelName, "-thinking-low"):
|
||||
budgetTokens = 1024
|
||||
case strings.HasSuffix(modelName, "-thinking-medium"):
|
||||
budgetTokens = 8192
|
||||
case strings.HasSuffix(modelName, "-thinking-high"):
|
||||
budgetTokens = 24576
|
||||
case strings.HasSuffix(modelName, "-thinking"):
|
||||
// Default thinking without suffix uses medium budget
|
||||
budgetTokens = 8192
|
||||
default:
|
||||
budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
|
||||
if !ok || budgetTokens <= 0 {
|
||||
return body
|
||||
}
|
||||
|
||||
@@ -454,6 +467,44 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
|
||||
return body
|
||||
}
|
||||
|
||||
func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
|
||||
budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
|
||||
if matched {
|
||||
if include != nil && !*include {
|
||||
return 0, false
|
||||
}
|
||||
if budget != nil {
|
||||
normalized := util.NormalizeThinkingBudget(modelName, *budget)
|
||||
if normalized > 0 {
|
||||
return normalized, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
if effort != nil {
|
||||
if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
|
||||
return derived, true
|
||||
}
|
||||
}
|
||||
}
|
||||
return claudeBudgetFromSuffix(modelName)
|
||||
}
|
||||
|
||||
// claudeBudgetFromSuffix maps a legacy "-thinking*" model-name suffix to a
// thinking token budget. The name is trimmed and lower-cased before matching.
// It returns (0, false) when the name carries none of the known suffixes.
func claudeBudgetFromSuffix(modelName string) (int, bool) {
	name := strings.ToLower(strings.TrimSpace(modelName))

	suffixBudgets := []struct {
		suffix string
		tokens int
	}{
		{"-thinking-low", 1024},
		{"-thinking-medium", 8192},
		{"-thinking-high", 24576},
		{"-thinking", 8192}, // bare suffix uses the same budget as "-thinking-medium"
	}

	for _, entry := range suffixBudgets {
		if strings.HasSuffix(name, entry.suffix) {
			return entry.tokens, true
		}
	}
	return 0, false
}
|
||||
|
||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
||||
// Anthropic API requires this constraint; violating it returns a 400 error.
|
||||
// This function should be called after all thinking configuration is finalized.
|
||||
@@ -491,35 +542,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
if alias == "" {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
// Hardcoded mappings for thinking models to actual Claude model names
|
||||
switch alias {
|
||||
case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
|
||||
return "claude-opus-4-5-20251101"
|
||||
case "claude-sonnet-4-5-thinking":
|
||||
return "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
|
||||
entry := e.resolveClaudeConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
if modelAlias != "" {
|
||||
if strings.EqualFold(modelAlias, alias) {
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return alias
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
continue
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, alias) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
|
||||
@@ -49,14 +49,14 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "stream", true)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
|
||||
@@ -142,13 +142,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
@@ -235,14 +238,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelForCounting := req.Model
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "stream", false)
|
||||
|
||||
@@ -261,83 +266,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
|
||||
if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
|
||||
switch modelName {
|
||||
case "gpt-5-minimal":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
|
||||
case "gpt-5-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
|
||||
switch modelName {
|
||||
case "gpt-5-codex-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5-codex-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-codex-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
|
||||
switch modelName {
|
||||
case "gpt-5-codex-mini-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-codex-mini-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
|
||||
switch modelName {
|
||||
case "gpt-5.1-none":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
|
||||
case "gpt-5.1-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-codex-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-mini-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-mini-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-max-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-codex-max-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-max-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
case "gpt-5.1-codex-max-xhigh":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
|
||||
sanitized := strings.ToLower(strings.TrimSpace(model))
|
||||
switch {
|
||||
|
||||
@@ -75,6 +75,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
// Official Gemini API via API key or OAuth bearer
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
@@ -85,6 +87,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -93,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
}
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -167,6 +170,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
@@ -176,9 +181,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
|
||||
@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
|
||||
|
||||
// countTokensWithServiceAccount handles token counting using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
@@ -286,10 +292,12 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -301,6 +309,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -309,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
}
|
||||
}
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -383,10 +392,12 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -398,6 +409,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -410,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -481,10 +493,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -496,9 +510,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -595,10 +610,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
@@ -610,12 +627,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
|
||||
@@ -57,6 +57,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||
@@ -139,6 +143,10 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||
|
||||
@@ -121,7 +121,12 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
return resp, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
if profileArn == "" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
// Only warn if not using builder-id auth (which doesn't need profileArn)
|
||||
if auth == nil || auth.Metadata == nil {
|
||||
log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
|
||||
} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
}
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
@@ -161,10 +166,19 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
currentOrigin = "CLI"
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
// Determine if profileArn should be included based on auth method
|
||||
// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
|
||||
effectiveProfileArn := profileArn
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
|
||||
effectiveProfileArn = "" // Don't include profileArn for builder-id auth
|
||||
}
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
|
||||
// Execute with retry on 401/403 and 429 (quota exhausted)
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
return resp, err
|
||||
}
|
||||
|
||||
@@ -301,9 +315,18 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
}
|
||||
}
|
||||
if len(content) > 0 {
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
// Use tiktoken for more accurate output token calculation
|
||||
if enc, encErr := tokenizerForModel(req.Model); encErr == nil {
|
||||
if tokenCount, countErr := enc.Count(content); countErr == nil {
|
||||
usageInfo.OutputTokens = int64(tokenCount)
|
||||
}
|
||||
}
|
||||
// Fallback to character count estimation if tiktoken fails
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||
@@ -330,7 +353,12 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
return nil, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
if profileArn == "" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
// Only warn if not using builder-id auth (which doesn't need profileArn)
|
||||
if auth == nil || auth.Metadata == nil {
|
||||
log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
|
||||
} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
}
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
@@ -370,10 +398,19 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
currentOrigin = "CLI"
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
// Determine if profileArn should be included based on auth method
|
||||
// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
|
||||
effectiveProfileArn := profileArn
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
|
||||
effectiveProfileArn = "" // Don't include profileArn for builder-id auth
|
||||
}
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
|
||||
// Execute stream with retry on 401/403 and 429 (quota exhausted)
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
}
|
||||
|
||||
// executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors.
|
||||
@@ -491,6 +528,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
go func(resp *http.Response) {
|
||||
defer close(out)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Errorf("kiro: panic in stream handler: %v", r)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("internal error: %v", r)}
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
if errClose := resp.Body.Close(); errClose != nil {
|
||||
log.Errorf("response body close error: %v", errClose)
|
||||
@@ -587,10 +630,10 @@ type kiroPayload struct {
|
||||
}
|
||||
|
||||
type kiroConversationState struct {
|
||||
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
|
||||
ConversationID string `json:"conversationId"`
|
||||
History []kiroHistoryMessage `json:"history"`
|
||||
CurrentMessage kiroCurrentMessage `json:"currentMessage"`
|
||||
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL"
|
||||
History []kiroHistoryMessage `json:"history,omitempty"` // Only include when non-empty
|
||||
}
|
||||
|
||||
type kiroCurrentMessage struct {
|
||||
@@ -627,9 +670,9 @@ type kiroUserInputMessageContext struct {
|
||||
}
|
||||
|
||||
type kiroToolResult struct {
|
||||
ToolUseID string `json:"toolUseId"`
|
||||
Content []kiroTextContent `json:"content"`
|
||||
Status string `json:"status"`
|
||||
ToolUseID string `json:"toolUseId"`
|
||||
}
|
||||
|
||||
type kiroTextContent struct {
|
||||
@@ -735,7 +778,9 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
var currentUserMsg *kiroUserInputMessage
|
||||
var currentToolResults []kiroToolResult
|
||||
|
||||
messagesArray := messages.Array()
|
||||
// Merge adjacent messages with the same role before processing
|
||||
// This reduces API call complexity and improves compatibility
|
||||
messagesArray := mergeAdjacentMessages(messages.Array())
|
||||
for i, msg := range messagesArray {
|
||||
role := msg.Get("role").String()
|
||||
isLastMessage := i == len(messagesArray)-1
|
||||
@@ -746,6 +791,14 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
currentUserMsg = &userMsg
|
||||
currentToolResults = toolResults
|
||||
} else {
|
||||
// CRITICAL: Kiro API requires content to be non-empty for history messages too
|
||||
if strings.TrimSpace(userMsg.Content) == "" {
|
||||
if len(toolResults) > 0 {
|
||||
userMsg.Content = "Tool results provided."
|
||||
} else {
|
||||
userMsg.Content = "Continue"
|
||||
}
|
||||
}
|
||||
// For history messages, embed tool results in context
|
||||
if len(toolResults) > 0 {
|
||||
userMsg.UserInputMessageContext = &kiroUserInputMessageContext{
|
||||
@@ -758,9 +811,24 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
}
|
||||
} else if role == "assistant" {
|
||||
assistantMsg := e.buildAssistantMessageStruct(msg)
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
// If this is the last message and it's an assistant message,
|
||||
// we need to add it to history and create a "Continue" user message
|
||||
// because Kiro API requires currentMessage to be userInputMessage type
|
||||
if isLastMessage {
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
// Create a "Continue" user message as currentMessage
|
||||
currentUserMsg = &kiroUserInputMessage{
|
||||
Content: "Continue",
|
||||
ModelID: modelID,
|
||||
Origin: origin,
|
||||
}
|
||||
} else {
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -777,7 +845,35 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
|
||||
// Add the actual user message
|
||||
contentBuilder.WriteString(currentUserMsg.Content)
|
||||
currentUserMsg.Content = contentBuilder.String()
|
||||
finalContent := contentBuilder.String()
|
||||
|
||||
// CRITICAL: Kiro API requires content to be non-empty, even when toolResults are present
|
||||
// If content is empty or only whitespace, provide a default message
|
||||
if strings.TrimSpace(finalContent) == "" {
|
||||
if len(currentToolResults) > 0 {
|
||||
finalContent = "Tool results provided."
|
||||
} else {
|
||||
finalContent = "Continue"
|
||||
}
|
||||
log.Debugf("kiro: content was empty, using default: %s", finalContent)
|
||||
}
|
||||
currentUserMsg.Content = finalContent
|
||||
|
||||
// Deduplicate currentToolResults before adding to context
|
||||
// Kiro API does not accept duplicate toolUseIds
|
||||
if len(currentToolResults) > 0 {
|
||||
seenIDs := make(map[string]bool)
|
||||
uniqueToolResults := make([]kiroToolResult, 0, len(currentToolResults))
|
||||
for _, tr := range currentToolResults {
|
||||
if !seenIDs[tr.ToolUseID] {
|
||||
seenIDs[tr.ToolUseID] = true
|
||||
uniqueToolResults = append(uniqueToolResults, tr)
|
||||
} else {
|
||||
log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID)
|
||||
}
|
||||
}
|
||||
currentToolResults = uniqueToolResults
|
||||
}
|
||||
|
||||
// Build userInputMessageContext with tools and tool results
|
||||
if len(kiroTools) > 0 || len(currentToolResults) > 0 {
|
||||
@@ -805,21 +901,18 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
}}
|
||||
}
|
||||
|
||||
// Build payload with correct field order (matches struct definition)
|
||||
// Note: history is omitempty, so nil/empty slice won't be serialized
|
||||
payload := kiroPayload{
|
||||
ConversationState: kiroConversationState{
|
||||
ChatTriggerType: "MANUAL", // Required by Kiro API - must be first
|
||||
ConversationID: uuid.New().String(),
|
||||
History: history,
|
||||
CurrentMessage: currentMessage,
|
||||
ChatTriggerType: "MANUAL", // Required by Kiro API
|
||||
History: history, // Will be omitted if empty due to omitempty tag
|
||||
},
|
||||
ProfileArn: profileArn,
|
||||
}
|
||||
|
||||
// Ensure history is not nil (empty array)
|
||||
if payload.ConversationState.History == nil {
|
||||
payload.ConversationState.History = []kiroHistoryMessage{}
|
||||
}
|
||||
|
||||
result, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
log.Debugf("kiro: failed to marshal payload: %v", err)
|
||||
@@ -830,11 +923,15 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
|
||||
// buildUserMessageStruct builds a user message and extracts tool results
|
||||
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
|
||||
// IMPORTANT: Kiro API does not accept duplicate toolUseIds, so we deduplicate here.
|
||||
func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin string) (kiroUserInputMessage, []kiroToolResult) {
|
||||
content := msg.Get("content")
|
||||
var contentBuilder strings.Builder
|
||||
var toolResults []kiroToolResult
|
||||
var images []kiroImage
|
||||
|
||||
// Track seen toolUseIds to deduplicate - Kiro API rejects duplicate toolUseIds
|
||||
seenToolUseIDs := make(map[string]bool)
|
||||
|
||||
if content.IsArray() {
|
||||
for _, part := range content.Array() {
|
||||
@@ -864,6 +961,14 @@ func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin
|
||||
case "tool_result":
|
||||
// Extract tool result for API
|
||||
toolUseID := part.Get("tool_use_id").String()
|
||||
|
||||
// Skip duplicate toolUseIds - Kiro API does not accept duplicates
|
||||
if seenToolUseIDs[toolUseID] {
|
||||
log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID)
|
||||
continue
|
||||
}
|
||||
seenToolUseIDs[toolUseID] = true
|
||||
|
||||
isError := part.Get("is_error").Bool()
|
||||
resultContent := part.Get("content")
|
||||
|
||||
@@ -1001,6 +1106,12 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
|
||||
return content.String(), toolUses, usageInfo, fmt.Errorf("failed to read message: %w", err)
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract event type from headers
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
@@ -1018,6 +1129,37 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
|
||||
continue
|
||||
}
|
||||
|
||||
// DIAGNOSTIC: Log all received event types for debugging
|
||||
log.Debugf("kiro: parseEventStream received event type: %s", eventType)
|
||||
if log.IsLevelEnabled(log.TraceLevel) {
|
||||
log.Tracef("kiro: parseEventStream event payload: %s", string(payload))
|
||||
}
|
||||
|
||||
// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
|
||||
// These can appear as top-level fields or nested within the event
|
||||
if errType, hasErrType := event["_type"].(string); hasErrType {
|
||||
// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
log.Errorf("kiro: received AWS error in event stream: type=%s, message=%s", errType, errMsg)
|
||||
return "", nil, usageInfo, fmt.Errorf("kiro API error: %s - %s", errType, errMsg)
|
||||
}
|
||||
if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
|
||||
// Generic error event
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
} else if errObj, ok := event["error"].(map[string]interface{}); ok {
|
||||
if msg, ok := errObj["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
}
|
||||
log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
|
||||
return "", nil, usageInfo, fmt.Errorf("kiro API error: %s", errMsg)
|
||||
}
|
||||
|
||||
// Handle different event types
|
||||
switch eventType {
|
||||
case "assistantResponseEvent":
|
||||
@@ -1231,8 +1373,9 @@ func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUs
|
||||
// streamToChannel converts AWS Event Stream to channel-based streaming.
|
||||
// Supports tool calling - emits tool_use content blocks when tools are used.
|
||||
// Includes embedded [Called ...] tool call parsing and input buffering for toolUseEvent.
|
||||
// Implements duplicate content filtering using lastContentEvent detection (based on AIClient-2-API).
|
||||
func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) {
|
||||
reader := bufio.NewReader(body)
|
||||
reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
|
||||
var totalUsage usage.Detail
|
||||
var hasToolUses bool // Track if any tool uses were emitted
|
||||
|
||||
@@ -1240,6 +1383,15 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
processedIDs := make(map[string]bool)
|
||||
var currentToolUse *toolUseState
|
||||
|
||||
// Duplicate content detection - tracks last content event to filter duplicates
|
||||
// Based on AIClient-2-API implementation for Kiro
|
||||
var lastContentEvent string
|
||||
|
||||
// Streaming token calculation - accumulate content for real-time token counting
|
||||
// Based on AIClient-2-API implementation
|
||||
var accumulatedContent strings.Builder
|
||||
accumulatedContent.Grow(4096) // Pre-allocate 4KB capacity to reduce reallocations
|
||||
|
||||
// Translator param for maintaining tool call state across streaming events
|
||||
// IMPORTANT: This must persist across all TranslateStream calls
|
||||
var translatorParam any
|
||||
@@ -1279,6 +1431,51 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
prelude := make([]byte, 8)
|
||||
_, err := io.ReadFull(reader, prelude)
|
||||
if err == io.EOF {
|
||||
// Flush any incomplete tool use before ending stream
|
||||
if currentToolUse != nil && !processedIDs[currentToolUse.toolUseID] {
|
||||
log.Warnf("kiro: flushing incomplete tool use at EOF: %s (ID: %s)", currentToolUse.name, currentToolUse.toolUseID)
|
||||
fullInput := currentToolUse.inputBuffer.String()
|
||||
repairedJSON := repairJSON(fullInput)
|
||||
var finalInput map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
|
||||
log.Warnf("kiro: failed to parse incomplete tool input at EOF: %v", err)
|
||||
finalInput = make(map[string]interface{})
|
||||
}
|
||||
|
||||
processedIDs[currentToolUse.toolUseID] = true
|
||||
contentBlockIndex++
|
||||
|
||||
// Send tool_use content block
|
||||
blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", currentToolUse.toolUseID, currentToolUse.name)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Send tool input as delta
|
||||
inputBytes, _ := json.Marshal(finalInput)
|
||||
inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputBytes), contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Close block
|
||||
blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
hasToolUses = true
|
||||
currentToolUse = nil
|
||||
}
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
@@ -1304,6 +1501,12 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
return
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
payloadStart := 4 + headersLen
|
||||
@@ -1317,9 +1520,43 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
|
||||
var event map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &event); err != nil {
|
||||
log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
|
||||
continue
|
||||
}
|
||||
|
||||
// DIAGNOSTIC: Log all received event types for debugging
|
||||
log.Debugf("kiro: streamToChannel received event type: %s", eventType)
|
||||
if log.IsLevelEnabled(log.TraceLevel) {
|
||||
log.Tracef("kiro: streamToChannel event payload: %s", string(payload))
|
||||
}
|
||||
|
||||
// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
|
||||
// These can appear as top-level fields or nested within the event
|
||||
if errType, hasErrType := event["_type"].(string); hasErrType {
|
||||
// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
log.Errorf("kiro: received AWS error in stream: type=%s, message=%s", errType, errMsg)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s - %s", errType, errMsg)}
|
||||
return
|
||||
}
|
||||
if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
|
||||
// Generic error event
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
} else if errObj, ok := event["error"].(map[string]interface{}); ok {
|
||||
if msg, ok := errObj["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
}
|
||||
log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s", errMsg)}
|
||||
return
|
||||
}
|
||||
|
||||
// Send message_start on first event
|
||||
if !messageStartSent {
|
||||
msgStart := e.buildClaudeMessageStartEvent(model, totalUsage.InputTokens)
|
||||
@@ -1364,9 +1601,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
}
|
||||
|
||||
// Handle text content
|
||||
// Handle text content with duplicate detection
|
||||
if contentDelta != "" {
|
||||
// Check for duplicate content - skip if identical to last content event
|
||||
// Based on AIClient-2-API implementation for Kiro
|
||||
if contentDelta == lastContentEvent {
|
||||
log.Debugf("kiro: skipping duplicate content event (len: %d)", len(contentDelta))
|
||||
continue
|
||||
}
|
||||
lastContentEvent = contentDelta
|
||||
|
||||
outputLen += len(contentDelta)
|
||||
// Accumulate content for streaming token calculation
|
||||
accumulatedContent.WriteString(contentDelta)
|
||||
// Start text content block if needed
|
||||
if !isTextBlockOpen {
|
||||
contentBlockIndex++
|
||||
@@ -1538,8 +1785,32 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback for output tokens if not received from upstream
|
||||
if totalUsage.OutputTokens == 0 && outputLen > 0 {
|
||||
// Streaming token calculation - calculate output tokens from accumulated content
|
||||
// This provides more accurate token counting than simple character division
|
||||
if totalUsage.OutputTokens == 0 && accumulatedContent.Len() > 0 {
|
||||
// Try to use tiktoken for accurate counting
|
||||
if enc, err := tokenizerForModel(model); err == nil {
|
||||
if tokenCount, countErr := enc.Count(accumulatedContent.String()); countErr == nil {
|
||||
totalUsage.OutputTokens = int64(tokenCount)
|
||||
log.Debugf("kiro: streamToChannel calculated output tokens using tiktoken: %d", totalUsage.OutputTokens)
|
||||
} else {
|
||||
// Fallback on count error: estimate from character count
|
||||
totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
}
|
||||
log.Debugf("kiro: streamToChannel tiktoken count failed, estimated from chars: %d", totalUsage.OutputTokens)
|
||||
}
|
||||
} else {
|
||||
// Fallback: estimate from character count (roughly 4 chars per token)
|
||||
totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
}
|
||||
log.Debugf("kiro: streamToChannel estimated output tokens from chars: %d (content len: %d)", totalUsage.OutputTokens, accumulatedContent.Len())
|
||||
}
|
||||
} else if totalUsage.OutputTokens == 0 && outputLen > 0 {
|
||||
// Legacy fallback using outputLen
|
||||
totalUsage.OutputTokens = int64(outputLen / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
@@ -1553,9 +1824,18 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
stopReason = "tool_use"
|
||||
}
|
||||
|
||||
// Send message_delta and message_stop
|
||||
msgStop := e.buildClaudeMessageStopEvent(stopReason, totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
// Send message_delta event
|
||||
msgDelta := e.buildClaudeMessageDeltaEvent(stopReason, totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Send message_stop event separately
|
||||
msgStop := e.buildClaudeMessageStopOnlyEvent()
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
@@ -1646,8 +1926,8 @@ func (e *KiroExecutor) buildClaudeContentBlockStopEvent(index int) []byte {
|
||||
return []byte("event: content_block_stop\ndata: " + string(result))
|
||||
}
|
||||
|
||||
func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||
// First message_delta
|
||||
// buildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage.
|
||||
func (e *KiroExecutor) buildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||
deltaEvent := map[string]interface{}{
|
||||
"type": "message_delta",
|
||||
"delta": map[string]interface{}{
|
||||
@@ -1660,14 +1940,16 @@ func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo
|
||||
},
|
||||
}
|
||||
deltaResult, _ := json.Marshal(deltaEvent)
|
||||
return []byte("event: message_delta\ndata: " + string(deltaResult))
|
||||
}
|
||||
|
||||
// Then message_stop
|
||||
// buildClaudeMessageStopOnlyEvent creates only the message_stop event.
|
||||
func (e *KiroExecutor) buildClaudeMessageStopOnlyEvent() []byte {
|
||||
stopEvent := map[string]interface{}{
|
||||
"type": "message_stop",
|
||||
}
|
||||
stopResult, _ := json.Marshal(stopEvent)
|
||||
|
||||
return []byte("event: message_delta\ndata: " + string(deltaResult) + "\n\nevent: message_stop\ndata: " + string(stopResult))
|
||||
return []byte("event: message_stop\ndata: " + string(stopResult))
|
||||
}
|
||||
|
||||
// buildClaudeFinalEvent constructs the final Claude-style event.
|
||||
@@ -1873,6 +2155,12 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
return fmt.Errorf("failed to read message: %w", err)
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
payloadStart := 4 + headersLen
|
||||
@@ -1886,6 +2174,7 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
|
||||
var event map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &event); err != nil {
|
||||
log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1983,9 +2272,19 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
}
|
||||
totalUsage.TotalTokens = totalUsage.InputTokens + totalUsage.OutputTokens
|
||||
|
||||
// Always use end_turn (no tool_use support)
|
||||
msgStop := e.buildClaudeMessageStopEvent("end_turn", totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
// Send message_delta event
|
||||
msgDelta := e.buildClaudeMessageDeltaEvent("end_turn", totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
c.Writer.Write([]byte(chunk + "\n\n"))
|
||||
}
|
||||
}
|
||||
c.Writer.Flush()
|
||||
|
||||
// Send message_stop event separately
|
||||
msgStop := e.buildClaudeMessageStopOnlyEvent()
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
c.Writer.Write([]byte(chunk + "\n\n"))
|
||||
@@ -2057,6 +2356,128 @@ func (e *KiroExecutor) isTokenExpired(accessToken string) bool {
|
||||
return isExpired
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Message Merging Support - Merge adjacent messages with the same role
|
||||
// Based on AIClient-2-API implementation
|
||||
// ============================================================================
|
||||
|
||||
// mergeAdjacentMessages merges adjacent messages with the same role.
|
||||
// This reduces API call complexity and improves compatibility.
|
||||
// Based on AIClient-2-API implementation.
|
||||
func mergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
|
||||
if len(messages) <= 1 {
|
||||
return messages
|
||||
}
|
||||
|
||||
var merged []gjson.Result
|
||||
for _, msg := range messages {
|
||||
if len(merged) == 0 {
|
||||
merged = append(merged, msg)
|
||||
continue
|
||||
}
|
||||
|
||||
lastMsg := merged[len(merged)-1]
|
||||
currentRole := msg.Get("role").String()
|
||||
lastRole := lastMsg.Get("role").String()
|
||||
|
||||
if currentRole == lastRole {
|
||||
// Merge content from current message into last message
|
||||
mergedContent := mergeMessageContent(lastMsg, msg)
|
||||
// Create a new merged message JSON
|
||||
mergedMsg := createMergedMessage(lastRole, mergedContent)
|
||||
merged[len(merged)-1] = gjson.Parse(mergedMsg)
|
||||
} else {
|
||||
merged = append(merged, msg)
|
||||
}
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergeMessageContent merges the content of two messages with the same role.
|
||||
// Handles both string content and array content (with text, tool_use, tool_result blocks).
|
||||
func mergeMessageContent(msg1, msg2 gjson.Result) string {
|
||||
content1 := msg1.Get("content")
|
||||
content2 := msg2.Get("content")
|
||||
|
||||
// Extract content blocks from both messages
|
||||
var blocks1, blocks2 []map[string]interface{}
|
||||
|
||||
if content1.IsArray() {
|
||||
for _, block := range content1.Array() {
|
||||
blocks1 = append(blocks1, blockToMap(block))
|
||||
}
|
||||
} else if content1.Type == gjson.String {
|
||||
blocks1 = append(blocks1, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": content1.String(),
|
||||
})
|
||||
}
|
||||
|
||||
if content2.IsArray() {
|
||||
for _, block := range content2.Array() {
|
||||
blocks2 = append(blocks2, blockToMap(block))
|
||||
}
|
||||
} else if content2.Type == gjson.String {
|
||||
blocks2 = append(blocks2, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": content2.String(),
|
||||
})
|
||||
}
|
||||
|
||||
// Merge text blocks if both end/start with text
|
||||
if len(blocks1) > 0 && len(blocks2) > 0 {
|
||||
if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" {
|
||||
// Merge the last text block of msg1 with the first text block of msg2
|
||||
text1 := blocks1[len(blocks1)-1]["text"].(string)
|
||||
text2 := blocks2[0]["text"].(string)
|
||||
blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2
|
||||
blocks2 = blocks2[1:] // Remove the merged block from blocks2
|
||||
}
|
||||
}
|
||||
|
||||
// Combine all blocks
|
||||
allBlocks := append(blocks1, blocks2...)
|
||||
|
||||
// Convert to JSON
|
||||
result, _ := json.Marshal(allBlocks)
|
||||
return string(result)
|
||||
}
|
||||
|
||||
// blockToMap converts a gjson.Result block to a map[string]interface{}
|
||||
func blockToMap(block gjson.Result) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
block.ForEach(func(key, value gjson.Result) bool {
|
||||
if value.IsObject() {
|
||||
result[key.String()] = blockToMap(value)
|
||||
} else if value.IsArray() {
|
||||
var arr []interface{}
|
||||
for _, item := range value.Array() {
|
||||
if item.IsObject() {
|
||||
arr = append(arr, blockToMap(item))
|
||||
} else {
|
||||
arr = append(arr, item.Value())
|
||||
}
|
||||
}
|
||||
result[key.String()] = arr
|
||||
} else {
|
||||
result[key.String()] = value.Value()
|
||||
}
|
||||
return true
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
// createMergedMessage builds the JSON text of a message with the given role
// and content.
//
// content is expected to be a JSON document (normally the array produced by
// mergeMessageContent) and is embedded verbatim. If content is not valid JSON
// (for example, an empty string), it is embedded as a plain JSON string instead,
// so the function still yields a well-formed message rather than an empty
// result caused by a marshalling failure.
//
// It returns "" only if encoding fails.
func createMergedMessage(role string, content string) string {
	var body interface{} = json.RawMessage(content)
	if !json.Valid([]byte(content)) {
		// A RawMessage holding invalid JSON makes json.Marshal fail; fall back
		// to string content.
		body = content
	}

	msg := map[string]interface{}{
		"role":    role,
		"content": body,
	}
	result, err := json.Marshal(msg)
	if err != nil {
		return ""
	}
	return string(result)
}
|
||||
|
||||
// ============================================================================
|
||||
// Tool Calling Support - Embedded tool call parsing and input buffering
|
||||
// Based on amq2api and AIClient-2-API implementations
|
||||
@@ -2079,8 +2500,6 @@ var (
|
||||
whitespaceCollapsePattern = regexp.MustCompile(`\s+`)
|
||||
// trailingCommaPattern matches trailing commas before closing braces/brackets
|
||||
trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`)
|
||||
// unquotedKeyPattern matches unquoted JSON keys that need quoting
|
||||
unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`)
|
||||
)
|
||||
|
||||
// parseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text.
|
||||
@@ -2246,14 +2665,208 @@ func findMatchingBracket(text string, startPos int) int {
|
||||
}
|
||||
|
||||
// repairJSON attempts to fix common JSON issues that may occur in tool call arguments.
|
||||
// Based on AIClient-2-API's JSON repair implementation.
|
||||
// Based on AIClient-2-API's JSON repair implementation with a more conservative strategy.
|
||||
//
|
||||
// Conservative repair strategy:
|
||||
// 1. First try to parse JSON directly - if valid, return as-is
|
||||
// 2. Only attempt repair if parsing fails
|
||||
// 3. After repair, validate the result - if still invalid, return original
|
||||
//
|
||||
// Handles incomplete JSON by balancing brackets and removing trailing incomplete content.
|
||||
// Uses pre-compiled regex patterns for performance.
|
||||
func repairJSON(raw string) string {
|
||||
func repairJSON(jsonString string) string {
|
||||
// Handle empty or invalid input
|
||||
if jsonString == "" {
|
||||
return "{}"
|
||||
}
|
||||
|
||||
str := strings.TrimSpace(jsonString)
|
||||
if str == "" {
|
||||
return "{}"
|
||||
}
|
||||
|
||||
// CONSERVATIVE STRATEGY: First try to parse directly
|
||||
// If the JSON is already valid, return it unchanged
|
||||
var testParse interface{}
|
||||
if err := json.Unmarshal([]byte(str), &testParse); err == nil {
|
||||
log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged")
|
||||
return str
|
||||
}
|
||||
|
||||
log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair")
|
||||
originalStr := str // Keep original for fallback
|
||||
|
||||
// First, escape unescaped newlines/tabs within JSON string values
|
||||
str = escapeNewlinesInStrings(str)
|
||||
// Remove trailing commas before closing braces/brackets
|
||||
repaired := trailingCommaPattern.ReplaceAllString(raw, "$1")
|
||||
// Fix unquoted keys (basic attempt - handles simple cases)
|
||||
repaired = unquotedKeyPattern.ReplaceAllString(repaired, `$1"$2":`)
|
||||
return repaired
|
||||
str = trailingCommaPattern.ReplaceAllString(str, "$1")
|
||||
|
||||
// Calculate bracket balance to detect incomplete JSON
|
||||
braceCount := 0 // {} balance
|
||||
bracketCount := 0 // [] balance
|
||||
inString := false
|
||||
escape := false
|
||||
lastValidIndex := -1
|
||||
|
||||
for i := 0; i < len(str); i++ {
|
||||
char := str[i]
|
||||
|
||||
// Handle escape sequences
|
||||
if escape {
|
||||
escape = false
|
||||
continue
|
||||
}
|
||||
|
||||
if char == '\\' {
|
||||
escape = true
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle string boundaries
|
||||
if char == '"' {
|
||||
inString = !inString
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip characters inside strings (they don't affect bracket balance)
|
||||
if inString {
|
||||
continue
|
||||
}
|
||||
|
||||
// Track bracket balance
|
||||
switch char {
|
||||
case '{':
|
||||
braceCount++
|
||||
case '}':
|
||||
braceCount--
|
||||
case '[':
|
||||
bracketCount++
|
||||
case ']':
|
||||
bracketCount--
|
||||
}
|
||||
|
||||
// Record last valid position (where brackets are balanced or positive)
|
||||
if braceCount >= 0 && bracketCount >= 0 {
|
||||
lastValidIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
// If brackets are unbalanced, try to repair
|
||||
if braceCount > 0 || bracketCount > 0 {
|
||||
// Truncate to last valid position if we have incomplete content
|
||||
if lastValidIndex > 0 && lastValidIndex < len(str)-1 {
|
||||
// Check if truncation would help (only truncate if there's trailing garbage)
|
||||
truncated := str[:lastValidIndex+1]
|
||||
// Recount brackets after truncation
|
||||
braceCount = 0
|
||||
bracketCount = 0
|
||||
inString = false
|
||||
escape = false
|
||||
for i := 0; i < len(truncated); i++ {
|
||||
char := truncated[i]
|
||||
if escape {
|
||||
escape = false
|
||||
continue
|
||||
}
|
||||
if char == '\\' {
|
||||
escape = true
|
||||
continue
|
||||
}
|
||||
if char == '"' {
|
||||
inString = !inString
|
||||
continue
|
||||
}
|
||||
if inString {
|
||||
continue
|
||||
}
|
||||
switch char {
|
||||
case '{':
|
||||
braceCount++
|
||||
case '}':
|
||||
braceCount--
|
||||
case '[':
|
||||
bracketCount++
|
||||
case ']':
|
||||
bracketCount--
|
||||
}
|
||||
}
|
||||
str = truncated
|
||||
}
|
||||
|
||||
// Add missing closing brackets
|
||||
for braceCount > 0 {
|
||||
str += "}"
|
||||
braceCount--
|
||||
}
|
||||
for bracketCount > 0 {
|
||||
str += "]"
|
||||
bracketCount--
|
||||
}
|
||||
}
|
||||
|
||||
// CONSERVATIVE STRATEGY: Validate repaired JSON
|
||||
// If repair didn't produce valid JSON, return original string
|
||||
if err := json.Unmarshal([]byte(str), &testParse); err != nil {
|
||||
log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original")
|
||||
return originalStr
|
||||
}
|
||||
|
||||
log.Debugf("kiro: repairJSON - successfully repaired JSON")
|
||||
return str
|
||||
}
|
||||
|
||||
// escapeNewlinesInStrings escapes raw control characters (bytes below 0x20)
// that appear inside JSON string values, which JSON does not allow unescaped.
//
// Newline, carriage return and tab are written as \n, \r and \t; every other
// control byte is written as a \u00XX escape. Bytes outside string literals are
// copied through untouched, as is the byte immediately following a backslash
// inside a string (an existing escape sequence).
//
// String boundaries are tracked by scanning for double quotes, so the input is
// assumed to be JSON-like text; the function does not validate it.
func escapeNewlinesInStrings(raw string) string {
	const hexDigits = "0123456789abcdef"

	var result strings.Builder
	result.Grow(len(raw) + 100) // Pre-allocate with some extra space for added escapes.

	inString := false
	escaped := false

	for i := 0; i < len(raw); i++ {
		c := raw[i]

		if escaped {
			// Previous byte was a backslash inside a string: copy the escaped byte as-is.
			result.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' && inString {
			// Start of an escape sequence.
			result.WriteByte(c)
			escaped = true
			continue
		}

		if c == '"' {
			// Unescaped quote toggles the in-string state.
			inString = !inString
			result.WriteByte(c)
			continue
		}

		if !inString || c >= 0x20 {
			// Structural bytes and ordinary string bytes pass through unchanged.
			result.WriteByte(c)
			continue
		}

		// Raw control character inside a string value.
		switch c {
		case '\n':
			result.WriteString("\\n")
		case '\r':
			result.WriteString("\\r")
		case '\t':
			result.WriteString("\\t")
		default:
			result.WriteString("\\u00")
			result.WriteByte(hexDigits[c>>4])
			result.WriteByte(hexDigits[c&0x0f])
		}
	}

	return result.String()
}
|
||||
|
||||
// processToolUseEvent handles a toolUseEvent from the Kiro stream.
|
||||
@@ -2330,6 +2943,8 @@ func (e *KiroExecutor) processToolUseEvent(event map[string]interface{}, current
|
||||
|
||||
// Accumulate input fragments
|
||||
if currentToolUse != nil && inputFragment != "" {
|
||||
// Accumulate fragments directly - they form valid JSON when combined
|
||||
// The fragments are already decoded from JSON, so we just concatenate them
|
||||
currentToolUse.inputBuffer.WriteString(inputFragment)
|
||||
log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.inputBuffer.Len())
|
||||
}
|
||||
|
||||
@@ -58,6 +58,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
@@ -143,6 +147,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
|
||||
@@ -12,8 +12,8 @@ import (
|
||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
||||
if !ok {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
@@ -29,17 +29,60 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
||||
if !ok {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil && util.ModelSupportsThinking(model) {
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
|
||||
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
|
||||
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
|
||||
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
|
||||
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyPayloadConfig applies payload default and override rules from configuration
|
||||
// to the given JSON payload for the specified model.
|
||||
// Defaults only fill missing fields, while overrides always overwrite existing values.
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -50,6 +51,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
@@ -121,6 +126,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||
// This will have no real consequences. It's just to scare Qwen3.
|
||||
|
||||
@@ -171,7 +171,7 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
||||
return results
|
||||
|
||||
case "message_delta":
|
||||
// Final message delta with stop_reason
|
||||
// Final message delta with stop_reason and usage
|
||||
stopReason := root.Get("delta.stop_reason").String()
|
||||
if stopReason != "" {
|
||||
finishReason := "stop"
|
||||
@@ -196,6 +196,19 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Extract and include usage information from message_delta event
|
||||
usage := root.Get("usage")
|
||||
if usage.Exists() {
|
||||
inputTokens := usage.Get("input_tokens").Int()
|
||||
outputTokens := usage.Get("output_tokens").Int()
|
||||
response["usage"] = map[string]interface{}{
|
||||
"prompt_tokens": inputTokens,
|
||||
"completion_tokens": outputTokens,
|
||||
"total_tokens": inputTokens + outputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
result, _ := json.Marshal(response)
|
||||
results = append(results, string(result))
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -15,80 +13,6 @@ const (
|
||||
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
||||
)
|
||||
|
||||
// ParseGeminiThinkingSuffix recognises thinking suffixes on model names that
// start with "gemini-" (case-insensitive).
//
// It returns the base model name, an optional thinking budget, an optional
// include-thoughts flag, and whether a suffix was recognised:
//   - "-nothinking": budget 0 (128 for names starting with "gemini-2.5-pro"),
//     include-thoughts false.
//   - "-reasoning": budget -1, include-thoughts true.
//   - "-thinking-<digits>": budget set to the leading run of digits; the
//     include-thoughts flag is left nil.
//
// When nothing matches, the input is returned unchanged with nil pointers and
// false.
func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
	const (
		noThinkingSuffix = "-nothinking"
		reasoningSuffix  = "-reasoning"
		budgetMarker     = "-thinking-"
	)

	folded := strings.ToLower(model)
	if model == "" || !strings.HasPrefix(folded, "gemini-") {
		return model, nil, nil, false
	}

	switch {
	case strings.HasSuffix(folded, noThinkingSuffix):
		budget := 0
		if strings.HasPrefix(folded, "gemini-2.5-pro") {
			budget = 128
		}
		show := false
		return model[:len(model)-len(noThinkingSuffix)], &budget, &show, true
	case strings.HasSuffix(folded, reasoningSuffix):
		budget, show := -1, true
		return model[:len(model)-len(reasoningSuffix)], &budget, &show, true
	}

	pos := strings.LastIndex(folded, budgetMarker)
	if pos < 0 {
		return model, nil, nil, false
	}

	// Only the leading digits after the marker count; anything after them is ignored.
	tail := model[pos+len(budgetMarker):]
	width := 0
	for width < len(tail) && tail[width] >= '0' && tail[width] <= '9' {
		width++
	}
	if width == 0 {
		return model, nil, nil, false
	}

	budget, err := strconv.Atoi(tail[:width])
	if err != nil {
		return model, nil, nil, false
	}
	return model[:pos], &budget, nil, true
}
|
||||
|
||||
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
|
||||
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
|
||||
if !matched {
|
||||
return baseModel, nil
|
||||
}
|
||||
metadata := map[string]any{
|
||||
GeminiOriginalModelMetadataKey: modelName,
|
||||
}
|
||||
if budget != nil {
|
||||
metadata[GeminiThinkingBudgetMetadataKey] = *budget
|
||||
}
|
||||
if include != nil {
|
||||
metadata[GeminiIncludeThoughtsMetadataKey] = *include
|
||||
}
|
||||
return baseModel, metadata
|
||||
}
|
||||
|
||||
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||
if budget == nil && includeThoughts == nil {
|
||||
return body
|
||||
@@ -133,80 +57,6 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
|
||||
return updated
|
||||
}
|
||||
|
||||
func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
|
||||
if len(metadata) == 0 {
|
||||
return nil, nil, false
|
||||
}
|
||||
var (
|
||||
budgetPtr *int
|
||||
includePtr *bool
|
||||
matched bool
|
||||
)
|
||||
if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
|
||||
switch v := rawBudget.(type) {
|
||||
case int:
|
||||
budget := v
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case int32:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case int64:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case float64:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
budget := int(val)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
|
||||
switch v := rawInclude.(type) {
|
||||
case bool:
|
||||
include := v
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case string:
|
||||
if parsed, err := strconv.ParseBool(v); err == nil {
|
||||
include := parsed
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
include := val != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
case int:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case int32:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case int64:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case float64:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
return budgetPtr, includePtr, matched
|
||||
}
|
||||
|
||||
// modelsWithDefaultThinking lists models that should have thinking enabled by default
|
||||
// when no explicit thinkingConfig is provided.
|
||||
var modelsWithDefaultThinking = map[string]bool{
|
||||
|
||||
327
internal/util/thinking_suffix.go
Normal file
327
internal/util/thinking_suffix.go
Normal file
@@ -0,0 +1,327 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
ThinkingBudgetMetadataKey = "thinking_budget"
|
||||
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
|
||||
ReasoningEffortMetadataKey = "reasoning_effort"
|
||||
ThinkingOriginalModelMetadataKey = "thinking_original_model"
|
||||
)
|
||||
|
||||
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
|
||||
// the normalized base model with extracted metadata. Supported patterns:
|
||||
// - "-thinking-<number>" extracts a numeric budget
|
||||
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
|
||||
// - "-thinking" maps to a default reasoning effort of "medium"
|
||||
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
|
||||
// - "-nothinking" maps to budget=0 and include_thoughts=false
|
||||
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||
if modelName == "" {
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
lower := strings.ToLower(modelName)
|
||||
baseModel := modelName
|
||||
|
||||
var (
|
||||
budgetOverride *int
|
||||
includeThoughts *bool
|
||||
reasoningEffort *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
switch {
|
||||
case strings.HasSuffix(lower, "-nothinking"):
|
||||
baseModel = modelName[:len(modelName)-len("-nothinking")]
|
||||
budget := 0
|
||||
include := false
|
||||
budgetOverride = &budget
|
||||
includeThoughts = &include
|
||||
matched = true
|
||||
case strings.HasSuffix(lower, "-reasoning"):
|
||||
baseModel = modelName[:len(modelName)-len("-reasoning")]
|
||||
budget := -1
|
||||
include := true
|
||||
budgetOverride = &budget
|
||||
includeThoughts = &include
|
||||
matched = true
|
||||
default:
|
||||
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
|
||||
value := modelName[idx+len("-thinking-"):]
|
||||
if value != "" {
|
||||
if parsed, ok := parseIntPrefix(value); ok {
|
||||
baseModel = modelName[:idx]
|
||||
budgetOverride = &parsed
|
||||
matched = true
|
||||
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
|
||||
baseModel = modelName[:idx]
|
||||
reasoningEffort = &effort
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
} else if strings.HasSuffix(lower, "-thinking") {
|
||||
baseModel = modelName[:len(modelName)-len("-thinking")]
|
||||
effort := "medium"
|
||||
reasoningEffort = &effort
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
|
||||
if !matched {
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
metadata := map[string]any{
|
||||
ThinkingOriginalModelMetadataKey: modelName,
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
|
||||
}
|
||||
if includeThoughts != nil {
|
||||
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
|
||||
}
|
||||
if reasoningEffort != nil {
|
||||
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
|
||||
}
|
||||
return baseModel, metadata
|
||||
}
|
||||
|
||||
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
|
||||
// It accepts both the new generic keys and legacy Gemini-specific keys.
|
||||
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
|
||||
if len(metadata) == 0 {
|
||||
return nil, nil, nil, false
|
||||
}
|
||||
|
||||
var (
|
||||
budgetPtr *int
|
||||
includePtr *bool
|
||||
effortPtr *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
readBudget := func(key string) {
|
||||
if budgetPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if v, okNumber := parseNumberToInt(raw); okNumber {
|
||||
budget := v
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readInclude := func(key string) {
|
||||
if includePtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
switch v := raw.(type) {
|
||||
case bool:
|
||||
val := v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
case *bool:
|
||||
if v != nil {
|
||||
val := *v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readEffort := func(key string) {
|
||||
if effortPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
|
||||
normalized := strings.ToLower(strings.TrimSpace(val))
|
||||
effortPtr = &normalized
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readBudget(ThinkingBudgetMetadataKey)
|
||||
readBudget(GeminiThinkingBudgetMetadataKey)
|
||||
readInclude(ThinkingIncludeThoughtsMetadataKey)
|
||||
readInclude(GeminiIncludeThoughtsMetadataKey)
|
||||
readEffort(ReasoningEffortMetadataKey)
|
||||
readEffort("reasoning.effort")
|
||||
|
||||
return budgetPtr, includePtr, effortPtr, matched
|
||||
}
|
||||
|
||||
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
|
||||
// converting reasoning effort strings into budgets when possible.
|
||||
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
if budget == nil && effort != nil {
|
||||
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
||||
budget = &derived
|
||||
}
|
||||
}
|
||||
return budget, include, budget != nil || include != nil || effort != nil
|
||||
}
|
||||
|
||||
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
|
||||
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
|
||||
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return "", false
|
||||
}
|
||||
if effort != nil && *effort != "" {
|
||||
return *effort, true
|
||||
}
|
||||
if budget != nil {
|
||||
switch *budget {
|
||||
case -1:
|
||||
return "auto", true
|
||||
case 0:
|
||||
return "none", true
|
||||
}
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return "none", true
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
|
||||
// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
|
||||
// clamping the result to the model's supported range.
|
||||
func ThinkingEffortToBudget(model, effort string) (int, bool) {
|
||||
if effort == "" {
|
||||
return 0, false
|
||||
}
|
||||
switch strings.ToLower(effort) {
|
||||
case "none":
|
||||
return 0, true
|
||||
case "auto":
|
||||
return NormalizeThinkingBudget(model, -1), true
|
||||
case "minimal":
|
||||
return NormalizeThinkingBudget(model, 512), true
|
||||
case "low":
|
||||
return NormalizeThinkingBudget(model, 1024), true
|
||||
case "medium":
|
||||
return NormalizeThinkingBudget(model, 8192), true
|
||||
case "high":
|
||||
return NormalizeThinkingBudget(model, 24576), true
|
||||
case "xhigh":
|
||||
return NormalizeThinkingBudget(model, 32768), true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
// ResolveOriginalModel returns the original model name stored in metadata (if present),
|
||||
// otherwise falls back to the provided model.
|
||||
func ResolveOriginalModel(model string, metadata map[string]any) string {
|
||||
normalize := func(name string) string {
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
if base, _ := NormalizeThinkingModel(name); base != "" {
|
||||
return base
|
||||
}
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
||||
if metadata != nil {
|
||||
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: try to re-normalize the model name when metadata was dropped.
|
||||
if base := normalize(model); base != "" {
|
||||
return base
|
||||
}
|
||||
return model
|
||||
}
|
||||
|
||||
// parseIntPrefix parses the integer at the start of value and ignores any
// trailing non-digit characters, so "128k" yields 128.
//
// A single leading '-' is honoured as a minus sign, so "-1" yields -1. This
// lets a dynamic budget be written as "-thinking--1". It returns (0, false)
// when value does not start with an optionally signed run of digits or when
// the number does not fit in an int.
func parseIntPrefix(value string) (int, bool) {
	start := 0
	if strings.HasPrefix(value, "-") {
		start = 1
	}

	end := start
	for end < len(value) && value[end] >= '0' && value[end] <= '9' {
		end++
	}
	if end == start {
		// No digits at all: empty input, a lone "-", or a non-numeric value.
		return 0, false
	}

	// Atoi sees the sign together with the digits, so the most negative int parses too.
	parsed, err := strconv.Atoi(value[:end])
	if err != nil {
		return 0, false
	}
	return parsed, true
}
|
||||
|
||||
// parseNumberToInt converts a loosely typed numeric value into an int.
//
// Supported inputs are int, int32, int64, float64 (truncated by int
// conversion), json.Number values that parse as an integer, and strings that
// hold a base-10 integer once surrounding whitespace is trimmed. Anything else
// yields (0, false).
func parseNumberToInt(raw any) (int, bool) {
	switch num := raw.(type) {
	case int:
		return num, true
	case int32:
		return int(num), true
	case int64:
		return int(num), true
	case float64:
		return int(num), true
	case json.Number:
		i64, err := num.Int64()
		if err != nil {
			return 0, false
		}
		return int(i64), true
	case string:
		// Atoi rejects the empty string, which covers blank input as well.
		n, err := strconv.Atoi(strings.TrimSpace(num))
		if err != nil {
			return 0, false
		}
		return n, true
	default:
		return 0, false
	}
}
|
||||
|
||||
// normalizeReasoningEffort trims and lower-cases value and reports whether it
// is one of the recognised effort levels: minimal, low, medium, high, xhigh,
// auto or none. Unrecognised or empty input yields ("", false).
func normalizeReasoningEffort(value string) (string, bool) {
	level := strings.ToLower(strings.TrimSpace(value))
	for _, known := range [...]string{"minimal", "low", "medium", "high", "xhigh", "auto", "none"} {
		if level == known {
			return level, true
		}
	}
	return "", false
}
|
||||
@@ -343,18 +343,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string
|
||||
|
||||
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
|
||||
|
||||
// First, normalize the model name to handle suffixes like "-thinking-128"
|
||||
// This needs to happen before determining the provider for non-dynamic models.
|
||||
normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
|
||||
targetModelName := resolvedModelName
|
||||
if isDynamic {
|
||||
targetModelName = extractedModelName
|
||||
}
|
||||
|
||||
// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
|
||||
normalizedModel, metadata = normalizeModelMetadata(targetModelName)
|
||||
|
||||
if isDynamic {
|
||||
providers = []string{providerName}
|
||||
// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
|
||||
// so we use it as the final normalizedModel.
|
||||
normalizedModel = extractedModelName
|
||||
} else {
|
||||
// For non-dynamic models, use the normalizedModel to get the provider name.
|
||||
providers = util.GetProviderName(normalizedModel)
|
||||
if len(providers) == 0 && metadata != nil {
|
||||
if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
|
||||
if originalModel, okStr := originalRaw.(string); okStr {
|
||||
originalModel = strings.TrimSpace(originalModel)
|
||||
if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
|
||||
if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
|
||||
providers = altProviders
|
||||
normalizedModel = originalModel
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(providers) == 0 {
|
||||
@@ -402,7 +416,7 @@ func cloneBytes(src []byte) []byte {
|
||||
}
|
||||
|
||||
func normalizeModelMetadata(modelName string) (string, map[string]any) {
|
||||
return util.NormalizeGeminiThinkingModel(modelName)
|
||||
return util.NormalizeThinkingModel(modelName)
|
||||
}
|
||||
|
||||
func cloneMetadata(src map[string]any) map[string]any {
|
||||
|
||||
Reference in New Issue
Block a user