mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-11 08:15:14 +00:00
Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
28469576bf | ||
|
|
ef0edbfe69 | ||
|
|
bb6312b4fc | ||
|
|
3c315551b0 | ||
|
|
27c9c5c4da | ||
|
|
fc9f6c974a | ||
|
|
242b4d5754 | ||
|
|
4ce7c61a17 | ||
|
|
a74ee3f319 | ||
|
|
564bcbaa54 | ||
|
|
88bdd25f06 | ||
|
|
e79f65fd8e | ||
|
|
2760989401 | ||
|
|
facfe7c518 | ||
|
|
6285459c08 | ||
|
|
21bbceca0c | ||
|
|
f6300c72b7 | ||
|
|
007572b58e | ||
|
|
3a81ab22fd | ||
|
|
519da2e042 | ||
|
|
169f4295d0 | ||
|
|
d06d0eab2f | ||
|
|
3ffd120ae9 | ||
|
|
a03d514095 | ||
|
|
69fccf0015 | ||
|
|
1da03bfe15 | ||
|
|
6133bac226 | ||
|
|
f302be5ce6 | ||
|
|
cd4e84a360 | ||
|
|
76c563d161 | ||
|
|
a89514951f |
@@ -105,7 +105,7 @@ ws-auth: false
|
||||
# excluded-models:
|
||||
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
|
||||
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
|
||||
# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
|
||||
|
||||
# Kiro (AWS CodeWhisperer) configuration
|
||||
|
||||
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.5 Haiku",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-20250929",
|
||||
@@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.1 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-20250514",
|
||||
@@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-20250514",
|
||||
@@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-7-sonnet-20250219",
|
||||
@@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 3.7 Sonnet",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-5-haiku-20241022",
|
||||
@@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 3.5 Haiku",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex",
|
||||
@@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini",
|
||||
@@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1",
|
||||
@@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex",
|
||||
@@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini",
|
||||
@@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max",
|
||||
@@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -610,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
|
||||
DisplayName string
|
||||
Description string
|
||||
Created int64
|
||||
Thinking *ThinkingSupport
|
||||
}{
|
||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||
@@ -619,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
|
||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -641,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
|
||||
Type: "iflow",
|
||||
DisplayName: entry.DisplayName,
|
||||
Description: entry.Description,
|
||||
Thinking: entry.Thinking,
|
||||
})
|
||||
}
|
||||
return models
|
||||
|
||||
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
|
||||
ZeroAllowed bool `json:"zero_allowed,omitempty"`
|
||||
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
|
||||
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
|
||||
// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
|
||||
// When set, the model uses level-based reasoning instead of token budgets.
|
||||
Levels []string `json:"levels,omitempty"`
|
||||
}
|
||||
|
||||
// ModelRegistration tracks a model's availability
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the AI Studio executor that routes requests through a websocket-backed
|
||||
// transport for the AI Studio provider.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -26,19 +29,28 @@ type AIStudioExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewAIStudioExecutor constructs a websocket executor for the provider name.
|
||||
// NewAIStudioExecutor creates a new AI Studio executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
// - provider: The provider name
|
||||
// - relay: The websocket relay manager
|
||||
//
|
||||
// Returns:
|
||||
// - *AIStudioExecutor: A new AI Studio executor instance
|
||||
func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
|
||||
return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the logical provider key for routing.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
|
||||
|
||||
// PrepareRequest is a no-op because websocket transport already injects headers.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for AI Studio).
|
||||
func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Execute performs a non-streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
@@ -92,6 +104,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
@@ -239,6 +252,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the AI Studio API.
|
||||
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
_, body, err := e.translateRequest(req, opts, false)
|
||||
if err != nil {
|
||||
@@ -293,8 +307,8 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
_ = ctx
|
||||
// Refresh refreshes the authentication credentials (no-op for AI Studio).
|
||||
func (e *AIStudioExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Antigravity executor that proxies requests to the antigravity
|
||||
// upstream using OAuth credentials.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -30,16 +33,15 @@ import (
|
||||
const (
|
||||
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
|
||||
// antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
|
||||
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
|
||||
antigravityStreamPath = "/v1internal:streamGenerateContent"
|
||||
antigravityGeneratePath = "/v1internal:generateContent"
|
||||
antigravityModelsPath = "/v1internal:fetchAvailableModels"
|
||||
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
||||
antigravityClientSecret = "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"
|
||||
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
||||
antigravityAuthType = "antigravity"
|
||||
refreshSkew = 3000 * time.Second
|
||||
streamScannerBuffer int = 20_971_520
|
||||
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
|
||||
antigravityStreamPath = "/v1internal:streamGenerateContent"
|
||||
antigravityGeneratePath = "/v1internal:generateContent"
|
||||
antigravityModelsPath = "/v1internal:fetchAvailableModels"
|
||||
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
||||
antigravityClientSecret = "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"
|
||||
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
||||
antigravityAuthType = "antigravity"
|
||||
refreshSkew = 3000 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -52,18 +54,24 @@ type AntigravityExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewAntigravityExecutor constructs a new executor instance.
|
||||
// NewAntigravityExecutor creates a new Antigravity executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *AntigravityExecutor: A new Antigravity executor instance
|
||||
func NewAntigravityExecutor(cfg *config.Config) *AntigravityExecutor {
|
||||
return &AntigravityExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier implements ProviderExecutor.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *AntigravityExecutor) Identifier() string { return antigravityAuthType }
|
||||
|
||||
// PrepareRequest implements ProviderExecutor.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Antigravity).
|
||||
func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
|
||||
|
||||
// Execute handles non-streaming requests via the antigravity generate endpoint.
|
||||
// Execute performs a non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
|
||||
if errToken != nil {
|
||||
@@ -156,7 +164,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// ExecuteStream handles streaming requests via the antigravity upstream.
|
||||
// ExecuteStream performs a streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
ctx = context.WithValue(ctx, "alt", "")
|
||||
|
||||
@@ -296,7 +304,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Refresh refreshes the OAuth token using the refresh token.
|
||||
// Refresh refreshes the authentication credentials using the refresh token.
|
||||
func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
if auth == nil {
|
||||
return auth, nil
|
||||
@@ -308,7 +316,7 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
|
||||
return updated, nil
|
||||
}
|
||||
|
||||
// CountTokens is not supported for the antigravity provider.
|
||||
// CountTokens counts tokens for the given request (not supported for Antigravity).
|
||||
func (e *AntigravityExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported"}
|
||||
}
|
||||
|
||||
@@ -254,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
// If from == to (Claude → Claude), directly forward the SSE stream without translation
|
||||
if from == to {
|
||||
scanner := bufio.NewScanner(decodedBody)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
appendAPIResponseChunk(ctx, e.cfg, line)
|
||||
@@ -277,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// For other formats, use translation
|
||||
scanner := bufio.NewScanner(decodedBody)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
|
||||
return betas, body
|
||||
}
|
||||
|
||||
// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
|
||||
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
|
||||
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
|
||||
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
|
||||
// Only inject if thinking config is not already present
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
|
||||
if !ok {
|
||||
return body
|
||||
}
|
||||
|
||||
budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
|
||||
if !ok || budgetTokens <= 0 {
|
||||
return body
|
||||
}
|
||||
|
||||
body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
|
||||
return body
|
||||
}
|
||||
|
||||
func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
|
||||
budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
|
||||
if matched {
|
||||
if include != nil && !*include {
|
||||
return 0, false
|
||||
}
|
||||
if budget != nil {
|
||||
normalized := util.NormalizeThinkingBudget(modelName, *budget)
|
||||
if normalized > 0 {
|
||||
return normalized, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
if effort != nil {
|
||||
if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
|
||||
return derived, true
|
||||
}
|
||||
}
|
||||
}
|
||||
return claudeBudgetFromSuffix(modelName)
|
||||
}
|
||||
|
||||
func claudeBudgetFromSuffix(modelName string) (int, bool) {
|
||||
lower := strings.ToLower(strings.TrimSpace(modelName))
|
||||
switch {
|
||||
case strings.HasSuffix(lower, "-thinking-low"):
|
||||
return 1024, true
|
||||
case strings.HasSuffix(lower, "-thinking-medium"):
|
||||
return 8192, true
|
||||
case strings.HasSuffix(lower, "-thinking-high"):
|
||||
return 24576, true
|
||||
case strings.HasSuffix(lower, "-thinking"):
|
||||
return 8192, true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
return util.ApplyClaudeThinkingConfig(body, budget)
|
||||
}
|
||||
|
||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
||||
|
||||
@@ -54,7 +54,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "stream", true)
|
||||
@@ -148,7 +152,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
@@ -208,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -246,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
|
||||
|
||||
modelForCounting := req.Model
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "stream", false)
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Gemini CLI executor that talks to Cloud Code Assist endpoints
|
||||
// using OAuth credentials from auth metadata.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -29,11 +32,11 @@ import (
|
||||
const (
|
||||
codeAssistEndpoint = "https://cloudcode-pa.googleapis.com"
|
||||
codeAssistVersion = "v1internal"
|
||||
geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOauthClientSecret = "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"
|
||||
geminiOAuthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOAuthClientSecret = "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"
|
||||
)
|
||||
|
||||
var geminiOauthScopes = []string{
|
||||
var geminiOAuthScopes = []string{
|
||||
"https://www.googleapis.com/auth/cloud-platform",
|
||||
"https://www.googleapis.com/auth/userinfo.email",
|
||||
"https://www.googleapis.com/auth/userinfo.profile",
|
||||
@@ -44,14 +47,24 @@ type GeminiCLIExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewGeminiCLIExecutor creates a new Gemini CLI executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiCLIExecutor: A new Gemini CLI executor instance
|
||||
func NewGeminiCLIExecutor(cfg *config.Config) *GeminiCLIExecutor {
|
||||
return &GeminiCLIExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiCLIExecutor) Identifier() string { return "gemini-cli" }
|
||||
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini CLI).
|
||||
func (e *GeminiCLIExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
|
||||
|
||||
// Execute performs a non-streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -189,6 +202,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -309,7 +323,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
}()
|
||||
if opts.Alt == "" {
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -371,6 +385,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -471,9 +486,8 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
||||
return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
|
||||
}
|
||||
|
||||
func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
log.Debugf("gemini cli executor: refresh called")
|
||||
_ = ctx
|
||||
// Refresh refreshes the authentication credentials (no-op for Gemini CLI).
|
||||
func (e *GeminiCLIExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
@@ -515,9 +529,9 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth *
|
||||
}
|
||||
|
||||
conf := &oauth2.Config{
|
||||
ClientID: geminiOauthClientID,
|
||||
ClientSecret: geminiOauthClientSecret,
|
||||
Scopes: geminiOauthScopes,
|
||||
ClientID: geminiOAuthClientID,
|
||||
ClientSecret: geminiOAuthClientSecret,
|
||||
Scopes: geminiOAuthScopes,
|
||||
Endpoint: google.Endpoint,
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
@@ -21,8 +20,6 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
"golang.org/x/oauth2"
|
||||
"golang.org/x/oauth2/google"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -31,6 +28,9 @@ const (
|
||||
|
||||
// glAPIVersion is the API version used for Gemini requests.
|
||||
glAPIVersion = "v1beta"
|
||||
|
||||
// streamScannerBuffer is the buffer size for SSE stream scanning.
|
||||
streamScannerBuffer = 52_428_800
|
||||
)
|
||||
|
||||
// GeminiExecutor is a stateless executor for the official Gemini API using API keys.
|
||||
@@ -48,9 +48,11 @@ type GeminiExecutor struct {
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiExecutor: A new Gemini executor instance
|
||||
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor { return &GeminiExecutor{cfg: cfg} }
|
||||
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor {
|
||||
return &GeminiExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the executor identifier for Gemini.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiExecutor) Identifier() string { return "gemini" }
|
||||
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini).
|
||||
@@ -164,6 +166,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini API.
|
||||
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
@@ -249,7 +252,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -280,6 +283,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini API.
|
||||
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
@@ -353,106 +357,8 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *GeminiExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
log.Debugf("gemini executor: refresh called")
|
||||
// OAuth bearer token refresh for official Gemini API.
|
||||
if auth == nil {
|
||||
return nil, fmt.Errorf("gemini executor: auth is nil")
|
||||
}
|
||||
if auth.Metadata == nil {
|
||||
return auth, nil
|
||||
}
|
||||
// Token data is typically nested under "token" map in Gemini files.
|
||||
tokenMap, _ := auth.Metadata["token"].(map[string]any)
|
||||
var refreshToken, accessToken, clientID, clientSecret, tokenURI, expiryStr string
|
||||
if tokenMap != nil {
|
||||
if v, ok := tokenMap["refresh_token"].(string); ok {
|
||||
refreshToken = v
|
||||
}
|
||||
if v, ok := tokenMap["access_token"].(string); ok {
|
||||
accessToken = v
|
||||
}
|
||||
if v, ok := tokenMap["client_id"].(string); ok {
|
||||
clientID = v
|
||||
}
|
||||
if v, ok := tokenMap["client_secret"].(string); ok {
|
||||
clientSecret = v
|
||||
}
|
||||
if v, ok := tokenMap["token_uri"].(string); ok {
|
||||
tokenURI = v
|
||||
}
|
||||
if v, ok := tokenMap["expiry"].(string); ok {
|
||||
expiryStr = v
|
||||
}
|
||||
} else {
|
||||
// Fallback to top-level keys if present
|
||||
if v, ok := auth.Metadata["refresh_token"].(string); ok {
|
||||
refreshToken = v
|
||||
}
|
||||
if v, ok := auth.Metadata["access_token"].(string); ok {
|
||||
accessToken = v
|
||||
}
|
||||
if v, ok := auth.Metadata["client_id"].(string); ok {
|
||||
clientID = v
|
||||
}
|
||||
if v, ok := auth.Metadata["client_secret"].(string); ok {
|
||||
clientSecret = v
|
||||
}
|
||||
if v, ok := auth.Metadata["token_uri"].(string); ok {
|
||||
tokenURI = v
|
||||
}
|
||||
if v, ok := auth.Metadata["expiry"].(string); ok {
|
||||
expiryStr = v
|
||||
}
|
||||
}
|
||||
if refreshToken == "" {
|
||||
// Nothing to do for API key or cookie based entries
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
// Prepare oauth2 config; default to Google endpoints
|
||||
endpoint := google.Endpoint
|
||||
if tokenURI != "" {
|
||||
endpoint.TokenURL = tokenURI
|
||||
}
|
||||
conf := &oauth2.Config{ClientID: clientID, ClientSecret: clientSecret, Endpoint: endpoint}
|
||||
|
||||
// Ensure proxy-aware HTTP client for token refresh
|
||||
httpClient := util.SetProxy(&e.cfg.SDKConfig, &http.Client{})
|
||||
ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient)
|
||||
|
||||
// Build base token
|
||||
tok := &oauth2.Token{AccessToken: accessToken, RefreshToken: refreshToken}
|
||||
if t, err := time.Parse(time.RFC3339, expiryStr); err == nil {
|
||||
tok.Expiry = t
|
||||
}
|
||||
newTok, err := conf.TokenSource(ctx, tok).Token()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Persist back to metadata; prefer nested token map if present
|
||||
if tokenMap == nil {
|
||||
tokenMap = make(map[string]any)
|
||||
}
|
||||
tokenMap["access_token"] = newTok.AccessToken
|
||||
tokenMap["refresh_token"] = newTok.RefreshToken
|
||||
tokenMap["expiry"] = newTok.Expiry.Format(time.RFC3339)
|
||||
if clientID != "" {
|
||||
tokenMap["client_id"] = clientID
|
||||
}
|
||||
if clientSecret != "" {
|
||||
tokenMap["client_secret"] = clientSecret
|
||||
}
|
||||
if tokenURI != "" {
|
||||
tokenMap["token_uri"] = tokenURI
|
||||
}
|
||||
auth.Metadata["token"] = tokenMap
|
||||
|
||||
// Also mirror top-level access_token for compatibility if previously present
|
||||
if _, ok := auth.Metadata["access_token"]; ok {
|
||||
auth.Metadata["access_token"] = newTok.AccessToken
|
||||
}
|
||||
// Refresh refreshes the authentication credentials (no-op for Gemini API key).
|
||||
func (e *GeminiExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Package executor contains provider executors. This file implements the Vertex AI
|
||||
// Gemini executor that talks to Google Vertex AI endpoints using service account
|
||||
// credentials imported by the CLI.
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Vertex AI Gemini executor that talks to Google Vertex AI
|
||||
// endpoints using service account credentials or API keys.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -36,20 +36,26 @@ type GeminiVertexExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewGeminiVertexExecutor constructs the Vertex executor.
|
||||
// NewGeminiVertexExecutor creates a new Vertex AI Gemini executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiVertexExecutor: A new Vertex AI Gemini executor instance
|
||||
func NewGeminiVertexExecutor(cfg *config.Config) *GeminiVertexExecutor {
|
||||
return &GeminiVertexExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns provider key for manager routing.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiVertexExecutor) Identifier() string { return "vertex" }
|
||||
|
||||
// PrepareRequest is a no-op for Vertex.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Vertex).
|
||||
func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Execute handles non-streaming requests.
|
||||
// Execute performs a non-streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -67,7 +73,7 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
return e.executeWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// ExecuteStream handles SSE streaming for Vertex.
|
||||
// ExecuteStream performs a streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -85,7 +91,7 @@ func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
return e.executeStreamWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// CountTokens calls Vertex countTokens endpoint.
|
||||
// CountTokens counts tokens for the given request using the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -103,185 +109,7 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
|
||||
return e.countTokensWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// countTokensWithServiceAccount handles token counting using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+token)
|
||||
} else if errTok != nil {
|
||||
log.Errorf("vertex executor: access token error: %v", errTok)
|
||||
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if apiKey != "" {
|
||||
httpReq.Header.Set("x-goog-api-key", apiKey)
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// Refresh is a no-op for service account based credentials.
|
||||
// Refresh refreshes the authentication credentials (no-op for Vertex).
|
||||
func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
@@ -579,7 +407,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -696,7 +524,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -722,6 +550,184 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// countTokensWithServiceAccount counts tokens using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+token)
|
||||
} else if errTok != nil {
|
||||
log.Errorf("vertex executor: access token error: %v", errTok)
|
||||
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if apiKey != "" {
|
||||
httpReq.Header.Set("x-goog-api-key", apiKey)
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// vertexCreds extracts project, location and raw service account JSON from auth metadata.
|
||||
func vertexCreds(a *cliproxyauth.Auth) (projectID, location string, serviceAccountJSON []byte, err error) {
|
||||
if a == nil || a.Metadata == nil {
|
||||
|
||||
@@ -57,10 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||
@@ -143,10 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||
@@ -209,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -121,7 +121,12 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
return resp, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
if profileArn == "" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
// Only warn if not using builder-id auth (which doesn't need profileArn)
|
||||
if auth == nil || auth.Metadata == nil {
|
||||
log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
|
||||
} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
}
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
@@ -161,10 +166,19 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
currentOrigin = "CLI"
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
// Determine if profileArn should be included based on auth method
|
||||
// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
|
||||
effectiveProfileArn := profileArn
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
|
||||
effectiveProfileArn = "" // Don't include profileArn for builder-id auth
|
||||
}
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
|
||||
// Execute with retry on 401/403 and 429 (quota exhausted)
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
return resp, err
|
||||
}
|
||||
|
||||
@@ -301,9 +315,18 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
}
|
||||
}
|
||||
if len(content) > 0 {
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
// Use tiktoken for more accurate output token calculation
|
||||
if enc, encErr := tokenizerForModel(req.Model); encErr == nil {
|
||||
if tokenCount, countErr := enc.Count(content); countErr == nil {
|
||||
usageInfo.OutputTokens = int64(tokenCount)
|
||||
}
|
||||
}
|
||||
// Fallback to character count estimation if tiktoken fails
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
usageInfo.OutputTokens = int64(len(content) / 4)
|
||||
if usageInfo.OutputTokens == 0 {
|
||||
usageInfo.OutputTokens = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||
@@ -330,7 +353,12 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
return nil, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
if profileArn == "" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
// Only warn if not using builder-id auth (which doesn't need profileArn)
|
||||
if auth == nil || auth.Metadata == nil {
|
||||
log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
|
||||
} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
|
||||
log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
|
||||
}
|
||||
}
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
@@ -370,10 +398,19 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
currentOrigin = "CLI"
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
// Determine if profileArn should be included based on auth method
|
||||
// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
|
||||
effectiveProfileArn := profileArn
|
||||
if auth != nil && auth.Metadata != nil {
|
||||
if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
|
||||
effectiveProfileArn = "" // Don't include profileArn for builder-id auth
|
||||
}
|
||||
}
|
||||
|
||||
kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)
|
||||
|
||||
// Execute stream with retry on 401/403 and 429 (quota exhausted)
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
|
||||
}
|
||||
|
||||
// executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors.
|
||||
@@ -491,6 +528,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
go func(resp *http.Response) {
|
||||
defer close(out)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Errorf("kiro: panic in stream handler: %v", r)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("internal error: %v", r)}
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
if errClose := resp.Body.Close(); errClose != nil {
|
||||
log.Errorf("response body close error: %v", errClose)
|
||||
@@ -587,10 +630,10 @@ type kiroPayload struct {
|
||||
}
|
||||
|
||||
type kiroConversationState struct {
|
||||
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
|
||||
ConversationID string `json:"conversationId"`
|
||||
History []kiroHistoryMessage `json:"history"`
|
||||
CurrentMessage kiroCurrentMessage `json:"currentMessage"`
|
||||
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL"
|
||||
History []kiroHistoryMessage `json:"history,omitempty"` // Only include when non-empty
|
||||
}
|
||||
|
||||
type kiroCurrentMessage struct {
|
||||
@@ -627,9 +670,9 @@ type kiroUserInputMessageContext struct {
|
||||
}
|
||||
|
||||
type kiroToolResult struct {
|
||||
ToolUseID string `json:"toolUseId"`
|
||||
Content []kiroTextContent `json:"content"`
|
||||
Status string `json:"status"`
|
||||
ToolUseID string `json:"toolUseId"`
|
||||
}
|
||||
|
||||
type kiroTextContent struct {
|
||||
@@ -735,7 +778,9 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
var currentUserMsg *kiroUserInputMessage
|
||||
var currentToolResults []kiroToolResult
|
||||
|
||||
messagesArray := messages.Array()
|
||||
// Merge adjacent messages with the same role before processing
|
||||
// This reduces API call complexity and improves compatibility
|
||||
messagesArray := mergeAdjacentMessages(messages.Array())
|
||||
for i, msg := range messagesArray {
|
||||
role := msg.Get("role").String()
|
||||
isLastMessage := i == len(messagesArray)-1
|
||||
@@ -746,6 +791,14 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
currentUserMsg = &userMsg
|
||||
currentToolResults = toolResults
|
||||
} else {
|
||||
// CRITICAL: Kiro API requires content to be non-empty for history messages too
|
||||
if strings.TrimSpace(userMsg.Content) == "" {
|
||||
if len(toolResults) > 0 {
|
||||
userMsg.Content = "Tool results provided."
|
||||
} else {
|
||||
userMsg.Content = "Continue"
|
||||
}
|
||||
}
|
||||
// For history messages, embed tool results in context
|
||||
if len(toolResults) > 0 {
|
||||
userMsg.UserInputMessageContext = &kiroUserInputMessageContext{
|
||||
@@ -758,9 +811,24 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
}
|
||||
} else if role == "assistant" {
|
||||
assistantMsg := e.buildAssistantMessageStruct(msg)
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
// If this is the last message and it's an assistant message,
|
||||
// we need to add it to history and create a "Continue" user message
|
||||
// because Kiro API requires currentMessage to be userInputMessage type
|
||||
if isLastMessage {
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
// Create a "Continue" user message as currentMessage
|
||||
currentUserMsg = &kiroUserInputMessage{
|
||||
Content: "Continue",
|
||||
ModelID: modelID,
|
||||
Origin: origin,
|
||||
}
|
||||
} else {
|
||||
history = append(history, kiroHistoryMessage{
|
||||
AssistantResponseMessage: &assistantMsg,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -777,7 +845,35 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
|
||||
// Add the actual user message
|
||||
contentBuilder.WriteString(currentUserMsg.Content)
|
||||
currentUserMsg.Content = contentBuilder.String()
|
||||
finalContent := contentBuilder.String()
|
||||
|
||||
// CRITICAL: Kiro API requires content to be non-empty, even when toolResults are present
|
||||
// If content is empty or only whitespace, provide a default message
|
||||
if strings.TrimSpace(finalContent) == "" {
|
||||
if len(currentToolResults) > 0 {
|
||||
finalContent = "Tool results provided."
|
||||
} else {
|
||||
finalContent = "Continue"
|
||||
}
|
||||
log.Debugf("kiro: content was empty, using default: %s", finalContent)
|
||||
}
|
||||
currentUserMsg.Content = finalContent
|
||||
|
||||
// Deduplicate currentToolResults before adding to context
|
||||
// Kiro API does not accept duplicate toolUseIds
|
||||
if len(currentToolResults) > 0 {
|
||||
seenIDs := make(map[string]bool)
|
||||
uniqueToolResults := make([]kiroToolResult, 0, len(currentToolResults))
|
||||
for _, tr := range currentToolResults {
|
||||
if !seenIDs[tr.ToolUseID] {
|
||||
seenIDs[tr.ToolUseID] = true
|
||||
uniqueToolResults = append(uniqueToolResults, tr)
|
||||
} else {
|
||||
log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID)
|
||||
}
|
||||
}
|
||||
currentToolResults = uniqueToolResults
|
||||
}
|
||||
|
||||
// Build userInputMessageContext with tools and tool results
|
||||
if len(kiroTools) > 0 || len(currentToolResults) > 0 {
|
||||
@@ -805,21 +901,18 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
}}
|
||||
}
|
||||
|
||||
// Build payload with correct field order (matches struct definition)
|
||||
// Note: history is omitempty, so nil/empty slice won't be serialized
|
||||
payload := kiroPayload{
|
||||
ConversationState: kiroConversationState{
|
||||
ChatTriggerType: "MANUAL", // Required by Kiro API - must be first
|
||||
ConversationID: uuid.New().String(),
|
||||
History: history,
|
||||
CurrentMessage: currentMessage,
|
||||
ChatTriggerType: "MANUAL", // Required by Kiro API
|
||||
History: history, // Will be omitted if empty due to omitempty tag
|
||||
},
|
||||
ProfileArn: profileArn,
|
||||
}
|
||||
|
||||
// Ensure history is not nil (empty array)
|
||||
if payload.ConversationState.History == nil {
|
||||
payload.ConversationState.History = []kiroHistoryMessage{}
|
||||
}
|
||||
|
||||
result, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
log.Debugf("kiro: failed to marshal payload: %v", err)
|
||||
@@ -830,11 +923,15 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
|
||||
|
||||
// buildUserMessageStruct builds a user message and extracts tool results
|
||||
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
|
||||
// IMPORTANT: Kiro API does not accept duplicate toolUseIds, so we deduplicate here.
|
||||
func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin string) (kiroUserInputMessage, []kiroToolResult) {
|
||||
content := msg.Get("content")
|
||||
var contentBuilder strings.Builder
|
||||
var toolResults []kiroToolResult
|
||||
var images []kiroImage
|
||||
|
||||
// Track seen toolUseIds to deduplicate - Kiro API rejects duplicate toolUseIds
|
||||
seenToolUseIDs := make(map[string]bool)
|
||||
|
||||
if content.IsArray() {
|
||||
for _, part := range content.Array() {
|
||||
@@ -864,6 +961,14 @@ func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin
|
||||
case "tool_result":
|
||||
// Extract tool result for API
|
||||
toolUseID := part.Get("tool_use_id").String()
|
||||
|
||||
// Skip duplicate toolUseIds - Kiro API does not accept duplicates
|
||||
if seenToolUseIDs[toolUseID] {
|
||||
log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID)
|
||||
continue
|
||||
}
|
||||
seenToolUseIDs[toolUseID] = true
|
||||
|
||||
isError := part.Get("is_error").Bool()
|
||||
resultContent := part.Get("content")
|
||||
|
||||
@@ -1001,6 +1106,12 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
|
||||
return content.String(), toolUses, usageInfo, fmt.Errorf("failed to read message: %w", err)
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract event type from headers
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
@@ -1018,6 +1129,37 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
|
||||
continue
|
||||
}
|
||||
|
||||
// DIAGNOSTIC: Log all received event types for debugging
|
||||
log.Debugf("kiro: parseEventStream received event type: %s", eventType)
|
||||
if log.IsLevelEnabled(log.TraceLevel) {
|
||||
log.Tracef("kiro: parseEventStream event payload: %s", string(payload))
|
||||
}
|
||||
|
||||
// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
|
||||
// These can appear as top-level fields or nested within the event
|
||||
if errType, hasErrType := event["_type"].(string); hasErrType {
|
||||
// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
log.Errorf("kiro: received AWS error in event stream: type=%s, message=%s", errType, errMsg)
|
||||
return "", nil, usageInfo, fmt.Errorf("kiro API error: %s - %s", errType, errMsg)
|
||||
}
|
||||
if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
|
||||
// Generic error event
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
} else if errObj, ok := event["error"].(map[string]interface{}); ok {
|
||||
if msg, ok := errObj["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
}
|
||||
log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
|
||||
return "", nil, usageInfo, fmt.Errorf("kiro API error: %s", errMsg)
|
||||
}
|
||||
|
||||
// Handle different event types
|
||||
switch eventType {
|
||||
case "assistantResponseEvent":
|
||||
@@ -1231,8 +1373,9 @@ func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUs
|
||||
// streamToChannel converts AWS Event Stream to channel-based streaming.
|
||||
// Supports tool calling - emits tool_use content blocks when tools are used.
|
||||
// Includes embedded [Called ...] tool call parsing and input buffering for toolUseEvent.
|
||||
// Implements duplicate content filtering using lastContentEvent detection (based on AIClient-2-API).
|
||||
func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) {
|
||||
reader := bufio.NewReader(body)
|
||||
reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
|
||||
var totalUsage usage.Detail
|
||||
var hasToolUses bool // Track if any tool uses were emitted
|
||||
|
||||
@@ -1240,6 +1383,15 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
processedIDs := make(map[string]bool)
|
||||
var currentToolUse *toolUseState
|
||||
|
||||
// Duplicate content detection - tracks last content event to filter duplicates
|
||||
// Based on AIClient-2-API implementation for Kiro
|
||||
var lastContentEvent string
|
||||
|
||||
// Streaming token calculation - accumulate content for real-time token counting
|
||||
// Based on AIClient-2-API implementation
|
||||
var accumulatedContent strings.Builder
|
||||
accumulatedContent.Grow(4096) // Pre-allocate 4KB capacity to reduce reallocations
|
||||
|
||||
// Translator param for maintaining tool call state across streaming events
|
||||
// IMPORTANT: This must persist across all TranslateStream calls
|
||||
var translatorParam any
|
||||
@@ -1279,6 +1431,51 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
prelude := make([]byte, 8)
|
||||
_, err := io.ReadFull(reader, prelude)
|
||||
if err == io.EOF {
|
||||
// Flush any incomplete tool use before ending stream
|
||||
if currentToolUse != nil && !processedIDs[currentToolUse.toolUseID] {
|
||||
log.Warnf("kiro: flushing incomplete tool use at EOF: %s (ID: %s)", currentToolUse.name, currentToolUse.toolUseID)
|
||||
fullInput := currentToolUse.inputBuffer.String()
|
||||
repairedJSON := repairJSON(fullInput)
|
||||
var finalInput map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
|
||||
log.Warnf("kiro: failed to parse incomplete tool input at EOF: %v", err)
|
||||
finalInput = make(map[string]interface{})
|
||||
}
|
||||
|
||||
processedIDs[currentToolUse.toolUseID] = true
|
||||
contentBlockIndex++
|
||||
|
||||
// Send tool_use content block
|
||||
blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", currentToolUse.toolUseID, currentToolUse.name)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Send tool input as delta
|
||||
inputBytes, _ := json.Marshal(finalInput)
|
||||
inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputBytes), contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Close block
|
||||
blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex)
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
hasToolUses = true
|
||||
currentToolUse = nil
|
||||
}
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
@@ -1304,6 +1501,12 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
return
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
payloadStart := 4 + headersLen
|
||||
@@ -1317,9 +1520,43 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
|
||||
var event map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &event); err != nil {
|
||||
log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
|
||||
continue
|
||||
}
|
||||
|
||||
// DIAGNOSTIC: Log all received event types for debugging
|
||||
log.Debugf("kiro: streamToChannel received event type: %s", eventType)
|
||||
if log.IsLevelEnabled(log.TraceLevel) {
|
||||
log.Tracef("kiro: streamToChannel event payload: %s", string(payload))
|
||||
}
|
||||
|
||||
// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
|
||||
// These can appear as top-level fields or nested within the event
|
||||
if errType, hasErrType := event["_type"].(string); hasErrType {
|
||||
// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
log.Errorf("kiro: received AWS error in stream: type=%s, message=%s", errType, errMsg)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s - %s", errType, errMsg)}
|
||||
return
|
||||
}
|
||||
if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
|
||||
// Generic error event
|
||||
errMsg := ""
|
||||
if msg, ok := event["message"].(string); ok {
|
||||
errMsg = msg
|
||||
} else if errObj, ok := event["error"].(map[string]interface{}); ok {
|
||||
if msg, ok := errObj["message"].(string); ok {
|
||||
errMsg = msg
|
||||
}
|
||||
}
|
||||
log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s", errMsg)}
|
||||
return
|
||||
}
|
||||
|
||||
// Send message_start on first event
|
||||
if !messageStartSent {
|
||||
msgStart := e.buildClaudeMessageStartEvent(model, totalUsage.InputTokens)
|
||||
@@ -1364,9 +1601,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
}
|
||||
|
||||
// Handle text content
|
||||
// Handle text content with duplicate detection
|
||||
if contentDelta != "" {
|
||||
// Check for duplicate content - skip if identical to last content event
|
||||
// Based on AIClient-2-API implementation for Kiro
|
||||
if contentDelta == lastContentEvent {
|
||||
log.Debugf("kiro: skipping duplicate content event (len: %d)", len(contentDelta))
|
||||
continue
|
||||
}
|
||||
lastContentEvent = contentDelta
|
||||
|
||||
outputLen += len(contentDelta)
|
||||
// Accumulate content for streaming token calculation
|
||||
accumulatedContent.WriteString(contentDelta)
|
||||
// Start text content block if needed
|
||||
if !isTextBlockOpen {
|
||||
contentBlockIndex++
|
||||
@@ -1538,8 +1785,32 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback for output tokens if not received from upstream
|
||||
if totalUsage.OutputTokens == 0 && outputLen > 0 {
|
||||
// Streaming token calculation - calculate output tokens from accumulated content
|
||||
// This provides more accurate token counting than simple character division
|
||||
if totalUsage.OutputTokens == 0 && accumulatedContent.Len() > 0 {
|
||||
// Try to use tiktoken for accurate counting
|
||||
if enc, err := tokenizerForModel(model); err == nil {
|
||||
if tokenCount, countErr := enc.Count(accumulatedContent.String()); countErr == nil {
|
||||
totalUsage.OutputTokens = int64(tokenCount)
|
||||
log.Debugf("kiro: streamToChannel calculated output tokens using tiktoken: %d", totalUsage.OutputTokens)
|
||||
} else {
|
||||
// Fallback on count error: estimate from character count
|
||||
totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
}
|
||||
log.Debugf("kiro: streamToChannel tiktoken count failed, estimated from chars: %d", totalUsage.OutputTokens)
|
||||
}
|
||||
} else {
|
||||
// Fallback: estimate from character count (roughly 4 chars per token)
|
||||
totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
}
|
||||
log.Debugf("kiro: streamToChannel estimated output tokens from chars: %d (content len: %d)", totalUsage.OutputTokens, accumulatedContent.Len())
|
||||
}
|
||||
} else if totalUsage.OutputTokens == 0 && outputLen > 0 {
|
||||
// Legacy fallback using outputLen
|
||||
totalUsage.OutputTokens = int64(outputLen / 4)
|
||||
if totalUsage.OutputTokens == 0 {
|
||||
totalUsage.OutputTokens = 1
|
||||
@@ -1553,9 +1824,18 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
stopReason = "tool_use"
|
||||
}
|
||||
|
||||
// Send message_delta and message_stop
|
||||
msgStop := e.buildClaudeMessageStopEvent(stopReason, totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
// Send message_delta event
|
||||
msgDelta := e.buildClaudeMessageDeltaEvent(stopReason, totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
|
||||
// Send message_stop event separately
|
||||
msgStop := e.buildClaudeMessageStopOnlyEvent()
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
@@ -1646,8 +1926,8 @@ func (e *KiroExecutor) buildClaudeContentBlockStopEvent(index int) []byte {
|
||||
return []byte("event: content_block_stop\ndata: " + string(result))
|
||||
}
|
||||
|
||||
func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||
// First message_delta
|
||||
// buildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage.
|
||||
func (e *KiroExecutor) buildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||
deltaEvent := map[string]interface{}{
|
||||
"type": "message_delta",
|
||||
"delta": map[string]interface{}{
|
||||
@@ -1660,14 +1940,16 @@ func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo
|
||||
},
|
||||
}
|
||||
deltaResult, _ := json.Marshal(deltaEvent)
|
||||
return []byte("event: message_delta\ndata: " + string(deltaResult))
|
||||
}
|
||||
|
||||
// Then message_stop
|
||||
// buildClaudeMessageStopOnlyEvent creates only the message_stop event.
|
||||
func (e *KiroExecutor) buildClaudeMessageStopOnlyEvent() []byte {
|
||||
stopEvent := map[string]interface{}{
|
||||
"type": "message_stop",
|
||||
}
|
||||
stopResult, _ := json.Marshal(stopEvent)
|
||||
|
||||
return []byte("event: message_delta\ndata: " + string(deltaResult) + "\n\nevent: message_stop\ndata: " + string(stopResult))
|
||||
return []byte("event: message_stop\ndata: " + string(stopResult))
|
||||
}
|
||||
|
||||
// buildClaudeFinalEvent constructs the final Claude-style event.
|
||||
@@ -1873,6 +2155,12 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
return fmt.Errorf("failed to read message: %w", err)
|
||||
}
|
||||
|
||||
// Validate headersLen to prevent slice out of bounds
|
||||
if headersLen+4 > uint32(len(remaining)) {
|
||||
log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
|
||||
continue
|
||||
}
|
||||
|
||||
eventType := e.extractEventType(remaining[:headersLen+4])
|
||||
|
||||
payloadStart := 4 + headersLen
|
||||
@@ -1886,6 +2174,7 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
|
||||
var event map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &event); err != nil {
|
||||
log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1983,9 +2272,19 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
|
||||
}
|
||||
totalUsage.TotalTokens = totalUsage.InputTokens + totalUsage.OutputTokens
|
||||
|
||||
// Always use end_turn (no tool_use support)
|
||||
msgStop := e.buildClaudeMessageStopEvent("end_turn", totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
// Send message_delta event
|
||||
msgDelta := e.buildClaudeMessageDeltaEvent("end_turn", totalUsage)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
c.Writer.Write([]byte(chunk + "\n\n"))
|
||||
}
|
||||
}
|
||||
c.Writer.Flush()
|
||||
|
||||
// Send message_stop event separately
|
||||
msgStop := e.buildClaudeMessageStopOnlyEvent()
|
||||
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
c.Writer.Write([]byte(chunk + "\n\n"))
|
||||
@@ -2057,6 +2356,128 @@ func (e *KiroExecutor) isTokenExpired(accessToken string) bool {
|
||||
return isExpired
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Message Merging Support - Merge adjacent messages with the same role
|
||||
// Based on AIClient-2-API implementation
|
||||
// ============================================================================
|
||||
|
||||
// mergeAdjacentMessages merges adjacent messages with the same role.
|
||||
// This reduces API call complexity and improves compatibility.
|
||||
// Based on AIClient-2-API implementation.
|
||||
func mergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
|
||||
if len(messages) <= 1 {
|
||||
return messages
|
||||
}
|
||||
|
||||
var merged []gjson.Result
|
||||
for _, msg := range messages {
|
||||
if len(merged) == 0 {
|
||||
merged = append(merged, msg)
|
||||
continue
|
||||
}
|
||||
|
||||
lastMsg := merged[len(merged)-1]
|
||||
currentRole := msg.Get("role").String()
|
||||
lastRole := lastMsg.Get("role").String()
|
||||
|
||||
if currentRole == lastRole {
|
||||
// Merge content from current message into last message
|
||||
mergedContent := mergeMessageContent(lastMsg, msg)
|
||||
// Create a new merged message JSON
|
||||
mergedMsg := createMergedMessage(lastRole, mergedContent)
|
||||
merged[len(merged)-1] = gjson.Parse(mergedMsg)
|
||||
} else {
|
||||
merged = append(merged, msg)
|
||||
}
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergeMessageContent merges the content of two messages with the same role.
|
||||
// Handles both string content and array content (with text, tool_use, tool_result blocks).
|
||||
func mergeMessageContent(msg1, msg2 gjson.Result) string {
|
||||
content1 := msg1.Get("content")
|
||||
content2 := msg2.Get("content")
|
||||
|
||||
// Extract content blocks from both messages
|
||||
var blocks1, blocks2 []map[string]interface{}
|
||||
|
||||
if content1.IsArray() {
|
||||
for _, block := range content1.Array() {
|
||||
blocks1 = append(blocks1, blockToMap(block))
|
||||
}
|
||||
} else if content1.Type == gjson.String {
|
||||
blocks1 = append(blocks1, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": content1.String(),
|
||||
})
|
||||
}
|
||||
|
||||
if content2.IsArray() {
|
||||
for _, block := range content2.Array() {
|
||||
blocks2 = append(blocks2, blockToMap(block))
|
||||
}
|
||||
} else if content2.Type == gjson.String {
|
||||
blocks2 = append(blocks2, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": content2.String(),
|
||||
})
|
||||
}
|
||||
|
||||
// Merge text blocks if both end/start with text
|
||||
if len(blocks1) > 0 && len(blocks2) > 0 {
|
||||
if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" {
|
||||
// Merge the last text block of msg1 with the first text block of msg2
|
||||
text1 := blocks1[len(blocks1)-1]["text"].(string)
|
||||
text2 := blocks2[0]["text"].(string)
|
||||
blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2
|
||||
blocks2 = blocks2[1:] // Remove the merged block from blocks2
|
||||
}
|
||||
}
|
||||
|
||||
// Combine all blocks
|
||||
allBlocks := append(blocks1, blocks2...)
|
||||
|
||||
// Convert to JSON
|
||||
result, _ := json.Marshal(allBlocks)
|
||||
return string(result)
|
||||
}
|
||||
|
||||
// blockToMap converts a gjson.Result block to a map[string]interface{}
|
||||
func blockToMap(block gjson.Result) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
block.ForEach(func(key, value gjson.Result) bool {
|
||||
if value.IsObject() {
|
||||
result[key.String()] = blockToMap(value)
|
||||
} else if value.IsArray() {
|
||||
var arr []interface{}
|
||||
for _, item := range value.Array() {
|
||||
if item.IsObject() {
|
||||
arr = append(arr, blockToMap(item))
|
||||
} else {
|
||||
arr = append(arr, item.Value())
|
||||
}
|
||||
}
|
||||
result[key.String()] = arr
|
||||
} else {
|
||||
result[key.String()] = value.Value()
|
||||
}
|
||||
return true
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
// createMergedMessage creates a JSON string for a merged message
|
||||
func createMergedMessage(role string, content string) string {
|
||||
msg := map[string]interface{}{
|
||||
"role": role,
|
||||
"content": json.RawMessage(content),
|
||||
}
|
||||
result, _ := json.Marshal(msg)
|
||||
return string(result)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tool Calling Support - Embedded tool call parsing and input buffering
|
||||
// Based on amq2api and AIClient-2-API implementations
|
||||
@@ -2079,8 +2500,6 @@ var (
|
||||
whitespaceCollapsePattern = regexp.MustCompile(`\s+`)
|
||||
// trailingCommaPattern matches trailing commas before closing braces/brackets
|
||||
trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`)
|
||||
// unquotedKeyPattern matches unquoted JSON keys that need quoting
|
||||
unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`)
|
||||
)
|
||||
|
||||
// parseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text.
|
||||
@@ -2246,14 +2665,208 @@ func findMatchingBracket(text string, startPos int) int {
|
||||
}
|
||||
|
||||
// repairJSON attempts to fix common JSON issues that may occur in tool call arguments.
|
||||
// Based on AIClient-2-API's JSON repair implementation.
|
||||
// Based on AIClient-2-API's JSON repair implementation with a more conservative strategy.
|
||||
//
|
||||
// Conservative repair strategy:
|
||||
// 1. First try to parse JSON directly - if valid, return as-is
|
||||
// 2. Only attempt repair if parsing fails
|
||||
// 3. After repair, validate the result - if still invalid, return original
|
||||
//
|
||||
// Handles incomplete JSON by balancing brackets and removing trailing incomplete content.
|
||||
// Uses pre-compiled regex patterns for performance.
|
||||
func repairJSON(raw string) string {
|
||||
func repairJSON(jsonString string) string {
|
||||
// Handle empty or invalid input
|
||||
if jsonString == "" {
|
||||
return "{}"
|
||||
}
|
||||
|
||||
str := strings.TrimSpace(jsonString)
|
||||
if str == "" {
|
||||
return "{}"
|
||||
}
|
||||
|
||||
// CONSERVATIVE STRATEGY: First try to parse directly
|
||||
// If the JSON is already valid, return it unchanged
|
||||
var testParse interface{}
|
||||
if err := json.Unmarshal([]byte(str), &testParse); err == nil {
|
||||
log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged")
|
||||
return str
|
||||
}
|
||||
|
||||
log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair")
|
||||
originalStr := str // Keep original for fallback
|
||||
|
||||
// First, escape unescaped newlines/tabs within JSON string values
|
||||
str = escapeNewlinesInStrings(str)
|
||||
// Remove trailing commas before closing braces/brackets
|
||||
repaired := trailingCommaPattern.ReplaceAllString(raw, "$1")
|
||||
// Fix unquoted keys (basic attempt - handles simple cases)
|
||||
repaired = unquotedKeyPattern.ReplaceAllString(repaired, `$1"$2":`)
|
||||
return repaired
|
||||
str = trailingCommaPattern.ReplaceAllString(str, "$1")
|
||||
|
||||
// Calculate bracket balance to detect incomplete JSON
|
||||
braceCount := 0 // {} balance
|
||||
bracketCount := 0 // [] balance
|
||||
inString := false
|
||||
escape := false
|
||||
lastValidIndex := -1
|
||||
|
||||
for i := 0; i < len(str); i++ {
|
||||
char := str[i]
|
||||
|
||||
// Handle escape sequences
|
||||
if escape {
|
||||
escape = false
|
||||
continue
|
||||
}
|
||||
|
||||
if char == '\\' {
|
||||
escape = true
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle string boundaries
|
||||
if char == '"' {
|
||||
inString = !inString
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip characters inside strings (they don't affect bracket balance)
|
||||
if inString {
|
||||
continue
|
||||
}
|
||||
|
||||
// Track bracket balance
|
||||
switch char {
|
||||
case '{':
|
||||
braceCount++
|
||||
case '}':
|
||||
braceCount--
|
||||
case '[':
|
||||
bracketCount++
|
||||
case ']':
|
||||
bracketCount--
|
||||
}
|
||||
|
||||
// Record last valid position (where brackets are balanced or positive)
|
||||
if braceCount >= 0 && bracketCount >= 0 {
|
||||
lastValidIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
// If brackets are unbalanced, try to repair
|
||||
if braceCount > 0 || bracketCount > 0 {
|
||||
// Truncate to last valid position if we have incomplete content
|
||||
if lastValidIndex > 0 && lastValidIndex < len(str)-1 {
|
||||
// Check if truncation would help (only truncate if there's trailing garbage)
|
||||
truncated := str[:lastValidIndex+1]
|
||||
// Recount brackets after truncation
|
||||
braceCount = 0
|
||||
bracketCount = 0
|
||||
inString = false
|
||||
escape = false
|
||||
for i := 0; i < len(truncated); i++ {
|
||||
char := truncated[i]
|
||||
if escape {
|
||||
escape = false
|
||||
continue
|
||||
}
|
||||
if char == '\\' {
|
||||
escape = true
|
||||
continue
|
||||
}
|
||||
if char == '"' {
|
||||
inString = !inString
|
||||
continue
|
||||
}
|
||||
if inString {
|
||||
continue
|
||||
}
|
||||
switch char {
|
||||
case '{':
|
||||
braceCount++
|
||||
case '}':
|
||||
braceCount--
|
||||
case '[':
|
||||
bracketCount++
|
||||
case ']':
|
||||
bracketCount--
|
||||
}
|
||||
}
|
||||
str = truncated
|
||||
}
|
||||
|
||||
// Add missing closing brackets
|
||||
for braceCount > 0 {
|
||||
str += "}"
|
||||
braceCount--
|
||||
}
|
||||
for bracketCount > 0 {
|
||||
str += "]"
|
||||
bracketCount--
|
||||
}
|
||||
}
|
||||
|
||||
// CONSERVATIVE STRATEGY: Validate repaired JSON
|
||||
// If repair didn't produce valid JSON, return original string
|
||||
if err := json.Unmarshal([]byte(str), &testParse); err != nil {
|
||||
log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original")
|
||||
return originalStr
|
||||
}
|
||||
|
||||
log.Debugf("kiro: repairJSON - successfully repaired JSON")
|
||||
return str
|
||||
}
|
||||
|
||||
// escapeNewlinesInStrings escapes literal newlines, tabs, and other control characters
|
||||
// that appear inside JSON string values. This handles cases where streaming fragments
|
||||
// contain unescaped control characters within string content.
|
||||
func escapeNewlinesInStrings(raw string) string {
|
||||
var result strings.Builder
|
||||
result.Grow(len(raw) + 100) // Pre-allocate with some extra space
|
||||
|
||||
inString := false
|
||||
escaped := false
|
||||
|
||||
for i := 0; i < len(raw); i++ {
|
||||
c := raw[i]
|
||||
|
||||
if escaped {
|
||||
// Previous character was backslash, this is an escape sequence
|
||||
result.WriteByte(c)
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
|
||||
if c == '\\' && inString {
|
||||
// Start of escape sequence
|
||||
result.WriteByte(c)
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
|
||||
if c == '"' {
|
||||
// Toggle string state
|
||||
inString = !inString
|
||||
result.WriteByte(c)
|
||||
continue
|
||||
}
|
||||
|
||||
if inString {
|
||||
// Inside a string, escape control characters
|
||||
switch c {
|
||||
case '\n':
|
||||
result.WriteString("\\n")
|
||||
case '\r':
|
||||
result.WriteString("\\r")
|
||||
case '\t':
|
||||
result.WriteString("\\t")
|
||||
default:
|
||||
result.WriteByte(c)
|
||||
}
|
||||
} else {
|
||||
result.WriteByte(c)
|
||||
}
|
||||
}
|
||||
|
||||
return result.String()
|
||||
}
|
||||
|
||||
// processToolUseEvent handles a toolUseEvent from the Kiro stream.
|
||||
@@ -2330,6 +2943,8 @@ func (e *KiroExecutor) processToolUseEvent(event map[string]interface{}, current
|
||||
|
||||
// Accumulate input fragments
|
||||
if currentToolUse != nil && inputFragment != "" {
|
||||
// Accumulate fragments directly - they form valid JSON when combined
|
||||
// The fragments are already decoded from JSON, so we just concatenate them
|
||||
currentToolUse.inputBuffer.WriteString(inputFragment)
|
||||
log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.inputBuffer.Len())
|
||||
}
|
||||
|
||||
@@ -58,10 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
@@ -147,10 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
@@ -214,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
@@ -9,7 +11,7 @@ import (
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
@@ -26,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
|
||||
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
@@ -43,40 +45,21 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
|
||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
|
||||
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
|
||||
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
||||
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
||||
// overwriting caller-provided values to honor suffix/default metadata priority.
|
||||
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
|
||||
if field == "" {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
|
||||
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
|
||||
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
|
||||
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
@@ -232,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
|
||||
}
|
||||
return pi == len(pattern)
|
||||
}
|
||||
|
||||
// normalizeThinkingConfig normalizes thinking-related fields in the payload
|
||||
// based on model capabilities. For models without thinking support, it strips
|
||||
// reasoning fields. For models with level-based thinking, it validates and
|
||||
// normalizes the reasoning effort level.
|
||||
func normalizeThinkingConfig(payload []byte, model string) []byte {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return payload
|
||||
}
|
||||
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return stripThinkingFields(payload)
|
||||
}
|
||||
|
||||
if util.ModelUsesThinkingLevels(model) {
|
||||
return normalizeReasoningEffortLevel(payload, model)
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// stripThinkingFields removes thinking-related fields from the payload for
|
||||
// models that do not support thinking.
|
||||
func stripThinkingFields(payload []byte) []byte {
|
||||
fieldsToRemove := []string{
|
||||
"reasoning",
|
||||
"reasoning_effort",
|
||||
"reasoning.effort",
|
||||
}
|
||||
out := payload
|
||||
for _, field := range fieldsToRemove {
|
||||
if gjson.GetBytes(out, field).Exists() {
|
||||
out, _ = sjson.DeleteBytes(out, field)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
|
||||
// or reasoning.effort field for level-based thinking models.
|
||||
func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
|
||||
out := payload
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
|
||||
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
|
||||
// downgrading requests.
|
||||
func validateThinkingConfig(payload []byte, model string) error {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return nil
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
|
||||
return nil
|
||||
}
|
||||
|
||||
levels := util.GetModelThinkingLevels(model)
|
||||
checkField := func(path string) error {
|
||||
if effort := gjson.GetBytes(payload, path); effort.Exists() {
|
||||
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
|
||||
return statusErr{
|
||||
code: http.StatusBadRequest,
|
||||
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := checkField("reasoning_effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkField("reasoning.effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -51,10 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
@@ -126,10 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
|
||||
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||
// This will have no real consequences. It's just to scare Qwen3.
|
||||
@@ -190,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -35,6 +35,7 @@ type Params struct {
|
||||
TotalTokenCount int64 // Cached total token count from usage metadata
|
||||
HasSentFinalEvents bool // Indicates if final content/message events have been sent
|
||||
HasToolUse bool // Indicates if tool use was observed in the stream
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
@@ -69,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
output := ""
|
||||
appendFinalEvents(params, &output, true)
|
||||
|
||||
return []string{
|
||||
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send final events if we have actually output content
|
||||
if params.HasContent {
|
||||
appendFinalEvents(params, &output, true)
|
||||
return []string{
|
||||
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
output := ""
|
||||
@@ -119,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
@@ -146,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.ResponseType = 2 // Set state to thinking
|
||||
params.HasContent = true
|
||||
}
|
||||
} else {
|
||||
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
|
||||
@@ -156,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
@@ -179,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.ResponseType = 1 // Set state to content
|
||||
params.HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -230,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
params.ResponseType = 3
|
||||
params.HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -269,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
|
||||
return
|
||||
}
|
||||
|
||||
// Only send final events if we have actually output content
|
||||
if !params.HasContent {
|
||||
return
|
||||
}
|
||||
|
||||
if params.ResponseType != 0 {
|
||||
*output = *output + "event: content_block_stop\n"
|
||||
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)
|
||||
|
||||
@@ -331,9 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
|
||||
streamingEvents := make([][]byte, 0)
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||
// Use a smaller initial buffer (64KB) that can grow up to 20MB if needed
|
||||
// This prevents allocating 20MB for every request regardless of size
|
||||
scanner.Buffer(make([]byte, 64*1024), 20_971_520)
|
||||
buffer := make([]byte, 52_428_800) // 50MB
|
||||
scanner.Buffer(buffer, 52_428_800)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
// log.Debug(string(line))
|
||||
|
||||
@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
|
||||
// Use a simple scanner to iterate through raw bytes
|
||||
// Note: extremely large responses may require increasing the buffer
|
||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||
buf := make([]byte, 20_971_520)
|
||||
scanner.Buffer(buf, 20_971_520)
|
||||
buf := make([]byte, 52_428_800) // 50MB
|
||||
scanner.Buffer(buf, 52_428_800)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if !bytes.HasPrefix(line, dataTag) {
|
||||
|
||||
@@ -26,6 +26,7 @@ type Params struct {
|
||||
HasFirstResponse bool // Indicates if the initial message_start event has been sent
|
||||
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
|
||||
ResponseIndex int // Index counter for content blocks in the streaming response
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
@@ -57,19 +58,23 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
}
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send message_stop if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// Track whether tools are being used in this response chunk
|
||||
usedTool := false
|
||||
var sb strings.Builder
|
||||
output := ""
|
||||
|
||||
// Initialize the streaming session with a message_start event
|
||||
// This is only sent for the very first response chunk to establish the streaming session
|
||||
if !(*param).(*Params).HasFirstResponse {
|
||||
sb.WriteString("event: message_start\n")
|
||||
output = "event: message_start\n"
|
||||
|
||||
// Create the initial message structure with default values according to Claude Code API specification
|
||||
// This follows the Claude Code API specification for streaming message initialization
|
||||
@@ -82,7 +87,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
if responseIDResult := gjson.GetBytes(rawJSON, "response.responseId"); responseIDResult.Exists() {
|
||||
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.id", responseIDResult.String())
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", messageStartTemplate))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", messageStartTemplate)
|
||||
|
||||
(*param).(*Params).HasFirstResponse = true
|
||||
}
|
||||
@@ -105,53 +110,67 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
if partResult.Get("thought").Bool() {
|
||||
// Continue existing thinking block if already in thinking state
|
||||
if (*param).(*Params).ResponseType == 2 {
|
||||
sb.WriteString("event: content_block_delta\n")
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
if (*param).(*Params).ResponseType != 0 {
|
||||
sb.WriteString("event: content_block_stop\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
if (*param).(*Params).ResponseType == 2 {
|
||||
// output = output + "event: content_block_delta\n"
|
||||
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
|
||||
// output = output + "\n\n\n"
|
||||
}
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
(*param).(*Params).ResponseIndex++
|
||||
}
|
||||
|
||||
// Start a new thinking content block
|
||||
sb.WriteString("event: content_block_start\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"thinking","thinking":""}}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
sb.WriteString("event: content_block_delta\n")
|
||||
output = output + "event: content_block_start\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"thinking","thinking":""}}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
} else {
|
||||
// Process regular text content (user-visible output)
|
||||
// Continue existing text block if already in content state
|
||||
if (*param).(*Params).ResponseType == 1 {
|
||||
sb.WriteString("event: content_block_delta\n")
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
if (*param).(*Params).ResponseType != 0 {
|
||||
sb.WriteString("event: content_block_stop\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
if (*param).(*Params).ResponseType == 2 {
|
||||
// output = output + "event: content_block_delta\n"
|
||||
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
|
||||
// output = output + "\n\n\n"
|
||||
}
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
(*param).(*Params).ResponseIndex++
|
||||
}
|
||||
|
||||
// Start a new text content block
|
||||
sb.WriteString("event: content_block_start\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
sb.WriteString("event: content_block_delta\n")
|
||||
output = output + "event: content_block_start\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
} else if functionCallResult.Exists() {
|
||||
@@ -163,37 +182,45 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
// Handle state transitions when switching to function calls
|
||||
// Close any existing function call block first
|
||||
if (*param).(*Params).ResponseType == 3 {
|
||||
sb.WriteString("event: content_block_stop\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
(*param).(*Params).ResponseIndex++
|
||||
(*param).(*Params).ResponseType = 0
|
||||
}
|
||||
|
||||
// Special handling for thinking state transition
|
||||
if (*param).(*Params).ResponseType == 2 {
|
||||
// output = output + "event: content_block_delta\n"
|
||||
// output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, (*param).(*Params).ResponseIndex)
|
||||
// output = output + "\n\n\n"
|
||||
}
|
||||
|
||||
// Close any other existing content block
|
||||
if (*param).(*Params).ResponseType != 0 {
|
||||
sb.WriteString("event: content_block_stop\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
(*param).(*Params).ResponseIndex++
|
||||
}
|
||||
|
||||
// Start a new tool use content block
|
||||
// This creates the structure for a function call in Claude Code format
|
||||
sb.WriteString("event: content_block_start\n")
|
||||
output = output + "event: content_block_start\n"
|
||||
|
||||
// Create the tool use block with unique ID and function details
|
||||
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
|
||||
data, _ = sjson.Set(data, "content_block.name", fcName)
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
sb.WriteString("event: content_block_delta\n")
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ = sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"input_json_delta","partial_json":""}}`, (*param).(*Params).ResponseIndex), "delta.partial_json", fcArgsResult.Raw)
|
||||
sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
(*param).(*Params).ResponseType = 3
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -202,32 +229,35 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
// Process usage metadata and finish reason when present in the response
|
||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
// Close the final content block
|
||||
sb.WriteString("event: content_block_stop\n")
|
||||
sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
|
||||
sb.WriteString("\n\n\n")
|
||||
// Only send final events if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
// Close the final content block
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
|
||||
// Send the final message delta with usage information and stop reason
|
||||
sb.WriteString("event: message_delta\n")
|
||||
sb.WriteString(`data: `)
|
||||
// Send the final message delta with usage information and stop reason
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
|
||||
// Create the message delta template with appropriate stop reason
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Set tool_use stop reason if tools were used in this response
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Create the message delta template with appropriate stop reason
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Set tool_use stop reason if tools were used in this response
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
}
|
||||
|
||||
// Include thinking tokens in output token count if present
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
|
||||
// Include thinking tokens in output token count if present
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
sb.WriteString(template + "\n\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
return []string{sb.String()}
|
||||
return []string{output}
|
||||
}
|
||||
|
||||
// ConvertGeminiCLIResponseToClaudeNonStream converts a non-streaming Gemini CLI response to a non-streaming Claude response.
|
||||
|
||||
@@ -25,6 +25,7 @@ type Params struct {
|
||||
HasFirstResponse bool
|
||||
ResponseType int
|
||||
ResponseIndex int
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
@@ -57,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
}
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send message_stop if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// Track whether tools are being used in this response chunk
|
||||
@@ -108,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
@@ -131,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
} else {
|
||||
// Process regular text content (user-visible output)
|
||||
@@ -139,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
@@ -162,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
} else if functionCallResult.Exists() {
|
||||
@@ -211,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
(*param).(*Params).ResponseType = 3
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -218,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
|
||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
// Only send final events if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
}
|
||||
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
||||
return results
|
||||
|
||||
case "message_delta":
|
||||
// Final message delta with stop_reason
|
||||
// Final message delta with stop_reason and usage
|
||||
stopReason := root.Get("delta.stop_reason").String()
|
||||
if stopReason != "" {
|
||||
finishReason := "stop"
|
||||
@@ -196,6 +196,19 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Extract and include usage information from message_delta event
|
||||
usage := root.Get("usage")
|
||||
if usage.Exists() {
|
||||
inputTokens := usage.Get("input_tokens").Int()
|
||||
outputTokens := usage.Get("output_tokens").Int()
|
||||
response["usage"] = map[string]interface{}{
|
||||
"prompt_tokens": inputTokens,
|
||||
"completion_tokens": outputTokens,
|
||||
"total_tokens": inputTokens + outputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
result, _ := json.Marshal(response)
|
||||
results = append(results, string(result))
|
||||
}
|
||||
|
||||
46
internal/util/claude_thinking.go
Normal file
46
internal/util/claude_thinking.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
|
||||
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
|
||||
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
|
||||
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
|
||||
if budget == nil {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
return body
|
||||
}
|
||||
if *budget <= 0 {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
|
||||
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
||||
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
||||
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
||||
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
||||
if !matched {
|
||||
return nil, false
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return nil, true
|
||||
}
|
||||
if budget == nil {
|
||||
return nil, true
|
||||
}
|
||||
normalized := NormalizeThinkingBudget(modelName, *budget)
|
||||
if normalized <= 0 {
|
||||
return nil, true
|
||||
}
|
||||
return &normalized, true
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
)
|
||||
|
||||
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
|
||||
}
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
|
||||
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
|
||||
// Returns nil if the model has no thinking support or no levels defined.
|
||||
func GetModelThinkingLevels(model string) []string {
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||
if info == nil || info.Thinking == nil {
|
||||
return nil
|
||||
}
|
||||
return info.Thinking.Levels
|
||||
}
|
||||
|
||||
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
|
||||
// effort levels instead of numeric budgets.
|
||||
func ModelUsesThinkingLevels(model string) bool {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
return len(levels) > 0
|
||||
}
|
||||
|
||||
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
|
||||
// level for the given model. Returns false when the level is not supported.
|
||||
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
if len(levels) == 0 {
|
||||
return "", false
|
||||
}
|
||||
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
|
||||
for _, lvl := range levels {
|
||||
if strings.ToLower(lvl) == loweredEffort {
|
||||
return lvl, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
@@ -14,61 +14,59 @@ const (
|
||||
)
|
||||
|
||||
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
|
||||
// the normalized base model with extracted metadata. Supported patterns:
|
||||
// - "-thinking-<number>" extracts a numeric budget
|
||||
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
|
||||
// - "-thinking" maps to a default reasoning effort of "medium"
|
||||
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
|
||||
// - "-nothinking" maps to budget=0 and include_thoughts=false
|
||||
// the normalized base model with extracted metadata. Supported pattern:
|
||||
// - "(<value>)" where value can be:
|
||||
// - A numeric budget (e.g., "(8192)", "(16384)")
|
||||
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
|
||||
//
|
||||
// Examples:
|
||||
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
|
||||
// - "gpt-5.1(high)" → reasoning_effort="high"
|
||||
// - "gemini-2.5-pro(32768)" → budget=32768
|
||||
//
|
||||
// Note: Empty parentheses "()" are not supported and will be ignored.
|
||||
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||
if modelName == "" {
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
lower := strings.ToLower(modelName)
|
||||
baseModel := modelName
|
||||
|
||||
var (
|
||||
budgetOverride *int
|
||||
includeThoughts *bool
|
||||
reasoningEffort *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
switch {
|
||||
case strings.HasSuffix(lower, "-nothinking"):
|
||||
baseModel = modelName[:len(modelName)-len("-nothinking")]
|
||||
budget := 0
|
||||
include := false
|
||||
budgetOverride = &budget
|
||||
includeThoughts = &include
|
||||
matched = true
|
||||
case strings.HasSuffix(lower, "-reasoning"):
|
||||
baseModel = modelName[:len(modelName)-len("-reasoning")]
|
||||
budget := -1
|
||||
include := true
|
||||
budgetOverride = &budget
|
||||
includeThoughts = &include
|
||||
matched = true
|
||||
default:
|
||||
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
|
||||
value := modelName[idx+len("-thinking-"):]
|
||||
if value != "" {
|
||||
if parsed, ok := parseIntPrefix(value); ok {
|
||||
baseModel = modelName[:idx]
|
||||
budgetOverride = &parsed
|
||||
matched = true
|
||||
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
|
||||
baseModel = modelName[:idx]
|
||||
reasoningEffort = &effort
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
} else if strings.HasSuffix(lower, "-thinking") {
|
||||
baseModel = modelName[:len(modelName)-len("-thinking")]
|
||||
effort := "medium"
|
||||
reasoningEffort = &effort
|
||||
// Match "(<value>)" pattern at the end of the model name
|
||||
if idx := strings.LastIndex(modelName, "("); idx != -1 {
|
||||
if !strings.HasSuffix(modelName, ")") {
|
||||
// Incomplete parenthesis, ignore
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
|
||||
if value == "" {
|
||||
// Empty parentheses not supported
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
candidateBase := modelName[:idx]
|
||||
|
||||
// Auto-detect: pure numeric → budget, string → reasoning effort level
|
||||
if parsed, ok := parseIntPrefix(value); ok {
|
||||
// Numeric value: treat as thinking budget
|
||||
baseModel = candidateBase
|
||||
budgetOverride = &parsed
|
||||
matched = true
|
||||
} else {
|
||||
// String value: treat as reasoning effort level
|
||||
baseModel = candidateBase
|
||||
raw := strings.ToLower(strings.TrimSpace(value))
|
||||
if raw != "" {
|
||||
reasoningEffort = &raw
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||
if budgetOverride != nil {
|
||||
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
|
||||
}
|
||||
if includeThoughts != nil {
|
||||
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
|
||||
}
|
||||
if reasoningEffort != nil {
|
||||
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
|
||||
}
|
||||
@@ -185,7 +180,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
|
||||
return "", false
|
||||
}
|
||||
if effort != nil && *effort != "" {
|
||||
return *effort, true
|
||||
return strings.ToLower(strings.TrimSpace(*effort)), true
|
||||
}
|
||||
if budget != nil {
|
||||
switch *budget {
|
||||
@@ -207,7 +202,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
|
||||
if effort == "" {
|
||||
return 0, false
|
||||
}
|
||||
switch strings.ToLower(effort) {
|
||||
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
|
||||
if !ok {
|
||||
normalized = strings.ToLower(strings.TrimSpace(effort))
|
||||
}
|
||||
switch normalized {
|
||||
case "none":
|
||||
return 0, true
|
||||
case "auto":
|
||||
@@ -312,16 +311,3 @@ func parseNumberToInt(raw any) (int, bool) {
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func normalizeReasoningEffort(value string) (string, bool) {
|
||||
if value == "" {
|
||||
return "", false
|
||||
}
|
||||
effort := strings.ToLower(strings.TrimSpace(value))
|
||||
switch effort {
|
||||
case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
|
||||
return effort, true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
||||
continue
|
||||
}
|
||||
if errMsg != nil {
|
||||
status := http.StatusInternalServerError
|
||||
if errMsg.StatusCode > 0 {
|
||||
status = errMsg.StatusCode
|
||||
}
|
||||
c.Status(status)
|
||||
// An error occurred: emit as a proper SSE error event
|
||||
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
||||
_, _ = writer.WriteString("event: error\n")
|
||||
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
||||
_, _ = writer.Write(errorBytes)
|
||||
_, _ = writer.WriteString("\n\n")
|
||||
_ = writer.Flush()
|
||||
flusher.Flush()
|
||||
}
|
||||
var execErr error
|
||||
if errMsg != nil {
|
||||
|
||||
Reference in New Issue
Block a user