mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-30 09:18:12 +00:00
Merge branch 'router-for-me:main' into main
This commit is contained in:
@@ -765,21 +765,23 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
type AntigravityModelConfig struct {
|
type AntigravityModelConfig struct {
|
||||||
Thinking *ThinkingSupport
|
Thinking *ThinkingSupport
|
||||||
MaxCompletionTokens int
|
MaxCompletionTokens int
|
||||||
Name string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||||
return map[string]*AntigravityModelConfig{
|
return map[string]*AntigravityModelConfig{
|
||||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/rev19-uic3-1p"},
|
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"},
|
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"},
|
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"},
|
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||||
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
|
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
|
||||||
|
"gpt-oss-120b-medium": {},
|
||||||
|
"tab_flash_lite_preview": {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -809,10 +811,9 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check Antigravity static config
|
// Check Antigravity static config
|
||||||
if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil && cfg.Thinking != nil {
|
if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil {
|
||||||
return &ModelInfo{
|
return &ModelInfo{
|
||||||
ID: modelID,
|
ID: modelID,
|
||||||
Name: cfg.Name,
|
|
||||||
Thinking: cfg.Thinking,
|
Thinking: cfg.Thinking,
|
||||||
MaxCompletionTokens: cfg.MaxCompletionTokens,
|
MaxCompletionTokens: cfg.MaxCompletionTokens,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1005,9 +1005,6 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
|||||||
}
|
}
|
||||||
modelCfg := modelConfig[modelID]
|
modelCfg := modelConfig[modelID]
|
||||||
modelName := modelID
|
modelName := modelID
|
||||||
if modelCfg != nil && modelCfg.Name != "" {
|
|
||||||
modelName = modelCfg.Name
|
|
||||||
}
|
|
||||||
modelInfo := &registry.ModelInfo{
|
modelInfo := &registry.ModelInfo{
|
||||||
ID: modelID,
|
ID: modelID,
|
||||||
Name: modelName,
|
Name: modelName,
|
||||||
@@ -1410,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b
|
|||||||
template, _ = sjson.Delete(template, "request.safetySettings")
|
template, _ = sjson.Delete(template, "request.safetySettings")
|
||||||
template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
|
template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
|
||||||
|
|
||||||
if !strings.HasPrefix(modelName, "gemini-3-") {
|
|
||||||
if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() {
|
|
||||||
template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel")
|
|
||||||
template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.Contains(modelName, "claude") {
|
if strings.Contains(modelName, "claude") {
|
||||||
gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
|
gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
|
||||||
tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
|
tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ import (
|
|||||||
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
|
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
@@ -119,9 +118,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
|
||||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
|
||||||
body = ensureMaxTokensForThinking(baseModel, body)
|
|
||||||
|
|
||||||
// Extract betas from body and convert to header
|
// Extract betas from body and convert to header
|
||||||
var extraBetas []string
|
var extraBetas []string
|
||||||
extraBetas, body = extractAndRemoveBetas(body)
|
extraBetas, body = extractAndRemoveBetas(body)
|
||||||
@@ -250,9 +246,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
|
||||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
|
||||||
body = ensureMaxTokensForThinking(baseModel, body)
|
|
||||||
|
|
||||||
// Extract betas from body and convert to header
|
// Extract betas from body and convert to header
|
||||||
var extraBetas []string
|
var extraBetas []string
|
||||||
extraBetas, body = extractAndRemoveBetas(body)
|
extraBetas, body = extractAndRemoveBetas(body)
|
||||||
@@ -541,81 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
|
||||||
// Anthropic API requires this constraint; violating it returns a 400 error.
|
|
||||||
// This function should be called after all thinking configuration is finalized.
|
|
||||||
// It looks up the model's MaxCompletionTokens from the registry to use as the cap.
|
|
||||||
func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
|
||||||
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
|
||||||
if thinkingType != "enabled" {
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
|
|
||||||
if budgetTokens <= 0 {
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
maxTokens := gjson.GetBytes(body, "max_tokens").Int()
|
|
||||||
|
|
||||||
// Look up the model's max completion tokens from the registry
|
|
||||||
maxCompletionTokens := 0
|
|
||||||
if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil {
|
|
||||||
maxCompletionTokens = modelInfo.MaxCompletionTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to budget + buffer if registry lookup fails or returns 0
|
|
||||||
const fallbackBuffer = 4000
|
|
||||||
requiredMaxTokens := budgetTokens + fallbackBuffer
|
|
||||||
if maxCompletionTokens > 0 {
|
|
||||||
requiredMaxTokens = int64(maxCompletionTokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
if maxTokens < requiredMaxTokens {
|
|
||||||
body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
|
|
||||||
}
|
|
||||||
return body
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey {
|
|
||||||
if auth == nil || e.cfg == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
var attrKey, attrBase string
|
|
||||||
if auth.Attributes != nil {
|
|
||||||
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
|
|
||||||
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
|
|
||||||
}
|
|
||||||
for i := range e.cfg.ClaudeKey {
|
|
||||||
entry := &e.cfg.ClaudeKey[i]
|
|
||||||
cfgKey := strings.TrimSpace(entry.APIKey)
|
|
||||||
cfgBase := strings.TrimSpace(entry.BaseURL)
|
|
||||||
if attrKey != "" && attrBase != "" {
|
|
||||||
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
|
|
||||||
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if attrKey != "" {
|
|
||||||
for i := range e.cfg.ClaudeKey {
|
|
||||||
entry := &e.cfg.ClaudeKey[i]
|
|
||||||
if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type compositeReadCloser struct {
|
type compositeReadCloser struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
closers []func() error
|
closers []func() error
|
||||||
|
|||||||
@@ -80,9 +80,66 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
|
|||||||
|
|
||||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||||
|
|
||||||
|
// Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint)
|
||||||
|
result = a.normalizeClaudeBudget(result, config.Budget, modelInfo)
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens.
|
||||||
|
// Anthropic API requires this constraint; violating it returns a 400 error.
|
||||||
|
func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte {
|
||||||
|
if budgetTokens <= 0 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the request satisfies Claude constraints:
|
||||||
|
// 1) Determine effective max_tokens (request overrides model default)
|
||||||
|
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
||||||
|
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
|
||||||
|
// 4) If max_tokens came from model default, write it back into the request
|
||||||
|
|
||||||
|
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||||
|
if setDefaultMax && effectiveMax > 0 {
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the budget we would apply after enforcing budget_tokens < max_tokens.
|
||||||
|
adjustedBudget := budgetTokens
|
||||||
|
if effectiveMax > 0 && adjustedBudget >= effectiveMax {
|
||||||
|
adjustedBudget = effectiveMax - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
minBudget := 0
|
||||||
|
if modelInfo != nil && modelInfo.Thinking != nil {
|
||||||
|
minBudget = modelInfo.Thinking.Min
|
||||||
|
}
|
||||||
|
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||||
|
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
||||||
|
// leave the request unchanged.
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
if adjustedBudget != budgetTokens {
|
||||||
|
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget)
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// effectiveMaxTokens returns the max tokens to cap thinking:
|
||||||
|
// prefer request-provided max_tokens; otherwise fall back to model default.
|
||||||
|
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||||
|
func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) {
|
||||||
|
if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||||
|
return int(maxTok.Int()), false
|
||||||
|
}
|
||||||
|
if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||||
|
return modelInfo.MaxCompletionTokens, true
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||||
return body, nil
|
return body, nil
|
||||||
|
|||||||
@@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
|
|||||||
} else {
|
} else {
|
||||||
template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
|
template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
|
||||||
}
|
}
|
||||||
template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int())
|
inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage"))
|
||||||
template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int())
|
template, _ = sjson.Set(template, "usage.input_tokens", inputTokens)
|
||||||
|
template, _ = sjson.Set(template, "usage.output_tokens", outputTokens)
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens)
|
||||||
|
}
|
||||||
|
|
||||||
output = "event: message_delta\n"
|
output = "event: message_delta\n"
|
||||||
output += fmt.Sprintf("data: %s\n\n", template)
|
output += fmt.Sprintf("data: %s\n\n", template)
|
||||||
@@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
|
|||||||
out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
|
out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
out, _ = sjson.Set(out, "id", responseData.Get("id").String())
|
out, _ = sjson.Set(out, "id", responseData.Get("id").String())
|
||||||
out, _ = sjson.Set(out, "model", responseData.Get("model").String())
|
out, _ = sjson.Set(out, "model", responseData.Get("model").String())
|
||||||
out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
|
inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage"))
|
||||||
out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
|
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||||
|
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||||
|
}
|
||||||
|
|
||||||
hasToolCall := false
|
hasToolCall := false
|
||||||
|
|
||||||
@@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original
|
|||||||
out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
|
out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw)
|
||||||
}
|
}
|
||||||
|
|
||||||
if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() {
|
return out
|
||||||
out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int())
|
}
|
||||||
out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int())
|
|
||||||
|
func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) {
|
||||||
|
if !usage.Exists() || usage.Type == gjson.Null {
|
||||||
|
return 0, 0, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
return out
|
inputTokens := usage.Get("input_tokens").Int()
|
||||||
|
outputTokens := usage.Get("output_tokens").Int()
|
||||||
|
cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int()
|
||||||
|
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
if inputTokens >= cachedTokens {
|
||||||
|
inputTokens -= cachedTokens
|
||||||
|
} else {
|
||||||
|
inputTokens = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return inputTokens, outputTokens, cachedTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
|
// buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools.
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
|||||||
var messagesJSON = "[]"
|
var messagesJSON = "[]"
|
||||||
|
|
||||||
// Handle system message first
|
// Handle system message first
|
||||||
systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}`
|
systemMsgJSON := `{"role":"system","content":[]}`
|
||||||
if system := root.Get("system"); system.Exists() {
|
if system := root.Get("system"); system.Exists() {
|
||||||
if system.Type == gjson.String {
|
if system.Type == gjson.String {
|
||||||
if system.String() != "" {
|
if system.String() != "" {
|
||||||
|
|||||||
@@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
|
|||||||
// Only process if usage has actual values (not null)
|
// Only process if usage has actual values (not null)
|
||||||
if param.FinishReason != "" {
|
if param.FinishReason != "" {
|
||||||
usage := root.Get("usage")
|
usage := root.Get("usage")
|
||||||
var inputTokens, outputTokens int64
|
var inputTokens, outputTokens, cachedTokens int64
|
||||||
if usage.Exists() && usage.Type != gjson.Null {
|
if usage.Exists() && usage.Type != gjson.Null {
|
||||||
// Check if usage has actual token counts
|
inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage)
|
||||||
promptTokens := usage.Get("prompt_tokens")
|
|
||||||
completionTokens := usage.Get("completion_tokens")
|
|
||||||
|
|
||||||
if promptTokens.Exists() && completionTokens.Exists() {
|
|
||||||
inputTokens = promptTokens.Int()
|
|
||||||
outputTokens = completionTokens.Int()
|
|
||||||
}
|
|
||||||
// Send message_delta with usage
|
// Send message_delta with usage
|
||||||
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
|
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
|
||||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
|
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
|
||||||
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
|
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens)
|
||||||
|
}
|
||||||
results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
|
results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
|
||||||
param.MessageDeltaSent = true
|
param.MessageDeltaSent = true
|
||||||
|
|
||||||
@@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {
|
|||||||
|
|
||||||
// Set usage information
|
// Set usage information
|
||||||
if usage := root.Get("usage"); usage.Exists() {
|
if usage := root.Get("usage"); usage.Exists() {
|
||||||
out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int())
|
inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage)
|
||||||
out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int())
|
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||||
reasoningTokens := int64(0)
|
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||||
if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() {
|
if cachedTokens > 0 {
|
||||||
reasoningTokens = v.Int()
|
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||||
}
|
}
|
||||||
out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return []string{out}
|
return []string{out}
|
||||||
@@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
|
|||||||
}
|
}
|
||||||
|
|
||||||
if respUsage := root.Get("usage"); respUsage.Exists() {
|
if respUsage := root.Get("usage"); respUsage.Exists() {
|
||||||
out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int())
|
inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage)
|
||||||
out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int())
|
out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
|
||||||
|
out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !stopReasonSet {
|
if !stopReasonSet {
|
||||||
@@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
|
|||||||
func ClaudeTokenCount(ctx context.Context, count int64) string {
|
func ClaudeTokenCount(ctx context.Context, count int64) string {
|
||||||
return fmt.Sprintf(`{"input_tokens":%d}`, count)
|
return fmt.Sprintf(`{"input_tokens":%d}`, count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) {
|
||||||
|
if !usage.Exists() || usage.Type == gjson.Null {
|
||||||
|
return 0, 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
inputTokens := usage.Get("prompt_tokens").Int()
|
||||||
|
outputTokens := usage.Get("completion_tokens").Int()
|
||||||
|
cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int()
|
||||||
|
|
||||||
|
if cachedTokens > 0 {
|
||||||
|
if inputTokens >= cachedTokens {
|
||||||
|
inputTokens -= cachedTokens
|
||||||
|
} else {
|
||||||
|
inputTokens = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return inputTokens, outputTokens, cachedTokens
|
||||||
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import (
|
|||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
|
"github.com/tidwall/sjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
// thinkingTestCase represents a common test case structure for both suffix and body tests.
|
// thinkingTestCase represents a common test case structure for both suffix and body tests.
|
||||||
@@ -2707,6 +2708,9 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
|
|||||||
[]byte(tc.inputJSON),
|
[]byte(tc.inputJSON),
|
||||||
true,
|
true,
|
||||||
)
|
)
|
||||||
|
if applyTo == "claude" {
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", 200000)
|
||||||
|
}
|
||||||
|
|
||||||
body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo)
|
body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user