mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-24 06:40:31 +00:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d06e2dc83c | ||
|
|
336867853b | ||
|
|
6403ff4ec4 | ||
|
|
d222469b44 | ||
|
|
790a17ce98 | ||
|
|
d473c952fb | ||
|
|
7646a2b877 | ||
|
|
62090f2568 | ||
|
|
d35152bbef | ||
|
|
c281f4cbaf | ||
|
|
09455f9e85 | ||
|
|
c8e72ba0dc | ||
|
|
375ef252ab | ||
|
|
ee552f8720 | ||
|
|
2e88c4858e | ||
|
|
3f50da85c1 | ||
|
|
8be06255f7 | ||
|
|
60936b5185 | ||
|
|
72274099aa | ||
|
|
b7f7b3a1d8 | ||
|
|
dcae098e23 |
@@ -13,8 +13,6 @@ Dockerfile
|
|||||||
docs/*
|
docs/*
|
||||||
README.md
|
README.md
|
||||||
README_CN.md
|
README_CN.md
|
||||||
MANAGEMENT_API.md
|
|
||||||
MANAGEMENT_API_CN.md
|
|
||||||
LICENSE
|
LICENSE
|
||||||
|
|
||||||
# Runtime data folders (should be mounted as volumes)
|
# Runtime data folders (should be mounted as volumes)
|
||||||
@@ -32,3 +30,4 @@ bin/*
|
|||||||
.agent/*
|
.agent/*
|
||||||
.bmad/*
|
.bmad/*
|
||||||
_bmad/*
|
_bmad/*
|
||||||
|
_bmad-output/*
|
||||||
|
|||||||
7
.github/ISSUE_TEMPLATE/bug_report.md
vendored
7
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -7,6 +7,13 @@ assignees: ''
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Is it a request payload issue?**
|
||||||
|
[ ] Yes, this is a request payload issue. I am using a client/cURL to send a request payload, but I received an unexpected error.
|
||||||
|
[ ] No, it's another issue.
|
||||||
|
|
||||||
|
**If it's a request payload issue, you MUST know**
|
||||||
|
Our team doesn't have any GODs or ORACLEs or MIND READERs. Please make sure to attach the request log or curl payload.
|
||||||
|
|
||||||
**Describe the bug**
|
**Describe the bug**
|
||||||
A clear and concise description of what the bug is.
|
A clear and concise description of what the bug is.
|
||||||
|
|
||||||
|
|||||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -12,11 +12,15 @@ bin/*
|
|||||||
logs/*
|
logs/*
|
||||||
conv/*
|
conv/*
|
||||||
temp/*
|
temp/*
|
||||||
|
refs/*
|
||||||
|
|
||||||
|
# Storage backends
|
||||||
pgstore/*
|
pgstore/*
|
||||||
gitstore/*
|
gitstore/*
|
||||||
objectstore/*
|
objectstore/*
|
||||||
|
|
||||||
|
# Static assets
|
||||||
static/*
|
static/*
|
||||||
refs/*
|
|
||||||
|
|
||||||
# Authentication data
|
# Authentication data
|
||||||
auths/*
|
auths/*
|
||||||
@@ -36,6 +40,7 @@ GEMINI.md
|
|||||||
.agent/*
|
.agent/*
|
||||||
.bmad/*
|
.bmad/*
|
||||||
_bmad/*
|
_bmad/*
|
||||||
|
_bmad-output/*
|
||||||
.mcp/cache/
|
.mcp/cache/
|
||||||
|
|
||||||
# macOS
|
# macOS
|
||||||
|
|||||||
@@ -879,8 +879,8 @@ func getOrCreateMapValue(mapNode *yaml.Node, key string) *yaml.Node {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// mergeMappingPreserve merges keys from src into dst mapping node while preserving
|
// mergeMappingPreserve merges keys from src into dst mapping node while preserving
|
||||||
// key order and comments of existing keys in dst. Unknown keys from src are appended
|
// key order and comments of existing keys in dst. New keys are only added if their
|
||||||
// to dst at the end, copying their node structure from src.
|
// value is non-zero to avoid polluting the config with defaults.
|
||||||
func mergeMappingPreserve(dst, src *yaml.Node) {
|
func mergeMappingPreserve(dst, src *yaml.Node) {
|
||||||
if dst == nil || src == nil {
|
if dst == nil || src == nil {
|
||||||
return
|
return
|
||||||
@@ -891,20 +891,19 @@ func mergeMappingPreserve(dst, src *yaml.Node) {
|
|||||||
copyNodeShallow(dst, src)
|
copyNodeShallow(dst, src)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Build a lookup of existing keys in dst
|
|
||||||
for i := 0; i+1 < len(src.Content); i += 2 {
|
for i := 0; i+1 < len(src.Content); i += 2 {
|
||||||
sk := src.Content[i]
|
sk := src.Content[i]
|
||||||
sv := src.Content[i+1]
|
sv := src.Content[i+1]
|
||||||
idx := findMapKeyIndex(dst, sk.Value)
|
idx := findMapKeyIndex(dst, sk.Value)
|
||||||
if idx >= 0 {
|
if idx >= 0 {
|
||||||
// Merge into existing value node
|
// Merge into existing value node (always update, even to zero values)
|
||||||
dv := dst.Content[idx+1]
|
dv := dst.Content[idx+1]
|
||||||
mergeNodePreserve(dv, sv)
|
mergeNodePreserve(dv, sv)
|
||||||
} else {
|
} else {
|
||||||
if shouldSkipEmptyCollectionOnPersist(sk.Value, sv) {
|
// New key: only add if value is non-zero to avoid polluting config with defaults
|
||||||
|
if isZeroValueNode(sv) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Append new key/value pair by deep-copying from src
|
|
||||||
dst.Content = append(dst.Content, deepCopyNode(sk), deepCopyNode(sv))
|
dst.Content = append(dst.Content, deepCopyNode(sk), deepCopyNode(sv))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -987,32 +986,49 @@ func findMapKeyIndex(mapNode *yaml.Node, key string) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func shouldSkipEmptyCollectionOnPersist(key string, node *yaml.Node) bool {
|
// isZeroValueNode returns true if the YAML node represents a zero/default value
|
||||||
switch key {
|
// that should not be written as a new key to preserve config cleanliness.
|
||||||
case "generative-language-api-key",
|
// For mappings and sequences, recursively checks if all children are zero values.
|
||||||
"gemini-api-key",
|
func isZeroValueNode(node *yaml.Node) bool {
|
||||||
"vertex-api-key",
|
|
||||||
"claude-api-key",
|
|
||||||
"codex-api-key",
|
|
||||||
"openai-compatibility":
|
|
||||||
return isEmptyCollectionNode(node)
|
|
||||||
default:
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func isEmptyCollectionNode(node *yaml.Node) bool {
|
|
||||||
if node == nil {
|
if node == nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
switch node.Kind {
|
switch node.Kind {
|
||||||
case yaml.SequenceNode:
|
|
||||||
return len(node.Content) == 0
|
|
||||||
case yaml.ScalarNode:
|
case yaml.ScalarNode:
|
||||||
return node.Tag == "!!null"
|
switch node.Tag {
|
||||||
default:
|
case "!!bool":
|
||||||
return false
|
return node.Value == "false"
|
||||||
|
case "!!int", "!!float":
|
||||||
|
return node.Value == "0" || node.Value == "0.0"
|
||||||
|
case "!!str":
|
||||||
|
return node.Value == ""
|
||||||
|
case "!!null":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
case yaml.SequenceNode:
|
||||||
|
if len(node.Content) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Check if all elements are zero values
|
||||||
|
for _, child := range node.Content {
|
||||||
|
if !isZeroValueNode(child) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
case yaml.MappingNode:
|
||||||
|
if len(node.Content) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Check if all values are zero values (values are at odd indices)
|
||||||
|
for i := 1; i < len(node.Content); i += 2 {
|
||||||
|
if !isZeroValueNode(node.Content[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// deepCopyNode creates a deep copy of a yaml.Node graph.
|
// deepCopyNode creates a deep copy of a yaml.Node graph.
|
||||||
|
|||||||
@@ -30,13 +30,13 @@ type SDKConfig struct {
|
|||||||
// StreamingConfig holds server streaming behavior configuration.
|
// StreamingConfig holds server streaming behavior configuration.
|
||||||
type StreamingConfig struct {
|
type StreamingConfig struct {
|
||||||
// KeepAliveSeconds controls how often the server emits SSE heartbeats (": keep-alive\n\n").
|
// KeepAliveSeconds controls how often the server emits SSE heartbeats (": keep-alive\n\n").
|
||||||
// nil means default (15 seconds). <= 0 disables keep-alives.
|
// <= 0 disables keep-alives. Default is 0.
|
||||||
KeepAliveSeconds *int `yaml:"keepalive-seconds,omitempty" json:"keepalive-seconds,omitempty"`
|
KeepAliveSeconds int `yaml:"keepalive-seconds,omitempty" json:"keepalive-seconds,omitempty"`
|
||||||
|
|
||||||
// BootstrapRetries controls how many times the server may retry a streaming request before any bytes are sent,
|
// BootstrapRetries controls how many times the server may retry a streaming request before any bytes are sent,
|
||||||
// to allow auth rotation / transient recovery.
|
// to allow auth rotation / transient recovery.
|
||||||
// nil means default (2). 0 disables bootstrap retries.
|
// <= 0 disables bootstrap retries. Default is 0.
|
||||||
BootstrapRetries *int `yaml:"bootstrap-retries,omitempty" json:"bootstrap-retries,omitempty"`
|
BootstrapRetries int `yaml:"bootstrap-retries,omitempty" json:"bootstrap-retries,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AccessConfig groups request authentication providers.
|
// AccessConfig groups request authentication providers.
|
||||||
|
|||||||
@@ -741,7 +741,7 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
||||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000},
|
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||||
}
|
}
|
||||||
models := make([]*ModelInfo, 0, len(entries))
|
models := make([]*ModelInfo, 0, len(entries))
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
|||||||
return resp, errValidate
|
return resp, errValidate
|
||||||
}
|
}
|
||||||
body = applyIFlowThinkingConfig(body)
|
body = applyIFlowThinkingConfig(body)
|
||||||
|
body = preserveReasoningContentInMessages(body)
|
||||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||||
|
|
||||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||||
@@ -159,6 +160,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
return nil, errValidate
|
return nil, errValidate
|
||||||
}
|
}
|
||||||
body = applyIFlowThinkingConfig(body)
|
body = applyIFlowThinkingConfig(body)
|
||||||
|
body = preserveReasoningContentInMessages(body)
|
||||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
toolsResult := gjson.GetBytes(body, "tools")
|
||||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||||
@@ -445,20 +447,98 @@ func ensureToolsArray(body []byte) []byte {
|
|||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
// applyIFlowThinkingConfig converts normalized reasoning_effort to iFlow chat_template_kwargs.enable_thinking.
|
// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
|
||||||
// This should be called after NormalizeThinkingConfig has processed the payload.
|
// conversation history is preserved when sending to iFlow models that support thinking.
|
||||||
// iFlow only supports boolean enable_thinking, so any non-"none" effort enables thinking.
|
// This is critical for multi-turn conversations where the model needs to see its previous
|
||||||
func applyIFlowThinkingConfig(body []byte) []byte {
|
// reasoning to maintain coherent thought chains across tool calls and conversation turns.
|
||||||
effort := gjson.GetBytes(body, "reasoning_effort")
|
//
|
||||||
if !effort.Exists() {
|
// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
|
||||||
|
// appended back into message history before the next call.
|
||||||
|
func preserveReasoningContentInMessages(body []byte) []byte {
|
||||||
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
|
||||||
|
// Only apply to models that support thinking with history preservation
|
||||||
|
needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
|
||||||
|
strings.HasPrefix(model, "glm-4-7") ||
|
||||||
|
strings.HasPrefix(model, "minimax-m2.1") ||
|
||||||
|
strings.HasPrefix(model, "minimax-m2-1")
|
||||||
|
|
||||||
|
if !needsPreservation {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
val := strings.ToLower(strings.TrimSpace(effort.String()))
|
messages := gjson.GetBytes(body, "messages")
|
||||||
enableThinking := val != "none" && val != ""
|
if !messages.Exists() || !messages.IsArray() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
// Check if any assistant message already has reasoning_content preserved
|
||||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
hasReasoningContent := false
|
||||||
|
messages.ForEach(func(_, msg gjson.Result) bool {
|
||||||
|
role := msg.Get("role").String()
|
||||||
|
if role == "assistant" {
|
||||||
|
rc := msg.Get("reasoning_content")
|
||||||
|
if rc.Exists() && rc.String() != "" {
|
||||||
|
hasReasoningContent = true
|
||||||
|
return false // stop iteration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
// If reasoning content is already present, the messages are properly formatted
|
||||||
|
// No need to modify - the client has correctly preserved reasoning in history
|
||||||
|
if hasReasoningContent {
|
||||||
|
log.Debugf("iflow executor: reasoning_content found in message history for %s", model)
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations.
|
||||||
|
// This should be called after NormalizeThinkingConfig has processed the payload.
|
||||||
|
//
|
||||||
|
// Model-specific handling:
|
||||||
|
// - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
|
||||||
|
// - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
||||||
|
// - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
|
||||||
|
func applyIFlowThinkingConfig(body []byte) []byte {
|
||||||
|
effort := gjson.GetBytes(body, "reasoning_effort")
|
||||||
|
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||||
|
|
||||||
|
// Check if thinking should be enabled
|
||||||
|
val := ""
|
||||||
|
if effort.Exists() {
|
||||||
|
val = strings.ToLower(strings.TrimSpace(effort.String()))
|
||||||
|
}
|
||||||
|
enableThinking := effort.Exists() && val != "none" && val != ""
|
||||||
|
|
||||||
|
// Remove reasoning_effort as we'll convert to model-specific format
|
||||||
|
if effort.Exists() {
|
||||||
|
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
|
||||||
|
if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
|
||||||
|
if enableThinking {
|
||||||
|
body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
|
||||||
|
body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
|
||||||
|
if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
|
||||||
|
if enableThinking {
|
||||||
|
body, _ = sjson.SetBytes(body, "reasoning_split", true)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
|
||||||
|
if effort.Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||||
|
}
|
||||||
|
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -482,12 +482,16 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) {
|
|||||||
cleaned := jsonBytes
|
cleaned := jsonBytes
|
||||||
var changed bool
|
var changed bool
|
||||||
|
|
||||||
if gjson.GetBytes(cleaned, "usageMetadata").Exists() {
|
if usageMetadata = gjson.GetBytes(cleaned, "usageMetadata"); usageMetadata.Exists() {
|
||||||
|
// Rename usageMetadata to cpaUsageMetadata in the message_start event of Claude
|
||||||
|
cleaned, _ = sjson.SetRawBytes(cleaned, "cpaUsageMetadata", []byte(usageMetadata.Raw))
|
||||||
cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata")
|
cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata")
|
||||||
changed = true
|
changed = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() {
|
if usageMetadata = gjson.GetBytes(cleaned, "response.usageMetadata"); usageMetadata.Exists() {
|
||||||
|
// Rename usageMetadata to cpaUsageMetadata in the message_start event of Claude
|
||||||
|
cleaned, _ = sjson.SetRawBytes(cleaned, "response.cpaUsageMetadata", []byte(usageMetadata.Raw))
|
||||||
cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata")
|
cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata")
|
||||||
changed = true
|
changed = true
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,6 +99,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
|||||||
// This follows the Claude Code API specification for streaming message initialization
|
// This follows the Claude Code API specification for streaming message initialization
|
||||||
messageStartTemplate := `{"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-5-sonnet-20241022", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 0, "output_tokens": 0}}}`
|
messageStartTemplate := `{"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-5-sonnet-20241022", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 0, "output_tokens": 0}}}`
|
||||||
|
|
||||||
|
// Use cpaUsageMetadata within the message_start event for Claude.
|
||||||
|
if promptTokenCount := gjson.GetBytes(rawJSON, "response.cpaUsageMetadata.promptTokenCount"); promptTokenCount.Exists() {
|
||||||
|
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.usage.input_tokens", promptTokenCount.Int())
|
||||||
|
}
|
||||||
|
if candidatesTokenCount := gjson.GetBytes(rawJSON, "response.cpaUsageMetadata.candidatesTokenCount"); candidatesTokenCount.Exists() {
|
||||||
|
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.usage.output_tokens", candidatesTokenCount.Int())
|
||||||
|
}
|
||||||
|
|
||||||
// Override default values with actual response metadata if available from the Gemini CLI response
|
// Override default values with actual response metadata if available from the Gemini CLI response
|
||||||
if modelVersionResult := gjson.GetBytes(rawJSON, "response.modelVersion"); modelVersionResult.Exists() {
|
if modelVersionResult := gjson.GetBytes(rawJSON, "response.modelVersion"); modelVersionResult.Exists() {
|
||||||
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.model", modelVersionResult.String())
|
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.model", modelVersionResult.String())
|
||||||
|
|||||||
@@ -247,7 +247,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
|||||||
} else if role == "assistant" {
|
} else if role == "assistant" {
|
||||||
node := []byte(`{"role":"model","parts":[]}`)
|
node := []byte(`{"role":"model","parts":[]}`)
|
||||||
p := 0
|
p := 0
|
||||||
if content.Type == gjson.String {
|
if content.Type == gjson.String && content.String() != "" {
|
||||||
node, _ = sjson.SetBytes(node, "parts.-1.text", content.String())
|
node, _ = sjson.SetBytes(node, "parts.-1.text", content.String())
|
||||||
p++
|
p++
|
||||||
} else if content.IsArray() {
|
} else if content.IsArray() {
|
||||||
|
|||||||
@@ -209,9 +209,12 @@ func ConvertClaudeResponseToOpenAI(_ context.Context, modelName string, original
|
|||||||
if usage := root.Get("usage"); usage.Exists() {
|
if usage := root.Get("usage"); usage.Exists() {
|
||||||
inputTokens := usage.Get("input_tokens").Int()
|
inputTokens := usage.Get("input_tokens").Int()
|
||||||
outputTokens := usage.Get("output_tokens").Int()
|
outputTokens := usage.Get("output_tokens").Int()
|
||||||
template, _ = sjson.Set(template, "usage.prompt_tokens", inputTokens)
|
cacheReadInputTokens := usage.Get("cache_read_input_tokens").Int()
|
||||||
|
cacheCreationInputTokens := usage.Get("cache_creation_input_tokens").Int()
|
||||||
|
template, _ = sjson.Set(template, "usage.prompt_tokens", inputTokens+cacheCreationInputTokens)
|
||||||
template, _ = sjson.Set(template, "usage.completion_tokens", outputTokens)
|
template, _ = sjson.Set(template, "usage.completion_tokens", outputTokens)
|
||||||
template, _ = sjson.Set(template, "usage.total_tokens", inputTokens+outputTokens)
|
template, _ = sjson.Set(template, "usage.total_tokens", inputTokens+outputTokens)
|
||||||
|
template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cacheReadInputTokens)
|
||||||
}
|
}
|
||||||
return []string{template}
|
return []string{template}
|
||||||
|
|
||||||
@@ -285,8 +288,6 @@ func ConvertClaudeResponseToOpenAINonStream(_ context.Context, _ string, origina
|
|||||||
var messageID string
|
var messageID string
|
||||||
var model string
|
var model string
|
||||||
var createdAt int64
|
var createdAt int64
|
||||||
var inputTokens, outputTokens int64
|
|
||||||
var reasoningTokens int64
|
|
||||||
var stopReason string
|
var stopReason string
|
||||||
var contentParts []string
|
var contentParts []string
|
||||||
var reasoningParts []string
|
var reasoningParts []string
|
||||||
@@ -303,9 +304,6 @@ func ConvertClaudeResponseToOpenAINonStream(_ context.Context, _ string, origina
|
|||||||
messageID = message.Get("id").String()
|
messageID = message.Get("id").String()
|
||||||
model = message.Get("model").String()
|
model = message.Get("model").String()
|
||||||
createdAt = time.Now().Unix()
|
createdAt = time.Now().Unix()
|
||||||
if usage := message.Get("usage"); usage.Exists() {
|
|
||||||
inputTokens = usage.Get("input_tokens").Int()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case "content_block_start":
|
case "content_block_start":
|
||||||
@@ -368,11 +366,14 @@ func ConvertClaudeResponseToOpenAINonStream(_ context.Context, _ string, origina
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if usage := root.Get("usage"); usage.Exists() {
|
if usage := root.Get("usage"); usage.Exists() {
|
||||||
outputTokens = usage.Get("output_tokens").Int()
|
inputTokens := usage.Get("input_tokens").Int()
|
||||||
// Estimate reasoning tokens from accumulated thinking content
|
outputTokens := usage.Get("output_tokens").Int()
|
||||||
if len(reasoningParts) > 0 {
|
cacheReadInputTokens := usage.Get("cache_read_input_tokens").Int()
|
||||||
reasoningTokens = int64(len(strings.Join(reasoningParts, "")) / 4) // Rough estimation
|
cacheCreationInputTokens := usage.Get("cache_creation_input_tokens").Int()
|
||||||
}
|
out, _ = sjson.Set(out, "usage.prompt_tokens", inputTokens+cacheCreationInputTokens)
|
||||||
|
out, _ = sjson.Set(out, "usage.completion_tokens", outputTokens)
|
||||||
|
out, _ = sjson.Set(out, "usage.total_tokens", inputTokens+outputTokens)
|
||||||
|
out, _ = sjson.Set(out, "usage.prompt_tokens_details.cached_tokens", cacheReadInputTokens)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -431,16 +432,5 @@ func ConvertClaudeResponseToOpenAINonStream(_ context.Context, _ string, origina
|
|||||||
out, _ = sjson.Set(out, "choices.0.finish_reason", mapAnthropicStopReasonToOpenAI(stopReason))
|
out, _ = sjson.Set(out, "choices.0.finish_reason", mapAnthropicStopReasonToOpenAI(stopReason))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set usage information including prompt tokens, completion tokens, and total tokens
|
|
||||||
totalTokens := inputTokens + outputTokens
|
|
||||||
out, _ = sjson.Set(out, "usage.prompt_tokens", inputTokens)
|
|
||||||
out, _ = sjson.Set(out, "usage.completion_tokens", outputTokens)
|
|
||||||
out, _ = sjson.Set(out, "usage.total_tokens", totalTokens)
|
|
||||||
|
|
||||||
// Add reasoning tokens to usage details if any reasoning content was processed
|
|
||||||
if reasoningTokens > 0 {
|
|
||||||
out, _ = sjson.Set(out, "usage.completion_tokens_details.reasoning_tokens", reasoningTokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,13 +114,16 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
|
|||||||
var builder strings.Builder
|
var builder strings.Builder
|
||||||
if parts := item.Get("content"); parts.Exists() && parts.IsArray() {
|
if parts := item.Get("content"); parts.Exists() && parts.IsArray() {
|
||||||
parts.ForEach(func(_, part gjson.Result) bool {
|
parts.ForEach(func(_, part gjson.Result) bool {
|
||||||
text := part.Get("text").String()
|
textResult := part.Get("text")
|
||||||
|
text := textResult.String()
|
||||||
if builder.Len() > 0 && text != "" {
|
if builder.Len() > 0 && text != "" {
|
||||||
builder.WriteByte('\n')
|
builder.WriteByte('\n')
|
||||||
}
|
}
|
||||||
builder.WriteString(text)
|
builder.WriteString(text)
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
} else if parts.Type == gjson.String {
|
||||||
|
builder.WriteString(parts.String())
|
||||||
}
|
}
|
||||||
instructionsText = builder.String()
|
instructionsText = builder.String()
|
||||||
if instructionsText != "" {
|
if instructionsText != "" {
|
||||||
@@ -207,6 +210,8 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
} else if parts.Type == gjson.String {
|
||||||
|
textAggregate.WriteString(parts.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to given role if content types not decisive
|
// Fallback to given role if content types not decisive
|
||||||
|
|||||||
@@ -104,8 +104,8 @@ func BuildErrorResponseBody(status int, errText string) []byte {
|
|||||||
// Returning 0 disables keep-alives (default when unset).
|
// Returning 0 disables keep-alives (default when unset).
|
||||||
func StreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
|
func StreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
|
||||||
seconds := defaultStreamingKeepAliveSeconds
|
seconds := defaultStreamingKeepAliveSeconds
|
||||||
if cfg != nil && cfg.Streaming.KeepAliveSeconds != nil {
|
if cfg != nil {
|
||||||
seconds = *cfg.Streaming.KeepAliveSeconds
|
seconds = cfg.Streaming.KeepAliveSeconds
|
||||||
}
|
}
|
||||||
if seconds <= 0 {
|
if seconds <= 0 {
|
||||||
return 0
|
return 0
|
||||||
@@ -116,8 +116,8 @@ func StreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
|
|||||||
// StreamingBootstrapRetries returns how many times a streaming request may be retried before any bytes are sent.
|
// StreamingBootstrapRetries returns how many times a streaming request may be retried before any bytes are sent.
|
||||||
func StreamingBootstrapRetries(cfg *config.SDKConfig) int {
|
func StreamingBootstrapRetries(cfg *config.SDKConfig) int {
|
||||||
retries := defaultStreamingBootstrapRetries
|
retries := defaultStreamingBootstrapRetries
|
||||||
if cfg != nil && cfg.Streaming.BootstrapRetries != nil {
|
if cfg != nil {
|
||||||
retries = *cfg.Streaming.BootstrapRetries
|
retries = cfg.Streaming.BootstrapRetries
|
||||||
}
|
}
|
||||||
if retries < 0 {
|
if retries < 0 {
|
||||||
retries = 0
|
retries = 0
|
||||||
|
|||||||
@@ -94,10 +94,9 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
|
|||||||
registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
|
registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
|
||||||
})
|
})
|
||||||
|
|
||||||
bootstrapRetries := 1
|
|
||||||
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
|
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
|
||||||
Streaming: sdkconfig.StreamingConfig{
|
Streaming: sdkconfig.StreamingConfig{
|
||||||
BootstrapRetries: &bootstrapRetries,
|
BootstrapRetries: 1,
|
||||||
},
|
},
|
||||||
}, manager)
|
}, manager)
|
||||||
dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
|
dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
|
||||||
|
|||||||
@@ -263,7 +263,6 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
|
|||||||
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
rotated := m.rotateProviders(req.Model, normalized)
|
rotated := m.rotateProviders(req.Model, normalized)
|
||||||
defer m.advanceProviderCursor(req.Model, normalized)
|
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
retryTimes, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
attempts := retryTimes + 1
|
||||||
@@ -302,7 +301,6 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
|
|||||||
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
rotated := m.rotateProviders(req.Model, normalized)
|
rotated := m.rotateProviders(req.Model, normalized)
|
||||||
defer m.advanceProviderCursor(req.Model, normalized)
|
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
retryTimes, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
attempts := retryTimes + 1
|
||||||
@@ -341,7 +339,6 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
|
|||||||
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
|
||||||
}
|
}
|
||||||
rotated := m.rotateProviders(req.Model, normalized)
|
rotated := m.rotateProviders(req.Model, normalized)
|
||||||
defer m.advanceProviderCursor(req.Model, normalized)
|
|
||||||
|
|
||||||
retryTimes, maxWait := m.retrySettings()
|
retryTimes, maxWait := m.retrySettings()
|
||||||
attempts := retryTimes + 1
|
attempts := retryTimes + 1
|
||||||
@@ -640,13 +637,20 @@ func (m *Manager) normalizeProviders(providers []string) []string {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rotateProviders returns a rotated view of the providers list starting from the
|
||||||
|
// current offset for the model, and atomically increments the offset for the next call.
|
||||||
|
// This ensures concurrent requests get different starting providers.
|
||||||
func (m *Manager) rotateProviders(model string, providers []string) []string {
|
func (m *Manager) rotateProviders(model string, providers []string) []string {
|
||||||
if len(providers) == 0 {
|
if len(providers) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
m.mu.RLock()
|
|
||||||
|
// Atomic read-and-increment: get current offset and advance cursor in one lock
|
||||||
|
m.mu.Lock()
|
||||||
offset := m.providerOffsets[model]
|
offset := m.providerOffsets[model]
|
||||||
m.mu.RUnlock()
|
m.providerOffsets[model] = (offset + 1) % len(providers)
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
if len(providers) > 0 {
|
if len(providers) > 0 {
|
||||||
offset %= len(providers)
|
offset %= len(providers)
|
||||||
}
|
}
|
||||||
@@ -662,19 +666,6 @@ func (m *Manager) rotateProviders(model string, providers []string) []string {
|
|||||||
return rotated
|
return rotated
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) advanceProviderCursor(model string, providers []string) {
|
|
||||||
if len(providers) == 0 {
|
|
||||||
m.mu.Lock()
|
|
||||||
delete(m.providerOffsets, model)
|
|
||||||
m.mu.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
m.mu.Lock()
|
|
||||||
current := m.providerOffsets[model]
|
|
||||||
m.providerOffsets[model] = (current + 1) % len(providers)
|
|
||||||
m.mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) retrySettings() (int, time.Duration) {
|
func (m *Manager) retrySettings() (int, time.Duration) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return 0, 0
|
return 0, 0
|
||||||
|
|||||||
Reference in New Issue
Block a user