mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-04 03:31:21 +00:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
98509f615c | ||
|
|
e7a66ae504 | ||
|
|
754b126944 | ||
|
|
ae37ccffbf | ||
|
|
42c062bb5b | ||
|
|
87bf0b73d5 | ||
|
|
f389667ec3 | ||
|
|
29dba0399b | ||
|
|
a824e7cd0b | ||
|
|
140faef7dc | ||
|
|
adb580b344 | ||
|
|
06405f2129 | ||
|
|
b849bf79d6 | ||
|
|
59af2c57b1 | ||
|
|
d1fd2c4ad4 | ||
|
|
b6c6379bfa | ||
|
|
8f0e66b72e | ||
|
|
f63cf6ff7a | ||
|
|
d2419ed49d | ||
|
|
516d22c695 | ||
|
|
73cda6e836 | ||
|
|
0805989ee5 |
@@ -105,6 +105,10 @@ routing:
|
|||||||
# When true, enable authentication for the WebSocket API (/v1/ws).
|
# When true, enable authentication for the WebSocket API (/v1/ws).
|
||||||
ws-auth: false
|
ws-auth: false
|
||||||
|
|
||||||
|
# When true, enable Gemini CLI internal endpoints (/v1internal:*).
|
||||||
|
# Default is false for safety.
|
||||||
|
enable-gemini-cli-endpoint: false
|
||||||
|
|
||||||
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
|
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
|
||||||
nonstream-keepalive-interval: 0
|
nonstream-keepalive-interval: 0
|
||||||
|
|
||||||
|
|||||||
@@ -573,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
|
|||||||
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
||||||
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
||||||
|
|
||||||
|
mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
|
||||||
|
|
||||||
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
|
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
|
||||||
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
|
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
|
||||||
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
|
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
|
||||||
|
|||||||
@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
|
|||||||
Capabilities map[string]any `json:"capabilities,omitempty"`
|
Capabilities map[string]any `json:"capabilities,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CopilotModelLimits holds the token limits returned by the Copilot /models API
|
||||||
|
// under capabilities.limits. These limits vary by account type (individual vs
|
||||||
|
// business) and are the authoritative source for enforcing prompt size.
|
||||||
|
type CopilotModelLimits struct {
|
||||||
|
// MaxContextWindowTokens is the total context window (prompt + output).
|
||||||
|
MaxContextWindowTokens int
|
||||||
|
// MaxPromptTokens is the hard limit on input/prompt tokens.
|
||||||
|
// Exceeding this triggers a 400 error from the Copilot API.
|
||||||
|
MaxPromptTokens int
|
||||||
|
// MaxOutputTokens is the maximum number of output/completion tokens.
|
||||||
|
MaxOutputTokens int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Limits extracts the token limits from the model's capabilities map.
|
||||||
|
// Returns nil if no limits are available or the structure is unexpected.
|
||||||
|
//
|
||||||
|
// Expected Copilot API shape:
|
||||||
|
//
|
||||||
|
// "capabilities": {
|
||||||
|
// "limits": {
|
||||||
|
// "max_context_window_tokens": 200000,
|
||||||
|
// "max_prompt_tokens": 168000,
|
||||||
|
// "max_output_tokens": 32000
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
|
||||||
|
if e.Capabilities == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
limitsRaw, ok := e.Capabilities["limits"]
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
limitsMap, ok := limitsRaw.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &CopilotModelLimits{
|
||||||
|
MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
|
||||||
|
MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]),
|
||||||
|
MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only return if at least one field is populated.
|
||||||
|
if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// anyToInt converts a JSON-decoded numeric value to int.
|
||||||
|
// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
|
||||||
|
func anyToInt(v any) int {
|
||||||
|
switch n := v.(type) {
|
||||||
|
case float64:
|
||||||
|
return int(n)
|
||||||
|
case float32:
|
||||||
|
return int(n)
|
||||||
|
case int:
|
||||||
|
return n
|
||||||
|
case int64:
|
||||||
|
return int(n)
|
||||||
|
default:
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
|
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
|
||||||
type CopilotModelsResponse struct {
|
type CopilotModelsResponse struct {
|
||||||
Data []CopilotModelEntry `json:"data"`
|
Data []CopilotModelEntry `json:"data"`
|
||||||
|
|||||||
@@ -9,6 +9,10 @@ type SDKConfig struct {
|
|||||||
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
|
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
|
||||||
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
|
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
|
||||||
|
|
||||||
|
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
|
||||||
|
// Default is false for safety; when false, /v1internal:* requests are rejected.
|
||||||
|
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
|
||||||
|
|
||||||
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
|
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
|
||||||
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
|
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
|
||||||
// credentials as well.
|
// credentials as well.
|
||||||
|
|||||||
@@ -93,6 +93,30 @@ func GetAntigravityModels() []*ModelInfo {
|
|||||||
func GetCodeBuddyModels() []*ModelInfo {
|
func GetCodeBuddyModels() []*ModelInfo {
|
||||||
now := int64(1748044800) // 2025-05-24
|
now := int64(1748044800) // 2025-05-24
|
||||||
return []*ModelInfo{
|
return []*ModelInfo{
|
||||||
|
{
|
||||||
|
ID: "auto",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "Auto",
|
||||||
|
Description: "Automatic model selection via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "glm-5.0-turbo",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "GLM-5.0 Turbo",
|
||||||
|
Description: "GLM-5.0 Turbo via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
ID: "glm-5.0",
|
ID: "glm-5.0",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
@@ -118,13 +142,13 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "minimax-m2.5",
|
ID: "minimax-m2.7",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: now,
|
Created: now,
|
||||||
OwnedBy: "tencent",
|
OwnedBy: "tencent",
|
||||||
Type: "codebuddy",
|
Type: "codebuddy",
|
||||||
DisplayName: "MiniMax M2.5",
|
DisplayName: "MiniMax M2.7",
|
||||||
Description: "MiniMax M2.5 via CodeBuddy",
|
Description: "MiniMax M2.7 via CodeBuddy",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
@@ -141,6 +165,19 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
ID: "kimi-k2-thinking",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "Kimi K2 Thinking",
|
||||||
|
Description: "Kimi K2 Thinking via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
Thinking: &ThinkingSupport{ZeroAllowed: true},
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
ID: "deepseek-v3-2-volc",
|
ID: "deepseek-v3-2-volc",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
@@ -148,24 +185,11 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
OwnedBy: "tencent",
|
OwnedBy: "tencent",
|
||||||
Type: "codebuddy",
|
Type: "codebuddy",
|
||||||
DisplayName: "DeepSeek V3.2 (Volc)",
|
DisplayName: "DeepSeek V3.2 (Volc)",
|
||||||
Description: "DeepSeek V3.2 via CodeBuddy (Volcano Engine)",
|
Description: "DeepSeek V3.2 via CodeBuddy",
|
||||||
ContextLength: 128000,
|
ContextLength: 128000,
|
||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
ID: "hunyuan-2.0-thinking",
|
|
||||||
Object: "model",
|
|
||||||
Created: now,
|
|
||||||
OwnedBy: "tencent",
|
|
||||||
Type: "codebuddy",
|
|
||||||
DisplayName: "Hunyuan 2.0 Thinking",
|
|
||||||
Description: "Tencent Hunyuan 2.0 Thinking via CodeBuddy",
|
|
||||||
ContextLength: 128000,
|
|
||||||
MaxCompletionTokens: 32768,
|
|
||||||
Thinking: &ThinkingSupport{ZeroAllowed: true},
|
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -525,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-opus-4.6",
|
ID: "claude-opus-4.6",
|
||||||
@@ -537,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4",
|
ID: "claude-sonnet-4",
|
||||||
@@ -549,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4.5",
|
ID: "claude-sonnet-4.5",
|
||||||
@@ -561,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4.6",
|
ID: "claude-sonnet-4.6",
|
||||||
@@ -573,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-2.5-pro",
|
ID: "gemini-2.5-pro",
|
||||||
|
|||||||
@@ -280,6 +280,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -554,6 +555,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -610,6 +612,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -838,6 +842,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -896,6 +901,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1070,6 +1077,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1371,6 +1380,75 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.3-codex",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1770307200,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.3 Codex",
|
||||||
|
"version": "gpt-5.3",
|
||||||
|
"description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1772668800,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4",
|
||||||
|
"version": "gpt-5.4",
|
||||||
|
"description": "Stable version of GPT 5.4",
|
||||||
|
"context_length": 1050000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-team": [
|
"codex-team": [
|
||||||
@@ -1623,6 +1701,29 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-plus": [
|
"codex-plus": [
|
||||||
@@ -1898,6 +1999,29 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-pro": [
|
"codex-pro": [
|
||||||
@@ -2173,55 +2297,40 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"qwen": [
|
"qwen": [
|
||||||
{
|
|
||||||
"id": "qwen3-coder-plus",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1753228800,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Coder Plus",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Advanced code generation and understanding model",
|
|
||||||
"context_length": 32768,
|
|
||||||
"max_completion_tokens": 8192,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "qwen3-coder-flash",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1753228800,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Coder Flash",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Fast code generation model",
|
|
||||||
"context_length": 8192,
|
|
||||||
"max_completion_tokens": 2048,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "coder-model",
|
"id": "coder-model",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"created": 1771171200,
|
"created": 1771171200,
|
||||||
"owned_by": "qwen",
|
"owned_by": "qwen",
|
||||||
"type": "qwen",
|
"type": "qwen",
|
||||||
"display_name": "Qwen 3.5 Plus",
|
"display_name": "Qwen 3.6 Plus",
|
||||||
"version": "3.5",
|
"version": "3.6",
|
||||||
"description": "efficient hybrid model with leading coding performance",
|
"description": "efficient hybrid model with leading coding performance",
|
||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65536,
|
"max_completion_tokens": 65536,
|
||||||
@@ -2232,25 +2341,6 @@
|
|||||||
"stream",
|
"stream",
|
||||||
"stop"
|
"stop"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "vision-model",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1758672000,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Vision Model",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Vision model model",
|
|
||||||
"context_length": 32768,
|
|
||||||
"max_completion_tokens": 2048,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"iflow": [
|
"iflow": [
|
||||||
@@ -2639,11 +2729,12 @@
|
|||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65535,
|
"max_completion_tokens": 65535,
|
||||||
"thinking": {
|
"thinking": {
|
||||||
"min": 128,
|
"min": 1,
|
||||||
"max": 32768,
|
"max": 65535,
|
||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -2659,11 +2750,12 @@
|
|||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65535,
|
"max_completion_tokens": 65535,
|
||||||
"thinking": {
|
"thinking": {
|
||||||
"min": 128,
|
"min": 1,
|
||||||
"max": 32768,
|
"max": 65535,
|
||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
|
|
||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
body = normalizeClaudeTemperatureForThinking(body)
|
||||||
|
|
||||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||||
if countCacheControls(body) == 0 {
|
if countCacheControls(body) == 0 {
|
||||||
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
|
|
||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
body = normalizeClaudeTemperatureForThinking(body)
|
||||||
|
|
||||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||||
if countCacheControls(body) == 0 {
|
if countCacheControls(body) == 0 {
|
||||||
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
|
||||||
|
// thinking is enabled. Anthropic rejects temperatures other than 1 when
|
||||||
|
// thinking.type is enabled/adaptive/auto.
|
||||||
|
func normalizeClaudeTemperatureForThinking(body []byte) []byte {
|
||||||
|
if !gjson.GetBytes(body, "temperature").Exists() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
|
||||||
|
switch thinkingType {
|
||||||
|
case "enabled", "adaptive", "auto":
|
||||||
|
if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "temperature", 1)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
type compositeReadCloser struct {
|
type compositeReadCloser struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
closers []func() error
|
closers []func() error
|
||||||
@@ -827,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
|
|||||||
hasClaude1MHeader = true
|
hasClaude1MHeader = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
|
||||||
|
// when routing through the Anthropic gateway, but the gin headers won't have
|
||||||
|
// X-CPA-CLAUDE-1M because the request is internally constructed.
|
||||||
|
if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
|
||||||
|
if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
|
||||||
|
hasClaude1MHeader = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Merge extra betas from request body and request flags.
|
// Merge extra betas from request body and request flags.
|
||||||
if len(extraBetas) > 0 || hasClaude1MHeader {
|
if len(extraBetas) > 0 || hasClaude1MHeader {
|
||||||
|
|||||||
@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
|
|||||||
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
|
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
|
||||||
|
t.Fatalf("temperature = %v, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
|
||||||
|
t.Fatalf("temperature = %v, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
|
||||||
|
t.Fatalf("temperature = %v, want 0", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
|
||||||
|
out := disableThinkingIfToolChoiceForced(payload)
|
||||||
|
out = normalizeClaudeTemperatureForThinking(out)
|
||||||
|
|
||||||
|
if gjson.GetBytes(out, "thinking").Exists() {
|
||||||
|
t.Fatalf("thinking should be removed when tool_choice forces tool use")
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
|
||||||
|
t.Fatalf("temperature = %v, want 0", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -17,6 +18,7 @@ import (
|
|||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
|
||||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -40,7 +42,7 @@ const (
|
|||||||
copilotEditorVersion = "vscode/1.107.0"
|
copilotEditorVersion = "vscode/1.107.0"
|
||||||
copilotPluginVersion = "copilot-chat/0.35.0"
|
copilotPluginVersion = "copilot-chat/0.35.0"
|
||||||
copilotIntegrationID = "vscode-chat"
|
copilotIntegrationID = "vscode-chat"
|
||||||
copilotOpenAIIntent = "conversation-panel"
|
copilotOpenAIIntent = "conversation-edits"
|
||||||
copilotGitHubAPIVer = "2025-04-01"
|
copilotGitHubAPIVer = "2025-04-01"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = e.normalizeModel(req.Model, body)
|
body = e.normalizeModel(req.Model, body)
|
||||||
body = flattenAssistantContent(body)
|
body = flattenAssistantContent(body)
|
||||||
|
body = stripUnsupportedBetas(body)
|
||||||
|
|
||||||
// Detect vision content before input normalization removes messages
|
// Detect vision content before input normalization removes messages
|
||||||
hasVision := detectVisionContent(body)
|
hasVision := detectVisionContent(body)
|
||||||
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
if useResponses {
|
if useResponses {
|
||||||
body = normalizeGitHubCopilotResponsesInput(body)
|
body = normalizeGitHubCopilotResponsesInput(body)
|
||||||
body = normalizeGitHubCopilotResponsesTools(body)
|
body = normalizeGitHubCopilotResponsesTools(body)
|
||||||
|
body = applyGitHubCopilotResponsesDefaults(body)
|
||||||
} else {
|
} else {
|
||||||
body = normalizeGitHubCopilotChatTools(body)
|
body = normalizeGitHubCopilotChatTools(body)
|
||||||
}
|
}
|
||||||
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
if useResponses && from.String() == "claude" {
|
if useResponses && from.String() == "claude" {
|
||||||
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
|
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
|
||||||
} else {
|
} else {
|
||||||
|
data = normalizeGitHubCopilotReasoningField(data)
|
||||||
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
|
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
|
||||||
}
|
}
|
||||||
resp = cliproxyexecutor.Response{Payload: converted}
|
resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
|
||||||
reporter.ensurePublished(ctx)
|
reporter.ensurePublished(ctx)
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
body = e.normalizeModel(req.Model, body)
|
body = e.normalizeModel(req.Model, body)
|
||||||
body = flattenAssistantContent(body)
|
body = flattenAssistantContent(body)
|
||||||
|
body = stripUnsupportedBetas(body)
|
||||||
|
|
||||||
// Detect vision content before input normalization removes messages
|
// Detect vision content before input normalization removes messages
|
||||||
hasVision := detectVisionContent(body)
|
hasVision := detectVisionContent(body)
|
||||||
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
if useResponses {
|
if useResponses {
|
||||||
body = normalizeGitHubCopilotResponsesInput(body)
|
body = normalizeGitHubCopilotResponsesInput(body)
|
||||||
body = normalizeGitHubCopilotResponsesTools(body)
|
body = normalizeGitHubCopilotResponsesTools(body)
|
||||||
|
body = applyGitHubCopilotResponsesDefaults(body)
|
||||||
} else {
|
} else {
|
||||||
body = normalizeGitHubCopilotChatTools(body)
|
body = normalizeGitHubCopilotChatTools(body)
|
||||||
}
|
}
|
||||||
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
if useResponses && from.String() == "claude" {
|
if useResponses && from.String() == "claude" {
|
||||||
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
|
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
|
||||||
} else {
|
} else {
|
||||||
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
|
// Strip SSE "data: " prefix before reasoning field normalization,
|
||||||
|
// since normalizeGitHubCopilotReasoningField expects pure JSON.
|
||||||
|
// Re-wrap with the prefix afterward for the translator.
|
||||||
|
normalizedLine := bytes.Clone(line)
|
||||||
|
if bytes.HasPrefix(line, dataTag) {
|
||||||
|
sseData := bytes.TrimSpace(line[len(dataTag):])
|
||||||
|
if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
|
||||||
|
normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
|
||||||
|
if !bytes.Equal(normalized, sseData) {
|
||||||
|
normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
|
||||||
}
|
}
|
||||||
for i := range chunks {
|
for i := range chunks {
|
||||||
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
|
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
|
||||||
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// CountTokens is not supported for GitHub Copilot.
|
// CountTokens estimates token count locally using tiktoken, since the GitHub
|
||||||
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
// Copilot API does not expose a dedicated token counting endpoint.
|
||||||
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
|
func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||||
|
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||||
|
|
||||||
|
from := opts.SourceFormat
|
||||||
|
to := sdktranslator.FromString("openai")
|
||||||
|
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
|
||||||
|
|
||||||
|
enc, err := helps.TokenizerForModel(baseModel)
|
||||||
|
if err != nil {
|
||||||
|
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
count, err := helps.CountOpenAIChatTokens(enc, translated)
|
||||||
|
if err != nil {
|
||||||
|
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
usageJSON := helps.BuildOpenAIUsageJSON(count)
|
||||||
|
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
|
||||||
|
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refresh validates the GitHub token is still working.
|
// Refresh validates the GitHub token is still working.
|
||||||
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
|
|||||||
r.Header.Set("X-Request-Id", uuid.NewString())
|
r.Header.Set("X-Request-Id", uuid.NewString())
|
||||||
|
|
||||||
initiator := "user"
|
initiator := "user"
|
||||||
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
|
if isAgentInitiated(body) {
|
||||||
initiator = "agent"
|
initiator = "agent"
|
||||||
}
|
}
|
||||||
r.Header.Set("X-Initiator", initiator)
|
r.Header.Set("X-Initiator", initiator)
|
||||||
}
|
}
|
||||||
|
|
||||||
func detectLastConversationRole(body []byte) string {
|
// isAgentInitiated determines whether the current request is agent-initiated
|
||||||
|
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
|
||||||
|
//
|
||||||
|
// GitHub Copilot uses the X-Initiator header for billing:
|
||||||
|
// - "user" → consumes premium request quota
|
||||||
|
// - "agent" → free (tool loops, continuations)
|
||||||
|
//
|
||||||
|
// The challenge: Claude Code sends tool results as role:"user" messages with
|
||||||
|
// content type "tool_result". After translation to OpenAI format, the tool_result
|
||||||
|
// part becomes a separate role:"tool" message, but if the original Claude message
|
||||||
|
// also contained text content (e.g. skill invocations, attachment descriptions),
|
||||||
|
// a role:"user" message is emitted AFTER the tool message, making the last message
|
||||||
|
// appear user-initiated when it's actually part of an agent tool loop.
|
||||||
|
//
|
||||||
|
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
|
||||||
|
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
|
||||||
|
// we infer agent status by checking whether the conversation contains prior
|
||||||
|
// assistant/tool messages — if it does, the current request is a continuation.
|
||||||
|
//
|
||||||
|
// References:
|
||||||
|
// - opencode#8030, opencode#15824: same root cause and fix approach
|
||||||
|
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
|
||||||
|
// - pi-ai: github-copilot-headers.ts (last message role check)
|
||||||
|
func isAgentInitiated(body []byte) bool {
|
||||||
if len(body) == 0 {
|
if len(body) == 0 {
|
||||||
return ""
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Chat Completions API: check messages array
|
||||||
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
|
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
|
||||||
arr := messages.Array()
|
arr := messages.Array()
|
||||||
|
if len(arr) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
lastRole := ""
|
||||||
for i := len(arr) - 1; i >= 0; i-- {
|
for i := len(arr) - 1; i >= 0; i-- {
|
||||||
if role := arr[i].Get("role").String(); role != "" {
|
if r := arr[i].Get("role").String(); r != "" {
|
||||||
return role
|
lastRole = r
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If last message is assistant or tool, clearly agent-initiated.
|
||||||
|
if lastRole == "assistant" || lastRole == "tool" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If last message is "user", check whether it contains tool results
|
||||||
|
// (indicating a tool-loop continuation) or if the preceding message
|
||||||
|
// is an assistant tool_use. This is more precise than checking for
|
||||||
|
// any prior assistant message, which would false-positive on genuine
|
||||||
|
// multi-turn follow-ups.
|
||||||
|
if lastRole == "user" {
|
||||||
|
// Check if the last user message contains tool_result content
|
||||||
|
lastContent := arr[len(arr)-1].Get("content")
|
||||||
|
if lastContent.Exists() && lastContent.IsArray() {
|
||||||
|
for _, part := range lastContent.Array() {
|
||||||
|
if part.Get("type").String() == "tool_result" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check if the second-to-last message is an assistant with tool_use
|
||||||
|
if len(arr) >= 2 {
|
||||||
|
prev := arr[len(arr)-2]
|
||||||
|
if prev.Get("role").String() == "assistant" {
|
||||||
|
prevContent := prev.Get("content")
|
||||||
|
if prevContent.Exists() && prevContent.IsArray() {
|
||||||
|
for _, part := range prevContent.Array() {
|
||||||
|
if part.Get("type").String() == "tool_use" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Responses API: check input array
|
||||||
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
|
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
|
||||||
arr := inputs.Array()
|
arr := inputs.Array()
|
||||||
for i := len(arr) - 1; i >= 0; i-- {
|
if len(arr) == 0 {
|
||||||
item := arr[i]
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Most Responses input items carry a top-level role.
|
// Check last item
|
||||||
if role := item.Get("role").String(); role != "" {
|
last := arr[len(arr)-1]
|
||||||
return role
|
if role := last.Get("role").String(); role == "assistant" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
switch last.Get("type").String() {
|
||||||
|
case "function_call", "function_call_arguments", "computer_call":
|
||||||
|
return true
|
||||||
|
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If last item is user-role, check for prior non-user items
|
||||||
|
for _, item := range arr {
|
||||||
|
if role := item.Get("role").String(); role == "assistant" {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
switch item.Get("type").String() {
|
switch item.Get("type").String() {
|
||||||
case "function_call", "function_call_arguments", "computer_call":
|
case "function_call", "function_call_output", "function_call_response",
|
||||||
return "assistant"
|
"function_call_arguments", "computer_call", "computer_call_output":
|
||||||
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
return true
|
||||||
return "tool"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// detectVisionContent checks if the request body contains vision/image content.
|
// detectVisionContent checks if the request body contains vision/image content.
|
||||||
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
|
||||||
|
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
|
||||||
|
// context on Anthropic's API, but Copilot's Claude models are limited to
|
||||||
|
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
|
||||||
|
// it from the translated body avoids confusing downstream translators.
|
||||||
|
var copilotUnsupportedBetas = []string{
|
||||||
|
"context-1m-2025-08-07",
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
|
||||||
|
// translated request body. In OpenAI format the betas may appear under
|
||||||
|
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
|
||||||
|
// "betas". This function checks all known locations.
|
||||||
|
func stripUnsupportedBetas(body []byte) []byte {
|
||||||
|
betaPaths := []string{"betas", "metadata.betas"}
|
||||||
|
for _, path := range betaPaths {
|
||||||
|
arr := gjson.GetBytes(body, path)
|
||||||
|
if !arr.Exists() || !arr.IsArray() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var filtered []string
|
||||||
|
changed := false
|
||||||
|
for _, item := range arr.Array() {
|
||||||
|
beta := item.String()
|
||||||
|
if isCopilotUnsupportedBeta(beta) {
|
||||||
|
changed = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, beta)
|
||||||
|
}
|
||||||
|
if !changed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(filtered) == 0 {
|
||||||
|
body, _ = sjson.DeleteBytes(body, path)
|
||||||
|
} else {
|
||||||
|
body, _ = sjson.SetBytes(body, path, filtered)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
func isCopilotUnsupportedBeta(beta string) bool {
|
||||||
|
return slices.Contains(copilotUnsupportedBetas, beta)
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
|
||||||
|
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
|
||||||
|
// that the SDK translator expects. This handles both streaming deltas
|
||||||
|
// (choices[].delta.reasoning_text) and non-streaming messages
|
||||||
|
// (choices[].message.reasoning_text). The field is only renamed when
|
||||||
|
// 'reasoning_content' is absent or null, preserving standard responses.
|
||||||
|
// All choices are processed to support n>1 requests.
|
||||||
|
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
|
||||||
|
choices := gjson.GetBytes(data, "choices")
|
||||||
|
if !choices.Exists() || !choices.IsArray() {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
for i := range choices.Array() {
|
||||||
|
// Non-streaming: choices[i].message.reasoning_text
|
||||||
|
msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
|
||||||
|
msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
|
||||||
|
if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
|
||||||
|
if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||||
|
data, _ = sjson.SetBytes(data, msgRC, rt.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Streaming: choices[i].delta.reasoning_text
|
||||||
|
deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
|
||||||
|
deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
|
||||||
|
if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
|
||||||
|
if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||||
|
data, _ = sjson.SetBytes(data, deltaRC, rt.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
|
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
|
||||||
if sourceFormat.String() == "openai-response" {
|
if sourceFormat.String() == "openai-response" {
|
||||||
return true
|
return true
|
||||||
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func containsEndpoint(endpoints []string, endpoint string) bool {
|
func containsEndpoint(endpoints []string, endpoint string) bool {
|
||||||
for _, item := range endpoints {
|
return slices.Contains(endpoints, endpoint)
|
||||||
if item == endpoint {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// flattenAssistantContent converts assistant message content from array format
|
// flattenAssistantContent converts assistant message content from array format
|
||||||
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
|
||||||
|
// that both vscode-copilot-chat and pi-ai always include.
|
||||||
|
//
|
||||||
|
// References:
|
||||||
|
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
|
||||||
|
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
|
||||||
|
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
|
||||||
|
// store: false — prevents request/response storage
|
||||||
|
if !gjson.GetBytes(body, "store").Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "store", false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// include: ["reasoning.encrypted_content"] — enables reasoning content
|
||||||
|
// reuse across turns, avoiding redundant computation
|
||||||
|
if !gjson.GetBytes(body, "include").Exists() {
|
||||||
|
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
|
||||||
|
}
|
||||||
|
|
||||||
|
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
|
||||||
|
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
|
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
|
||||||
tools := gjson.GetBytes(body, "tools")
|
tools := gjson.GetBytes(body, "tools")
|
||||||
if tools.Exists() {
|
if tools.Exists() {
|
||||||
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
|
|||||||
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
|
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Override with real limits from the Copilot API when available.
|
||||||
|
// The API returns per-account limits (individual vs business) under
|
||||||
|
// capabilities.limits, which are more accurate than our static
|
||||||
|
// fallback values. We use max_prompt_tokens as ContextLength because
|
||||||
|
// that's the hard limit the Copilot API enforces on prompt size —
|
||||||
|
// exceeding it triggers "prompt token count exceeds the limit" errors.
|
||||||
|
if limits := entry.Limits(); limits != nil {
|
||||||
|
if limits.MaxPromptTokens > 0 {
|
||||||
|
m.ContextLength = limits.MaxPromptTokens
|
||||||
|
}
|
||||||
|
if limits.MaxOutputTokens > 0 {
|
||||||
|
m.MaxCompletionTokens = limits.MaxOutputTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
models = append(models, m)
|
models = append(models, m)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package executor
|
package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
|
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
|
||||||
t.Parallel()
|
// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
|
||||||
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
|
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
|
||||||
t.Fatal("expected responses-only registry model to use /responses")
|
t.Fatal("expected responses-only registry model to use /responses")
|
||||||
}
|
}
|
||||||
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
|
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
|
||||||
t.Parallel()
|
// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
|
||||||
|
|
||||||
reg := registry.GetGlobalRegistry()
|
reg := registry.GetGlobalRegistry()
|
||||||
clientID := "github-copilot-test-client"
|
clientID := "github-copilot-test-client"
|
||||||
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
|
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
|
||||||
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
||||||
if gjson.Get(out, "type").String() != "message" {
|
if gjson.GetBytes(out, "type").String() != "message" {
|
||||||
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
|
t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.type").String() != "text" {
|
if gjson.GetBytes(out, "content.0.type").String() != "text" {
|
||||||
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
|
t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.text").String() != "hello" {
|
if gjson.GetBytes(out, "content.0.text").String() != "hello" {
|
||||||
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
|
t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
|
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
|
||||||
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
||||||
if gjson.Get(out, "content.0.type").String() != "tool_use" {
|
if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
|
||||||
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
|
t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.name").String() != "sum" {
|
if gjson.GetBytes(out, "content.0.name").String() != "sum" {
|
||||||
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
|
t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "stop_reason").String() != "tool_use" {
|
if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
|
||||||
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
|
t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
|
|||||||
var param any
|
var param any
|
||||||
|
|
||||||
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
|
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
|
||||||
if len(created) == 0 || !strings.Contains(created[0], "message_start") {
|
if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
|
||||||
t.Fatalf("created events = %#v, want message_start", created)
|
t.Fatalf("created events = %#v, want message_start", created)
|
||||||
}
|
}
|
||||||
|
|
||||||
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
|
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
|
||||||
joinedDelta := strings.Join(delta, "")
|
var joinedDelta string
|
||||||
|
for _, d := range delta {
|
||||||
|
joinedDelta += string(d)
|
||||||
|
}
|
||||||
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
|
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
|
||||||
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
|
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
|
||||||
}
|
}
|
||||||
|
|
||||||
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
|
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
|
||||||
joinedCompleted := strings.Join(completed, "")
|
var joinedCompleted string
|
||||||
|
for _, c := range completed {
|
||||||
|
joinedCompleted += string(c)
|
||||||
|
}
|
||||||
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
|
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
|
||||||
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
|
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
|
||||||
}
|
}
|
||||||
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
|
func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
// Last role governs the initiator decision.
|
// When the last role is "user" and the message contains tool_result content,
|
||||||
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
|
// the request is a continuation (e.g. Claude tool result translated to a
|
||||||
|
// synthetic user message). Should be "agent".
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "user" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
|
t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// When the last message has role "tool", it's clearly agent-initiated.
|
||||||
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
|
t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
|
func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// Responses API: last item is user-role but history contains assistant → agent.
|
||||||
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
|
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "user" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
|
t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
|
||||||
|
// No tool messages → should be "user" (not a false-positive).
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
|
||||||
|
e.applyHeaders(req, "token", body)
|
||||||
|
if got := req.Header.Get("X-Initiator"); got != "user" {
|
||||||
|
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// User follow-up after a completed tool-use conversation.
|
||||||
|
// The last message is a genuine user question — should be "user", not "agent".
|
||||||
|
// This aligns with opencode's behavior: only active tool loops are agent-initiated.
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
|
||||||
|
e.applyHeaders(req, "token", body)
|
||||||
|
if got := req.Header.Get("X-Initiator"); got != "user" {
|
||||||
|
t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// --- Tests for x-github-api-version header (Problem M) ---
|
// --- Tests for x-github-api-version header (Problem M) ---
|
||||||
|
|
||||||
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
|
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
|
||||||
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
|
|||||||
t.Fatal("expected no vision content when messages field is absent")
|
t.Fatal("expected no vision content when messages field is absent")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Tests for applyGitHubCopilotResponsesDefaults ---
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != false {
|
||||||
|
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
inc := gjson.GetBytes(got, "include")
|
||||||
|
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
|
||||||
|
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
|
||||||
|
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != true {
|
||||||
|
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
|
||||||
|
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
|
||||||
|
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello"}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != false {
|
||||||
|
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
// reasoning.summary should NOT be set when reasoning.effort is absent
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").Exists() {
|
||||||
|
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Tests for normalizeGitHubCopilotReasoningField ---
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
if rc != "I think..." {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_Streaming(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
|
||||||
|
if rc != "thinking delta" {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
if rc != "existing" {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
|
||||||
|
if rc0 != "thought-0" {
|
||||||
|
t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
|
||||||
|
}
|
||||||
|
if rc1 != "thought-1" {
|
||||||
|
t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_NoChoices(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"id":"chatcmpl-123"}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
if string(got) != string(data) {
|
||||||
|
t.Fatalf("expected no change, got %s", string(got))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
e.applyHeaders(req, "token", nil)
|
||||||
|
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
|
||||||
|
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Tests for CountTokens (local tiktoken estimation) ---
|
||||||
|
|
||||||
|
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Payload: body,
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Payload) == 0 {
|
||||||
|
t.Fatal("CountTokens() returned empty payload")
|
||||||
|
}
|
||||||
|
// The response should contain a positive token count.
|
||||||
|
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
if tokens <= 0 {
|
||||||
|
t.Fatalf("expected positive token count, got %d", tokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "claude-sonnet-4",
|
||||||
|
Payload: body,
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("claude"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
// Claude source format → should get input_tokens in response
|
||||||
|
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
|
||||||
|
if inputTokens <= 0 {
|
||||||
|
// Fallback: check usage.prompt_tokens (depends on translator registration)
|
||||||
|
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
if promptTokens <= 0 {
|
||||||
|
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCountTokens_EmptyPayload(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
// Empty messages should return 0 tokens.
|
||||||
|
if tokens != 0 {
|
||||||
|
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "betas")
|
||||||
|
if !betas.Exists() {
|
||||||
|
t.Fatal("betas field should still exist after stripping")
|
||||||
|
}
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "context-1m-2025-08-07" {
|
||||||
|
t.Fatal("context-1m-2025-08-07 should have been stripped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Other betas should be preserved
|
||||||
|
found := false
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "interleaved-thinking-2025-05-14" {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatal("other betas should be preserved")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"gpt-4o","messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
// Should be unchanged
|
||||||
|
if string(result) != string(body) {
|
||||||
|
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "metadata.betas")
|
||||||
|
if !betas.Exists() {
|
||||||
|
t.Fatal("metadata.betas field should still exist after stripping")
|
||||||
|
}
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "context-1m-2025-08-07" {
|
||||||
|
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if betas.Array()[0].String() != "other-beta" {
|
||||||
|
t.Fatal("other betas in metadata.betas should be preserved")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "betas")
|
||||||
|
if betas.Exists() {
|
||||||
|
t.Fatal("betas field should be deleted when all betas are stripped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCopilotModelEntry_Limits(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
capabilities map[string]any
|
||||||
|
wantNil bool
|
||||||
|
wantPrompt int
|
||||||
|
wantOutput int
|
||||||
|
wantContext int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil capabilities",
|
||||||
|
capabilities: nil,
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no limits key",
|
||||||
|
capabilities: map[string]any{"family": "claude-opus-4.6"},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "limits is not a map",
|
||||||
|
capabilities: map[string]any{"limits": "invalid"},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "all zero values",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(0),
|
||||||
|
"max_prompt_tokens": float64(0),
|
||||||
|
"max_output_tokens": float64(0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "individual account limits (128K prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(144000),
|
||||||
|
"max_prompt_tokens": float64(128000),
|
||||||
|
"max_output_tokens": float64(64000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 128000,
|
||||||
|
wantOutput: 64000,
|
||||||
|
wantContext: 144000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "business account limits (168K prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(200000),
|
||||||
|
"max_prompt_tokens": float64(168000),
|
||||||
|
"max_output_tokens": float64(32000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 168000,
|
||||||
|
wantOutput: 32000,
|
||||||
|
wantContext: 200000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "partial limits (only prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_prompt_tokens": float64(128000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 128000,
|
||||||
|
wantOutput: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
entry := copilotauth.CopilotModelEntry{
|
||||||
|
ID: "claude-opus-4.6",
|
||||||
|
Capabilities: tt.capabilities,
|
||||||
|
}
|
||||||
|
limits := entry.Limits()
|
||||||
|
if tt.wantNil {
|
||||||
|
if limits != nil {
|
||||||
|
t.Fatalf("expected nil limits, got %+v", limits)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if limits == nil {
|
||||||
|
t.Fatal("expected non-nil limits, got nil")
|
||||||
|
}
|
||||||
|
if limits.MaxPromptTokens != tt.wantPrompt {
|
||||||
|
t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
|
||||||
|
}
|
||||||
|
if limits.MaxOutputTokens != tt.wantOutput {
|
||||||
|
t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
|
||||||
|
}
|
||||||
|
if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
|
||||||
|
t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ const (
|
|||||||
qwenRateLimitWindow = time.Minute // sliding window duration
|
qwenRateLimitWindow = time.Minute // sliding window duration
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
|
||||||
|
|
||||||
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
|
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
|
||||||
var qwenBeijingLoc = func() *time.Location {
|
var qwenBeijingLoc = func() *time.Location {
|
||||||
loc, err := time.LoadLocation("Asia/Shanghai")
|
loc, err := time.LoadLocation("Asia/Shanghai")
|
||||||
@@ -170,6 +172,36 @@ func timeUntilNextDay() time.Duration {
|
|||||||
return tomorrow.Sub(now)
|
return tomorrow.Sub(now)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
|
||||||
|
func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
|
||||||
|
messages := gjson.GetBytes(payload, "messages")
|
||||||
|
if messages.Exists() && messages.IsArray() {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
buf.WriteByte('[')
|
||||||
|
buf.Write(qwenDefaultSystemMessage)
|
||||||
|
for _, msg := range messages.Array() {
|
||||||
|
buf.WriteByte(',')
|
||||||
|
buf.WriteString(msg.Raw)
|
||||||
|
}
|
||||||
|
buf.WriteByte(']')
|
||||||
|
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
|
||||||
|
if errSet != nil {
|
||||||
|
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
|
||||||
|
}
|
||||||
|
return updated, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
buf.WriteByte('[')
|
||||||
|
buf.Write(qwenDefaultSystemMessage)
|
||||||
|
buf.WriteByte(']')
|
||||||
|
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
|
||||||
|
if errSet != nil {
|
||||||
|
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
|
||||||
|
}
|
||||||
|
return updated, nil
|
||||||
|
}
|
||||||
|
|
||||||
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
||||||
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
||||||
type QwenExecutor struct {
|
type QwenExecutor struct {
|
||||||
@@ -251,6 +283,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
|
|
||||||
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
||||||
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
||||||
|
body, err = ensureQwenSystemMessage(body)
|
||||||
|
if err != nil {
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
@@ -357,15 +393,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
// toolsResult := gjson.GetBytes(body, "tools")
|
||||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||||
// This will have no real consequences. It's just to scare Qwen3.
|
// This will have no real consequences. It's just to scare Qwen3.
|
||||||
if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
|
// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
|
||||||
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
|
// body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
|
||||||
}
|
// }
|
||||||
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
|
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
|
||||||
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
||||||
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
||||||
|
body, err = ensureQwenSystemMessage(body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
|
|||||||
// CLIHandler handles CLI-specific requests for Gemini API operations.
|
// CLIHandler handles CLI-specific requests for Gemini API operations.
|
||||||
// It restricts access to localhost only and routes requests to appropriate internal handlers.
|
// It restricts access to localhost only and routes requests to appropriate internal handlers.
|
||||||
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
||||||
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
|
if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
|
||||||
|
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
||||||
|
Error: handlers.ErrorDetail{
|
||||||
|
Message: "Gemini CLI endpoint is disabled",
|
||||||
|
Type: "forbidden",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
requestHost := c.Request.Host
|
||||||
|
requestHostname := requestHost
|
||||||
|
if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
|
||||||
|
requestHostname = hostname
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
|
||||||
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
||||||
Error: handlers.ErrorDetail{
|
Error: handlers.ErrorDetail{
|
||||||
Message: "CLI reply only allow local access",
|
Message: "CLI reply only allow local access",
|
||||||
|
|||||||
@@ -379,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo
|
|||||||
|
|
||||||
for _, item := range nextInput.Array() {
|
for _, item := range nextInput.Array() {
|
||||||
switch strings.TrimSpace(item.Get("type").String()) {
|
switch strings.TrimSpace(item.Get("type").String()) {
|
||||||
case "function_call":
|
case "function_call", "custom_tool_call":
|
||||||
return true
|
return true
|
||||||
case "message":
|
case "message":
|
||||||
role := strings.TrimSpace(item.Get("role").String())
|
role := strings.TrimSpace(item.Get("role").String())
|
||||||
@@ -431,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
if itemType == "function_call" {
|
if isResponsesToolCallType(itemType) {
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID != "" {
|
if callID != "" {
|
||||||
if _, ok := seenCallIDs[callID]; ok {
|
if _, ok := seenCallIDs[callID]; ok {
|
||||||
|
|||||||
@@ -520,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
|
||||||
|
warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
|
||||||
|
if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("expected warmup output to remain")
|
||||||
|
}
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 3 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 3", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected first item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("missing inserted output: %s", input[1].Raw)
|
||||||
|
}
|
||||||
|
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 1 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 1", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
|
||||||
|
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
callCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 3 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 3", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("missing inserted call: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected output item: %s", input[1].Raw)
|
||||||
|
}
|
||||||
|
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
|
||||||
|
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
callCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 1 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 1", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
||||||
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
sessionKey := "session-1"
|
sessionKey := "session-1"
|
||||||
@@ -536,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
|
||||||
|
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
|
||||||
|
|
||||||
|
cached, ok := cache.get(sessionKey, "call-1")
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected cached custom tool call")
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected cached custom tool call: %s", cached)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
|
||||||
|
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
|
||||||
|
|
||||||
|
cached, ok := cache.get(sessionKey, "call-1")
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected cached custom tool call")
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected cached custom tool call: %s", cached)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
|
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
|
||||||
gin.SetMode(gin.TestMode)
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
@@ -1023,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
|
||||||
|
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
|
||||||
|
lastResponseOutput := []byte(`[
|
||||||
|
{"type":"message","id":"assistant-1","role":"assistant"}
|
||||||
|
]`)
|
||||||
|
raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
|
||||||
|
|
||||||
|
normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
|
||||||
|
if errMsg != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", errMsg.Error)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(normalized, "previous_response_id").Exists() {
|
||||||
|
t.Fatalf("previous_response_id must not exist in transcript replacement mode")
|
||||||
|
}
|
||||||
|
items := gjson.GetBytes(normalized, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-compact" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-compact" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
|
||||||
|
}
|
||||||
|
if !bytes.Equal(next, normalized) {
|
||||||
|
t.Fatalf("next request snapshot should match replacement request")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
|
||||||
|
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
|
||||||
|
lastResponseOutput := []byte(`[
|
||||||
|
{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
|
||||||
|
]`)
|
||||||
|
raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
|
||||||
|
|
||||||
|
normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
|
||||||
|
if errMsg != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", errMsg.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
items := gjson.GetBytes(normalized, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-1" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-1" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("unexpected merged input order: %s", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
|
executor := &websocketCompactionCaptureExecutor{}
|
||||||
|
manager := coreauth.NewManager(nil, nil, nil)
|
||||||
|
manager.RegisterExecutor(executor)
|
||||||
|
auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
|
||||||
|
if _, err := manager.Register(context.Background(), auth); err != nil {
|
||||||
|
t.Fatalf("Register auth: %v", err)
|
||||||
|
}
|
||||||
|
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||||
|
t.Cleanup(func() {
|
||||||
|
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
|
||||||
|
})
|
||||||
|
|
||||||
|
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
|
||||||
|
h := NewOpenAIResponsesAPIHandler(base)
|
||||||
|
router := gin.New()
|
||||||
|
router.GET("/v1/responses/ws", h.ResponsesWebsocket)
|
||||||
|
router.POST("/v1/responses/compact", h.Compact)
|
||||||
|
|
||||||
|
server := httptest.NewServer(router)
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
|
||||||
|
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dial websocket: %v", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if errClose := conn.Close(); errClose != nil {
|
||||||
|
t.Fatalf("close websocket: %v", errClose)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
requests := []string{
|
||||||
|
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
|
||||||
|
`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
|
||||||
|
}
|
||||||
|
for i := range requests {
|
||||||
|
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
|
||||||
|
t.Fatalf("write websocket message %d: %v", i+1, errWrite)
|
||||||
|
}
|
||||||
|
_, payload, errReadMessage := conn.ReadMessage()
|
||||||
|
if errReadMessage != nil {
|
||||||
|
t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
|
||||||
|
t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compactResp, errPost := server.Client().Post(
|
||||||
|
server.URL+"/v1/responses/compact",
|
||||||
|
"application/json",
|
||||||
|
strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
|
||||||
|
)
|
||||||
|
if errPost != nil {
|
||||||
|
t.Fatalf("compact request failed: %v", errPost)
|
||||||
|
}
|
||||||
|
if errClose := compactResp.Body.Close(); errClose != nil {
|
||||||
|
t.Fatalf("close compact response body: %v", errClose)
|
||||||
|
}
|
||||||
|
if compactResp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
|
||||||
|
}
|
||||||
|
|
||||||
|
postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
|
||||||
|
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
|
||||||
|
t.Fatalf("write post-compact websocket message: %v", errWrite)
|
||||||
|
}
|
||||||
|
_, payload, errReadMessage := conn.ReadMessage()
|
||||||
|
if errReadMessage != nil {
|
||||||
|
t.Fatalf("read post-compact websocket message: %v", errReadMessage)
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
|
||||||
|
t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
|
||||||
|
}
|
||||||
|
|
||||||
|
executor.mu.Lock()
|
||||||
|
defer executor.mu.Unlock()
|
||||||
|
|
||||||
|
if executor.compactPayload == nil {
|
||||||
|
t.Fatalf("compact payload was not captured")
|
||||||
|
}
|
||||||
|
if len(executor.streamPayloads) != 3 {
|
||||||
|
t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := executor.streamPayloads[2]
|
||||||
|
items := gjson.GetBytes(merged, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-compact" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-compact" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("unexpected post-compact input order: %s", merged)
|
||||||
|
}
|
||||||
|
if items[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
|
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
|
||||||
gin.SetMode(gin.TestMode)
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
|
|||||||
@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
switch itemType {
|
switch {
|
||||||
case "function_call_output":
|
case isResponsesToolCallOutputType(itemType):
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
outputPresent[callID] = struct{}{}
|
outputPresent[callID] = struct{}{}
|
||||||
outputCache.record(sessionKey, callID, item)
|
outputCache.record(sessionKey, callID, item)
|
||||||
case "function_call":
|
case isResponsesToolCallType(itemType):
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
continue
|
continue
|
||||||
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
if itemType == "function_call_output" {
|
if isResponsesToolCallOutputType(itemType) {
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
// Upstream rejects tool outputs without a call_id; drop it.
|
// Upstream rejects tool outputs without a call_id; drop it.
|
||||||
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
|
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if itemType != "function_call" {
|
if !isResponsesToolCallType(itemType) {
|
||||||
filtered = append(filtered, item)
|
filtered = append(filtered, item)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, item := range output.Array() {
|
for _, item := range output.Array() {
|
||||||
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
|
if !isResponsesToolCallType(item.Get("type").String()) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
callID := strings.TrimSpace(item.Get("call_id").String())
|
callID := strings.TrimSpace(item.Get("call_id").String())
|
||||||
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
if !item.Exists() || !item.IsObject() {
|
if !item.Exists() || !item.IsObject() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
|
if !isResponsesToolCallType(item.Get("type").String()) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
callID := strings.TrimSpace(item.Get("call_id").String())
|
callID := strings.TrimSpace(item.Get("call_id").String())
|
||||||
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
|
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isResponsesToolCallType reports whether itemType, after trimming surrounding
// whitespace, names a tool-call input item: "function_call" or
// "custom_tool_call". The comparison is case-sensitive.
func isResponsesToolCallType(itemType string) bool {
	switch strings.TrimSpace(itemType) {
	case "function_call", "custom_tool_call":
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// isResponsesToolCallOutputType reports whether itemType, after trimming
// surrounding whitespace, names a tool-call output item:
// "function_call_output" or "custom_tool_call_output". The comparison is
// case-sensitive.
func isResponsesToolCallOutputType(itemType string) bool {
	switch strings.TrimSpace(itemType) {
	case "function_call_output", "custom_tool_call_output":
		return true
	default:
		return false
	}
}
|
||||||
|
|||||||
Reference in New Issue
Block a user