Compare commits

..

19 Commits

Author SHA1 Message Date
Luis Pater
98509f615c Merge pull request #485 from kunish/fix/copilot-premium-request-inflation
fix(copilot): reduce premium request inflation, enable thinking, and use dynamic API limits
2026-04-04 02:19:56 +08:00
Luis Pater
e7a66ae504 Merge branch 'router-for-me:main' into main 2026-04-04 02:18:06 +08:00
Luis Pater
754b126944 fix(executor): remove commented-out code in QwenExecutor 2026-04-04 02:14:48 +08:00
Luis Pater
ae37ccffbf Merge pull request #2520 from Arronlong/main
fix:qwen invalid_parameter_error
2026-04-04 02:13:09 +08:00
Luis Pater
42c062bb5b Merge pull request #2509 from adamhelfgott/fix-claude-thinking-temperature
Normalize Claude temperature when thinking is enabled
2026-04-03 23:55:50 +08:00
kunish
87bf0b73d5 fix(copilot): use dynamic API limits to prevent prompt token overflow
The Copilot API enforces per-account prompt token limits (128K individual,
168K business) that differ from the static 200K context length advertised
by the proxy. This mismatch caused Claude Code to accumulate context
beyond the actual limit, triggering "prompt token count exceeds the limit
of 128000" errors.

Changes:
- Extract max_prompt_tokens and max_output_tokens from the Copilot
  /models API response (capabilities.limits) and use them as the
  authoritative ContextLength and MaxCompletionTokens values
- Add CopilotModelLimits struct and Limits() helper to parse limits
  from the existing Capabilities map
- Fix GitLab Duo context-1m beta header not being set when routing
  through the Anthropic gateway (gitlab_duo_force_context_1m attr
  was set but only gin headers were checked)
- Fix flaky parallel tests that shared global model registry state
2026-04-03 23:54:17 +08:00
Luis Pater
f389667ec3 Merge pull request #2513 from lonr-6/codex/fix-ws-custom-tool-repair-v2
fix: repair responses websocket custom tool call pairing
2026-04-03 23:45:38 +08:00
Arronlong
29dba0399b Comment out system message check in Qwen executor
fix qwen invalid_parameter_error
2026-04-03 23:07:33 +08:00
Luis Pater
a824e7cd0b feat(models): add GPT-5.3, GPT-5.4, and GPT-5.4-mini with enhanced "thinking" levels 2026-04-03 23:05:10 +08:00
Luis Pater
140faef7dc Merge branch 'router-for-me:main' into main 2026-04-03 21:48:23 +08:00
Luis Pater
adb580b344 feat(security): add configuration to toggle Gemini CLI endpoint access
Closes: #2445
2026-04-03 21:46:49 +08:00
Luis Pater
06405f2129 fix(security): enforce stricter localhost validation for GeminiCLIAPIHandler
Closes: #2445
2026-04-03 21:22:03 +08:00
kunish
b849bf79d6 fix(copilot): address code review — SSE reasoning, multi-choice, agent detection
- Strip SSE `data:` prefix before normalizing reasoning_text→reasoning_content
  in streaming mode; re-wrap afterward for the translator
- Iterate all choices in normalizeGitHubCopilotReasoningField (not just
  choices[0]) to support n>1 requests
- Remove over-broad tool-role fallback in isAgentInitiated that scanned
  all messages for role:"tool", aligning with opencode's approach of only
  detecting active tool loops — genuine user follow-ups after tool use are
  no longer mis-classified as agent-initiated
- Add 5 reasoning normalization tests; update 2 X-Initiator tests to match
  refined semantics
2026-04-03 20:51:19 +08:00
kunish
59af2c57b1 fix(copilot): reduce premium request inflation and enable thinking
This commit addresses three issues with Claude Code through GitHub
Copilot:

1. **Premium request inflation**: Responses API requests were missing
   Openai-Intent headers and proper defaults, causing Copilot to bill
   each tool-loop continuation as a new premium request. Fixed by adding
   isAgentInitiated() heuristic (checks for tool_result content or
   preceding assistant tool_use), applying Responses API defaults
   (store, include, reasoning.summary), and local tiktoken-based token
   counting to avoid extra API calls.

2. **Context overflow**: Claude Code's modelSupports1M() hardcodes
   opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K.
   Fixed by stripping the context-1m-2025-08-07 beta from translated
   request bodies. Also forwards response headers in non-streaming
   Execute() and registers the GET /copilot-quota management API route.

3. **Thinking not working**: Add ThinkingSupport with level-based
   reasoning to Claude models in the static definitions. Normalize
   Copilot's non-standard 'reasoning_text' response field to
   'reasoning_content' before passing to the SDK translator. Use
   caller-provided context in CountTokens instead of Background().
2026-04-03 20:24:30 +08:00
Kai Wang
d1fd2c4ad4 fix: repair websocket custom tool calls 2026-04-03 17:11:44 +08:00
Kai Wang
b6c6379bfa fix: repair websocket custom tool calls 2026-04-03 17:11:42 +08:00
Kai Wang
8f0e66b72e fix: repair websocket custom tool calls 2026-04-03 17:11:41 +08:00
Adam Helfgott
f63cf6ff7a Normalize Claude temperature for thinking 2026-04-03 03:45:51 -04:00
Luis Pater
d2419ed49d feat(executor): ensure default system message in QwenExecutor payload 2026-04-03 11:18:48 +08:00
15 changed files with 1359 additions and 131 deletions

View File

@@ -105,6 +105,10 @@ routing:
# When true, enable authentication for the WebSocket API (/v1/ws).
ws-auth: false
# When true, enable Gemini CLI internal endpoints (/v1internal:*).
# Default is false for safety.
enable-gemini-cli-endpoint: false
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
nonstream-keepalive-interval: 0

View File

@@ -573,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)

View File

@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
Capabilities map[string]any `json:"capabilities,omitempty"`
}
// CopilotModelLimits holds the token limits returned by the Copilot /models API
// under capabilities.limits. These limits vary by account type (individual vs
// business) and are the authoritative source for enforcing prompt size.
type CopilotModelLimits struct {
// MaxContextWindowTokens is the total context window (prompt + output).
MaxContextWindowTokens int
// MaxPromptTokens is the hard limit on input/prompt tokens.
// Exceeding this triggers a 400 error from the Copilot API.
MaxPromptTokens int
// MaxOutputTokens is the maximum number of output/completion tokens.
MaxOutputTokens int
}
// Limits extracts the token limits from the model's capabilities map.
// Returns nil if no limits are available or the structure is unexpected.
//
// Expected Copilot API shape:
//
// "capabilities": {
// "limits": {
// "max_context_window_tokens": 200000,
// "max_prompt_tokens": 168000,
// "max_output_tokens": 32000
// }
// }
func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
if e.Capabilities == nil {
return nil
}
limitsRaw, ok := e.Capabilities["limits"]
if !ok {
return nil
}
limitsMap, ok := limitsRaw.(map[string]any)
if !ok {
return nil
}
result := &CopilotModelLimits{
MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]),
MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]),
}
// Only return if at least one field is populated.
if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
return nil
}
return result
}
// anyToInt converts a JSON-decoded numeric value to int.
// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
func anyToInt(v any) int {
switch n := v.(type) {
case float64:
return int(n)
case float32:
return int(n)
case int:
return n
case int64:
return int(n)
default:
return 0
}
}
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
type CopilotModelsResponse struct {
Data []CopilotModelEntry `json:"data"`

View File

@@ -9,6 +9,10 @@ type SDKConfig struct {
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
// Default is false for safety; when false, /v1internal:* requests are rejected.
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
// credentials as well.

View File

@@ -549,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-opus-4.6",
@@ -561,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4",
@@ -573,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4.5",
@@ -585,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4.6",
@@ -597,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gemini-2.5-pro",

View File

@@ -280,6 +280,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -554,6 +555,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -610,6 +612,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -838,6 +842,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -896,6 +901,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -1070,6 +1077,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -1371,6 +1380,75 @@
"xhigh"
]
}
},
{
"id": "gpt-5.3-codex",
"object": "model",
"created": 1770307200,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.3 Codex",
"version": "gpt-5.3",
"description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4",
"object": "model",
"created": 1772668800,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4",
"version": "gpt-5.4",
"description": "Stable version of GPT 5.4",
"context_length": 1050000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-team": [
@@ -1623,6 +1701,29 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-plus": [
@@ -1898,6 +1999,29 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-pro": [
@@ -2173,55 +2297,40 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"qwen": [
{
"id": "qwen3-coder-plus",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Plus",
"version": "3.0",
"description": "Advanced code generation and understanding model",
"context_length": 32768,
"max_completion_tokens": 8192,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{
"id": "qwen3-coder-flash",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Flash",
"version": "3.0",
"description": "Fast code generation model",
"context_length": 8192,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{
"id": "coder-model",
"object": "model",
"created": 1771171200,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen 3.5 Plus",
"version": "3.5",
"display_name": "Qwen 3.6 Plus",
"version": "3.6",
"description": "efficient hybrid model with leading coding performance",
"context_length": 1048576,
"max_completion_tokens": 65536,
@@ -2232,25 +2341,6 @@
"stream",
"stop"
]
},
{
"id": "vision-model",
"object": "model",
"created": 1758672000,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Vision Model",
"version": "3.0",
"description": "Vision model model",
"context_length": 32768,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
}
],
"iflow": [
@@ -2639,11 +2729,12 @@
"context_length": 1048576,
"max_completion_tokens": 65535,
"thinking": {
"min": 128,
"max": 32768,
"min": 1,
"max": 65535,
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -2659,11 +2750,12 @@
"context_length": 1048576,
"max_completion_tokens": 65535,
"thinking": {
"min": 128,
"max": 32768,
"min": 1,
"max": 65535,
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}

View File

@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
return body
}
// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
// thinking is enabled. Anthropic rejects temperatures other than 1 when
// thinking.type is enabled/adaptive/auto.
func normalizeClaudeTemperatureForThinking(body []byte) []byte {
if !gjson.GetBytes(body, "temperature").Exists() {
return body
}
thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
switch thinkingType {
case "enabled", "adaptive", "auto":
if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
return body
}
body, _ = sjson.SetBytes(body, "temperature", 1)
}
return body
}
type compositeReadCloser struct {
io.Reader
closers []func() error
@@ -827,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
hasClaude1MHeader = true
}
}
// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
// when routing through the Anthropic gateway, but the gin headers won't have
// X-CPA-CLAUDE-1M because the request is internally constructed.
if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
hasClaude1MHeader = true
}
}
// Merge extra betas from request body and request flags.
if len(extraBetas) > 0 || hasClaude1MHeader {

View File

@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
out := disableThinkingIfToolChoiceForced(payload)
out = normalizeClaudeTemperatureForThinking(out)
if gjson.GetBytes(out, "thinking").Exists() {
t.Fatalf("thinking should be removed when tool_choice forces tool use")
}
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"slices"
"strings"
"sync"
"time"
@@ -17,6 +18,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
@@ -40,7 +42,7 @@ const (
copilotEditorVersion = "vscode/1.107.0"
copilotPluginVersion = "copilot-chat/0.35.0"
copilotIntegrationID = "vscode-chat"
copilotOpenAIIntent = "conversation-panel"
copilotOpenAIIntent = "conversation-edits"
copilotGitHubAPIVer = "2025-04-01"
)
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body)
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses {
body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else {
body = normalizeGitHubCopilotChatTools(body)
}
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses && from.String() == "claude" {
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
} else {
data = normalizeGitHubCopilotReasoningField(data)
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
}
resp = cliproxyexecutor.Response{Payload: converted}
resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
reporter.ensurePublished(ctx)
return resp, nil
}
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body)
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses {
body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else {
body = normalizeGitHubCopilotChatTools(body)
}
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses && from.String() == "claude" {
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
} else {
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
// Strip SSE "data: " prefix before reasoning field normalization,
// since normalizeGitHubCopilotReasoningField expects pure JSON.
// Re-wrap with the prefix afterward for the translator.
normalizedLine := bytes.Clone(line)
if bytes.HasPrefix(line, dataTag) {
sseData := bytes.TrimSpace(line[len(dataTag):])
if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
if !bytes.Equal(normalized, sseData) {
normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
}
}
}
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
}
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
}, nil
}
// CountTokens is not supported for GitHub Copilot.
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
// CountTokens estimates token count locally using tiktoken, since the GitHub
// Copilot API does not expose a dedicated token counting endpoint.
func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
enc, err := helps.TokenizerForModel(baseModel)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
}
count, err := helps.CountOpenAIChatTokens(enc, translated)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
}
usageJSON := helps.BuildOpenAIUsageJSON(count)
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
}
// Refresh validates the GitHub token is still working.
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
r.Header.Set("X-Request-Id", uuid.NewString())
initiator := "user"
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
if isAgentInitiated(body) {
initiator = "agent"
}
r.Header.Set("X-Initiator", initiator)
}
func detectLastConversationRole(body []byte) string {
// isAgentInitiated determines whether the current request is agent-initiated
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
//
// GitHub Copilot uses the X-Initiator header for billing:
// - "user" → consumes premium request quota
// - "agent" → free (tool loops, continuations)
//
// The challenge: Claude Code sends tool results as role:"user" messages with
// content type "tool_result". After translation to OpenAI format, the tool_result
// part becomes a separate role:"tool" message, but if the original Claude message
// also contained text content (e.g. skill invocations, attachment descriptions),
// a role:"user" message is emitted AFTER the tool message, making the last message
// appear user-initiated when it's actually part of an agent tool loop.
//
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
// we infer agent status by checking whether the conversation contains prior
// assistant/tool messages — if it does, the current request is a continuation.
//
// References:
// - opencode#8030, opencode#15824: same root cause and fix approach
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
// - pi-ai: github-copilot-headers.ts (last message role check)
func isAgentInitiated(body []byte) bool {
if len(body) == 0 {
return ""
return false
}
// Chat Completions API: check messages array
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
arr := messages.Array()
if len(arr) == 0 {
return false
}
lastRole := ""
for i := len(arr) - 1; i >= 0; i-- {
if role := arr[i].Get("role").String(); role != "" {
return role
if r := arr[i].Get("role").String(); r != "" {
lastRole = r
break
}
}
// If last message is assistant or tool, clearly agent-initiated.
if lastRole == "assistant" || lastRole == "tool" {
return true
}
// If last message is "user", check whether it contains tool results
// (indicating a tool-loop continuation) or if the preceding message
// is an assistant tool_use. This is more precise than checking for
// any prior assistant message, which would false-positive on genuine
// multi-turn follow-ups.
if lastRole == "user" {
// Check if the last user message contains tool_result content
lastContent := arr[len(arr)-1].Get("content")
if lastContent.Exists() && lastContent.IsArray() {
for _, part := range lastContent.Array() {
if part.Get("type").String() == "tool_result" {
return true
}
}
}
// Check if the second-to-last message is an assistant with tool_use
if len(arr) >= 2 {
prev := arr[len(arr)-2]
if prev.Get("role").String() == "assistant" {
prevContent := prev.Get("content")
if prevContent.Exists() && prevContent.IsArray() {
for _, part := range prevContent.Array() {
if part.Get("type").String() == "tool_use" {
return true
}
}
}
}
}
}
return false
}
// Responses API: check input array
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
arr := inputs.Array()
for i := len(arr) - 1; i >= 0; i-- {
item := arr[i]
if len(arr) == 0 {
return false
}
// Most Responses input items carry a top-level role.
if role := item.Get("role").String(); role != "" {
return role
// Check last item
last := arr[len(arr)-1]
if role := last.Get("role").String(); role == "assistant" {
return true
}
switch last.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call":
return true
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
return true
}
// If last item is user-role, check for prior non-user items
for _, item := range arr {
if role := item.Get("role").String(); role == "assistant" {
return true
}
switch item.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call":
return "assistant"
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
return "tool"
case "function_call", "function_call_output", "function_call_response",
"function_call_arguments", "computer_call", "computer_call_output":
return true
}
}
}
return ""
return false
}
// detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
return body
}
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
// context on Anthropic's API, but Copilot's Claude models are limited to
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
// it from the translated body avoids confusing downstream translators.
var copilotUnsupportedBetas = []string{
"context-1m-2025-08-07",
}
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
// translated request body. In OpenAI format the betas may appear under
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
// "betas". This function checks all known locations.
func stripUnsupportedBetas(body []byte) []byte {
betaPaths := []string{"betas", "metadata.betas"}
for _, path := range betaPaths {
arr := gjson.GetBytes(body, path)
if !arr.Exists() || !arr.IsArray() {
continue
}
var filtered []string
changed := false
for _, item := range arr.Array() {
beta := item.String()
if isCopilotUnsupportedBeta(beta) {
changed = true
continue
}
filtered = append(filtered, beta)
}
if !changed {
continue
}
if len(filtered) == 0 {
body, _ = sjson.DeleteBytes(body, path)
} else {
body, _ = sjson.SetBytes(body, path, filtered)
}
}
return body
}
func isCopilotUnsupportedBeta(beta string) bool {
return slices.Contains(copilotUnsupportedBetas, beta)
}
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
// that the SDK translator expects. This handles both streaming deltas
// (choices[].delta.reasoning_text) and non-streaming messages
// (choices[].message.reasoning_text). The field is only renamed when
// 'reasoning_content' is absent or null, preserving standard responses.
// All choices are processed to support n>1 requests.
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
choices := gjson.GetBytes(data, "choices")
if !choices.Exists() || !choices.IsArray() {
return data
}
for i := range choices.Array() {
// Non-streaming: choices[i].message.reasoning_text
msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, msgRC, rt.String())
}
}
// Streaming: choices[i].delta.reasoning_text
deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, deltaRC, rt.String())
}
}
}
return data
}
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
if sourceFormat.String() == "openai-response" {
return true
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
}
func containsEndpoint(endpoints []string, endpoint string) bool {
for _, item := range endpoints {
if item == endpoint {
return true
}
}
return false
return slices.Contains(endpoints, endpoint)
}
// flattenAssistantContent converts assistant message content from array format
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
return body
}
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
// that both vscode-copilot-chat and pi-ai always include.
//
// References:
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
// store: false — prevents request/response storage
if !gjson.GetBytes(body, "store").Exists() {
body, _ = sjson.SetBytes(body, "store", false)
}
// include: ["reasoning.encrypted_content"] — enables reasoning content
// reuse across turns, avoiding redundant computation
if !gjson.GetBytes(body, "include").Exists() {
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
}
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
}
return body
}
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
tools := gjson.GetBytes(body, "tools")
if tools.Exists() {
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
}
// Override with real limits from the Copilot API when available.
// The API returns per-account limits (individual vs business) under
// capabilities.limits, which are more accurate than our static
// fallback values. We use max_prompt_tokens as ContextLength because
// that's the hard limit the Copilot API enforces on prompt size —
// exceeding it triggers "prompt token count exceeds the limit" errors.
if limits := entry.Limits(); limits != nil {
if limits.MaxPromptTokens > 0 {
m.ContextLength = limits.MaxPromptTokens
}
if limits.MaxOutputTokens > 0 {
m.MaxCompletionTokens = limits.MaxOutputTokens
}
}
models = append(models, m)
}

View File

@@ -1,11 +1,14 @@
package executor
import (
"context"
"net/http"
"strings"
"testing"
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson"
)
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
}
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
t.Parallel()
// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
t.Fatal("expected responses-only registry model to use /responses")
}
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
}
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
t.Parallel()
// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
reg := registry.GetGlobalRegistry()
clientID := "github-copilot-test-client"
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
t.Parallel()
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
if gjson.GetBytes(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
}
if gjson.Get(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
if gjson.GetBytes(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
}
}
@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
t.Parallel()
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
if gjson.GetBytes(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
}
if gjson.Get(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
}
}
@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
var param any
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
if len(created) == 0 || !strings.Contains(created[0], "message_start") {
if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
t.Fatalf("created events = %#v, want message_start", created)
}
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
joinedDelta := strings.Join(delta, "")
var joinedDelta string
for _, d := range delta {
joinedDelta += string(d)
}
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
}
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
joinedCompleted := strings.Join(completed, "")
var joinedCompleted string
for _, c := range completed {
joinedCompleted += string(c)
}
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
}
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Last role governs the initiator decision.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
// When the last role is "user" and the message contains tool_result content,
// the request is a continuation (e.g. Claude tool result translated to a
// synthetic user message). Should be "agent".
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
}
}
@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// When the last message has role "tool", it's clearly agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
}
}
@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Responses API: last item is user-role but history contains assistant → agent.
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
}
}
@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
}
}
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
// No tool messages → should be "user" (not a false-positive).
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
}
}
func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// User follow-up after a completed tool-use conversation.
// The last message is a genuine user question — should be "user", not "agent".
// This aligns with opencode's behavior: only active tool loops are agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
}
}
// --- Tests for x-github-api-version header (Problem M) ---
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
t.Fatal("expected no vision content when messages field is absent")
}
}
// --- Tests for applyGitHubCopilotResponsesDefaults ---
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
inc := gjson.GetBytes(got, "include")
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != true {
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
}
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello"}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
// reasoning.summary should NOT be set when reasoning.effort is absent
if gjson.GetBytes(got, "reasoning.summary").Exists() {
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
// --- Tests for normalizeGitHubCopilotReasoningField ---
func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "I think..." {
t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
}
}
func TestNormalizeReasoningField_Streaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
if rc != "thinking delta" {
t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
}
}
func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "existing" {
t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
}
}
func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
if rc0 != "thought-0" {
t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
}
if rc1 != "thought-1" {
t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
}
}
func TestNormalizeReasoningField_NoChoices(t *testing.T) {
t.Parallel()
data := []byte(`{"id":"chatcmpl-123"}`)
got := normalizeGitHubCopilotReasoningField(data)
if string(got) != string(data) {
t.Fatalf("expected no change, got %s", string(got))
}
}
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
e.applyHeaders(req, "token", nil)
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
}
}
// --- Tests for CountTokens (local tiktoken estimation) ---
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
if len(resp.Payload) == 0 {
t.Fatal("CountTokens() returned empty payload")
}
// The response should contain a positive token count.
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if tokens <= 0 {
t.Fatalf("expected positive token count, got %d", tokens)
}
}
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "claude-sonnet-4",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("claude"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
// Claude source format → should get input_tokens in response
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
if inputTokens <= 0 {
// Fallback: check usage.prompt_tokens (depends on translator registration)
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if promptTokens <= 0 {
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
}
}
}
func TestCountTokens_EmptyPayload(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
// Empty messages should return 0 tokens.
if tokens != 0 {
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
}
}
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if !betas.Exists() {
t.Fatal("betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped")
}
}
// Other betas should be preserved
found := false
for _, item := range betas.Array() {
if item.String() == "interleaved-thinking-2025-05-14" {
found = true
}
}
if !found {
t.Fatal("other betas should be preserved")
}
}
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"gpt-4o","messages":[]}`)
result := stripUnsupportedBetas(body)
// Should be unchanged
if string(result) != string(body) {
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
}
}
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "metadata.betas")
if !betas.Exists() {
t.Fatal("metadata.betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
}
}
if betas.Array()[0].String() != "other-beta" {
t.Fatal("other betas in metadata.betas should be preserved")
}
}
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if betas.Exists() {
t.Fatal("betas field should be deleted when all betas are stripped")
}
}
func TestCopilotModelEntry_Limits(t *testing.T) {
t.Parallel()
tests := []struct {
name string
capabilities map[string]any
wantNil bool
wantPrompt int
wantOutput int
wantContext int
}{
{
name: "nil capabilities",
capabilities: nil,
wantNil: true,
},
{
name: "no limits key",
capabilities: map[string]any{"family": "claude-opus-4.6"},
wantNil: true,
},
{
name: "limits is not a map",
capabilities: map[string]any{"limits": "invalid"},
wantNil: true,
},
{
name: "all zero values",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(0),
"max_prompt_tokens": float64(0),
"max_output_tokens": float64(0),
},
},
wantNil: true,
},
{
name: "individual account limits (128K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(144000),
"max_prompt_tokens": float64(128000),
"max_output_tokens": float64(64000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 64000,
wantContext: 144000,
},
{
name: "business account limits (168K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(200000),
"max_prompt_tokens": float64(168000),
"max_output_tokens": float64(32000),
},
},
wantNil: false,
wantPrompt: 168000,
wantOutput: 32000,
wantContext: 200000,
},
{
name: "partial limits (only prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_prompt_tokens": float64(128000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
entry := copilotauth.CopilotModelEntry{
ID: "claude-opus-4.6",
Capabilities: tt.capabilities,
}
limits := entry.Limits()
if tt.wantNil {
if limits != nil {
t.Fatalf("expected nil limits, got %+v", limits)
}
return
}
if limits == nil {
t.Fatal("expected non-nil limits, got nil")
}
if limits.MaxPromptTokens != tt.wantPrompt {
t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
}
if limits.MaxOutputTokens != tt.wantOutput {
t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
}
if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
}
})
}
}

View File

@@ -30,6 +30,8 @@ const (
qwenRateLimitWindow = time.Minute // sliding window duration
)
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
var qwenBeijingLoc = func() *time.Location {
loc, err := time.LoadLocation("Asia/Shanghai")
@@ -170,6 +172,36 @@ func timeUntilNextDay() time.Duration {
return tomorrow.Sub(now)
}
// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
messages := gjson.GetBytes(payload, "messages")
if messages.Exists() && messages.IsArray() {
var buf bytes.Buffer
buf.WriteByte('[')
buf.Write(qwenDefaultSystemMessage)
for _, msg := range messages.Array() {
buf.WriteByte(',')
buf.WriteString(msg.Raw)
}
buf.WriteByte(']')
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
if errSet != nil {
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
}
return updated, nil
}
var buf bytes.Buffer
buf.WriteByte('[')
buf.Write(qwenDefaultSystemMessage)
buf.WriteByte(']')
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
if errSet != nil {
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
}
return updated, nil
}
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
// If access token is unavailable, it falls back to legacy via ClientAdapter.
type QwenExecutor struct {
@@ -251,6 +283,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return resp, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -357,15 +393,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, err
}
toolsResult := gjson.GetBytes(body, "tools")
// toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.
if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
}
// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
// body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
// }
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return nil, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))

View File

@@ -9,6 +9,7 @@ import (
"context"
"fmt"
"io"
"net"
"net/http"
"strings"
"time"
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
// CLIHandler handles CLI-specific requests for Gemini API operations.
// It restricts access to localhost only and routes requests to appropriate internal handlers.
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Gemini CLI endpoint is disabled",
Type: "forbidden",
},
})
return
}
requestHost := c.Request.Host
requestHostname := requestHost
if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
requestHostname = hostname
}
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "CLI reply only allow local access",

View File

@@ -379,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo
for _, item := range nextInput.Array() {
switch strings.TrimSpace(item.Get("type").String()) {
case "function_call":
case "function_call", "custom_tool_call":
return true
case "message":
role := strings.TrimSpace(item.Get("role").String())
@@ -431,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call" {
if isResponsesToolCallType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID != "" {
if _, ok := seenCallIDs[callID]; ok {

View File

@@ -520,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
}
}
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
t.Fatalf("expected warmup output to remain")
}
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected first item: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted output: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted call: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected output item: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
@@ -536,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
}
}
func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
gin.SetMode(gin.TestMode)
@@ -1023,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
}
}
func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
lastResponseOutput := []byte(`[
{"type":"message","id":"assistant-1","role":"assistant"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
if gjson.GetBytes(normalized, "previous_response_id").Exists() {
t.Fatalf("previous_response_id must not exist in transcript replacement mode")
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
}
if !bytes.Equal(next, normalized) {
t.Fatalf("next request snapshot should match replacement request")
}
}
func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
lastResponseOutput := []byte(`[
{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-1" ||
items[1].Get("id").String() != "tool-out-1" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected merged input order: %s", normalized)
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode)
executor := &websocketCompactionCaptureExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
if _, err := manager.Register(context.Background(), auth); err != nil {
t.Fatalf("Register auth: %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
})
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
h := NewOpenAIResponsesAPIHandler(base)
router := gin.New()
router.GET("/v1/responses/ws", h.ResponsesWebsocket)
router.POST("/v1/responses/compact", h.Compact)
server := httptest.NewServer(router)
defer server.Close()
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
defer func() {
if errClose := conn.Close(); errClose != nil {
t.Fatalf("close websocket: %v", errClose)
}
}()
requests := []string{
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
}
for i := range requests {
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
t.Fatalf("write websocket message %d: %v", i+1, errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
}
}
compactResp, errPost := server.Client().Post(
server.URL+"/v1/responses/compact",
"application/json",
strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
)
if errPost != nil {
t.Fatalf("compact request failed: %v", errPost)
}
if errClose := compactResp.Body.Close(); errClose != nil {
t.Fatalf("close compact response body: %v", errClose)
}
if compactResp.StatusCode != http.StatusOK {
t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
}
postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
t.Fatalf("write post-compact websocket message: %v", errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read post-compact websocket message: %v", errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
}
executor.mu.Lock()
defer executor.mu.Unlock()
if executor.compactPayload == nil {
t.Fatalf("compact payload was not captured")
}
if len(executor.streamPayloads) != 3 {
t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
}
merged := executor.streamPayloads[2]
items := gjson.GetBytes(merged, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected post-compact input order: %s", merged)
}
if items[0].Get("call_id").String() != "call-1" {
t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode)

View File

@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
switch itemType {
case "function_call_output":
switch {
case isResponsesToolCallOutputType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
continue
}
outputPresent[callID] = struct{}{}
outputCache.record(sessionKey, callID, item)
case "function_call":
case isResponsesToolCallType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
continue
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call_output" {
if isResponsesToolCallOutputType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
// Upstream rejects tool outputs without a call_id; drop it.
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
continue
}
if itemType != "function_call" {
if !isResponsesToolCallType(itemType) {
filtered = append(filtered, item)
continue
}
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
return
}
for _, item := range output.Array() {
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
if !isResponsesToolCallType(item.Get("type").String()) {
continue
}
callID := strings.TrimSpace(item.Get("call_id").String())
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
if !item.Exists() || !item.IsObject() {
return
}
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
if !isResponsesToolCallType(item.Get("type").String()) {
return
}
callID := strings.TrimSpace(item.Get("call_id").String())
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
}
}
func isResponsesToolCallType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call", "custom_tool_call":
return true
default:
return false
}
}
func isResponsesToolCallOutputType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call_output", "custom_tool_call_output":
return true
default:
return false
}
}