diff --git a/internal/api/server.go b/internal/api/server.go index 10f88931..c773107f 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -573,6 +573,8 @@ func (s *Server) registerManagementRoutes() { mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel) mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel) + mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota) + mgmt.GET("/api-keys", s.mgmt.GetAPIKeys) mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys) mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys) diff --git a/internal/auth/copilot/copilot_auth.go b/internal/auth/copilot/copilot_auth.go index 72f5e4e1..663b6de1 100644 --- a/internal/auth/copilot/copilot_auth.go +++ b/internal/auth/copilot/copilot_auth.go @@ -235,6 +235,74 @@ type CopilotModelEntry struct { Capabilities map[string]any `json:"capabilities,omitempty"` } +// CopilotModelLimits holds the token limits returned by the Copilot /models API +// under capabilities.limits. These limits vary by account type (individual vs +// business) and are the authoritative source for enforcing prompt size. +type CopilotModelLimits struct { + // MaxContextWindowTokens is the total context window (prompt + output). + MaxContextWindowTokens int + // MaxPromptTokens is the hard limit on input/prompt tokens. + // Exceeding this triggers a 400 error from the Copilot API. + MaxPromptTokens int + // MaxOutputTokens is the maximum number of output/completion tokens. + MaxOutputTokens int +} + +// Limits extracts the token limits from the model's capabilities map. +// Returns nil if no limits are available or the structure is unexpected. 
+// +// Expected Copilot API shape: +// +// "capabilities": { +// "limits": { +// "max_context_window_tokens": 200000, +// "max_prompt_tokens": 168000, +// "max_output_tokens": 32000 +// } +// } +func (e *CopilotModelEntry) Limits() *CopilotModelLimits { + if e.Capabilities == nil { + return nil + } + limitsRaw, ok := e.Capabilities["limits"] + if !ok { + return nil + } + limitsMap, ok := limitsRaw.(map[string]any) + if !ok { + return nil + } + + result := &CopilotModelLimits{ + MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]), + MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]), + MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]), + } + + // Only return if at least one field is populated. + if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 { + return nil + } + return result +} + +// anyToInt converts a JSON-decoded numeric value to int. +// Go's encoding/json decodes numbers into float64 when the target is any/interface{}. +func anyToInt(v any) int { + switch n := v.(type) { + case float64: + return int(n) + case float32: + return int(n) + case int: + return n + case int64: + return int(n) + default: + return 0 + } +} + // CopilotModelsResponse represents the response from the Copilot /models endpoint. 
type CopilotModelsResponse struct { Data []CopilotModelEntry `json:"data"` diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 457f160a..ba95b12a 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -549,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo { ContextLength: 200000, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "claude-opus-4.6", @@ -561,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo { ContextLength: 200000, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "claude-sonnet-4", @@ -573,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo { ContextLength: 200000, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "claude-sonnet-4.5", @@ -585,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo { ContextLength: 200000, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "claude-sonnet-4.6", @@ -597,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo { ContextLength: 200000, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gemini-2.5-pro", diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 7b2e5d8d..afc3f674 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -848,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, hasClaude1MHeader = true 
} } + // Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m + // when routing through the Anthropic gateway, but the gin headers won't have + // X-CPA-CLAUDE-1M because the request is internally constructed. + if !hasClaude1MHeader && auth != nil && auth.Attributes != nil { + if auth.Attributes["gitlab_duo_force_context_1m"] == "true" { + hasClaude1MHeader = true + } + } // Merge extra betas from request body and request flags. if len(extraBetas) > 0 || hasClaude1MHeader { diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go index ea39465d..22d343fe 100644 --- a/internal/runtime/executor/github_copilot_executor.go +++ b/internal/runtime/executor/github_copilot_executor.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/http" + "slices" "strings" "sync" "time" @@ -17,6 +18,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" log "github.com/sirupsen/logrus" @@ -40,7 +42,7 @@ const ( copilotEditorVersion = "vscode/1.107.0" copilotPluginVersion = "copilot-chat/0.35.0" copilotIntegrationID = "vscode-chat" - copilotOpenAIIntent = "conversation-panel" + copilotOpenAIIntent = "conversation-edits" copilotGitHubAPIVer = "2025-04-01" ) @@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth. 
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = e.normalizeModel(req.Model, body) body = flattenAssistantContent(body) + body = stripUnsupportedBetas(body) // Detect vision content before input normalization removes messages hasVision := detectVisionContent(body) @@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth. if useResponses { body = normalizeGitHubCopilotResponsesInput(body) body = normalizeGitHubCopilotResponsesTools(body) + body = applyGitHubCopilotResponsesDefaults(body) } else { body = normalizeGitHubCopilotChatTools(body) } @@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth. if useResponses && from.String() == "claude" { converted = translateGitHubCopilotResponsesNonStreamToClaude(data) } else { + data = normalizeGitHubCopilotReasoningField(data) converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) } - resp = cliproxyexecutor.Response{Payload: converted} + resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()} reporter.ensurePublished(ctx) return resp, nil } @@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = e.normalizeModel(req.Model, body) body = flattenAssistantContent(body) + body = stripUnsupportedBetas(body) // Detect vision content before input normalization removes messages hasVision := detectVisionContent(body) @@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox if useResponses { body = normalizeGitHubCopilotResponsesInput(body) body = normalizeGitHubCopilotResponsesTools(body) + body = applyGitHubCopilotResponsesDefaults(body) } else { body = normalizeGitHubCopilotChatTools(body) } @@ -378,7 +385,20 @@ 
func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox if useResponses && from.String() == "claude" { chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), ¶m) } else { - chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), ¶m) + // Strip SSE "data: " prefix before reasoning field normalization, + // since normalizeGitHubCopilotReasoningField expects pure JSON. + // Re-wrap with the prefix afterward for the translator. + normalizedLine := bytes.Clone(line) + if bytes.HasPrefix(line, dataTag) { + sseData := bytes.TrimSpace(line[len(dataTag):]) + if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) { + normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData)) + if !bytes.Equal(normalized, sseData) { + normalizedLine = append(append([]byte(nil), dataTag...), normalized...) + } + } + } + chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, ¶m) } for i := range chunks { out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])} @@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox }, nil } -// CountTokens is not supported for GitHub Copilot. -func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"} +// CountTokens estimates token count locally using tiktoken, since the GitHub +// Copilot API does not expose a dedicated token counting endpoint. 
+func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + + from := opts.SourceFormat + to := sdktranslator.FromString("openai") + translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false) + + enc, err := helps.TokenizerForModel(baseModel) + if err != nil { + return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err) + } + + count, err := helps.CountOpenAIChatTokens(enc, translated) + if err != nil { + return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err) + } + + usageJSON := helps.BuildOpenAIUsageJSON(count) + translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON) + return cliproxyexecutor.Response{Payload: translatedUsage}, nil } // Refresh validates the GitHub token is still working. @@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b r.Header.Set("X-Request-Id", uuid.NewString()) initiator := "user" - if role := detectLastConversationRole(body); role == "assistant" || role == "tool" { + if isAgentInitiated(body) { initiator = "agent" } r.Header.Set("X-Initiator", initiator) } -func detectLastConversationRole(body []byte) string { +// isAgentInitiated determines whether the current request is agent-initiated +// (tool callbacks, continuations) rather than user-initiated (new user prompt). +// +// GitHub Copilot uses the X-Initiator header for billing: +// - "user" → consumes premium request quota +// - "agent" → free (tool loops, continuations) +// +// The challenge: Claude Code sends tool results as role:"user" messages with +// content type "tool_result". 
After translation to OpenAI format, the tool_result +// part becomes a separate role:"tool" message, but if the original Claude message +// also contained text content (e.g. skill invocations, attachment descriptions), +// a role:"user" message is emitted AFTER the tool message, making the last message +// appear user-initiated when it's actually part of an agent tool loop. +// +// VSCode Copilot Chat solves this with explicit flags (iterationNumber, +// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags, +// we infer agent status by checking whether the conversation contains prior +// assistant/tool messages — if it does, the current request is a continuation. +// +// References: +// - opencode#8030, opencode#15824: same root cause and fix approach +// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0) +// - pi-ai: github-copilot-headers.ts (last message role check) +func isAgentInitiated(body []byte) bool { if len(body) == 0 { - return "" + return false } + // Chat Completions API: check messages array if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() { arr := messages.Array() + if len(arr) == 0 { + return false + } + + lastRole := "" for i := len(arr) - 1; i >= 0; i-- { - if role := arr[i].Get("role").String(); role != "" { - return role + if r := arr[i].Get("role").String(); r != "" { + lastRole = r + break } } + + // If last message is assistant or tool, clearly agent-initiated. + if lastRole == "assistant" || lastRole == "tool" { + return true + } + + // If last message is "user", check whether it contains tool results + // (indicating a tool-loop continuation) or if the preceding message + // is an assistant tool_use. This is more precise than checking for + // any prior assistant message, which would false-positive on genuine + // multi-turn follow-ups. 
+ if lastRole == "user" { + // Check if the last user message contains tool_result content + lastContent := arr[len(arr)-1].Get("content") + if lastContent.Exists() && lastContent.IsArray() { + for _, part := range lastContent.Array() { + if part.Get("type").String() == "tool_result" { + return true + } + } + } + // Check if the second-to-last message is an assistant with tool_use + if len(arr) >= 2 { + prev := arr[len(arr)-2] + if prev.Get("role").String() == "assistant" { + prevContent := prev.Get("content") + if prevContent.Exists() && prevContent.IsArray() { + for _, part := range prevContent.Array() { + if part.Get("type").String() == "tool_use" { + return true + } + } + } + } + } + } + + return false } + // Responses API: check input array if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() { arr := inputs.Array() - for i := len(arr) - 1; i >= 0; i-- { - item := arr[i] + if len(arr) == 0 { + return false + } - // Most Responses input items carry a top-level role. 
-			if role := item.Get("role").String(); role != "" {
-				return role
+		// Check last item
+		last := arr[len(arr)-1]
+		if role := last.Get("role").String(); role == "assistant" {
+			return true
+		}
+		switch last.Get("type").String() {
+		case "function_call", "function_call_arguments", "computer_call":
+			return true
+		case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
+			return true
+		}
+
+		// If last item is user-role, check for prior non-user items
+		for _, item := range arr {
+			if role := item.Get("role").String(); role == "assistant" {
+				return true
 			}
-			switch item.Get("type").String() {
-			case "function_call", "function_call_arguments", "computer_call":
-				return "assistant"
-			case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
-				return "tool"
+			switch item.Get("type").String() {
+			case "function_call", "function_call_output", "function_call_response", "function_call_arguments", "computer_call", "computer_call_output":
+				return true
+			}
 		}
 	}
 
-	return ""
+	return false
 }
 
 // detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte {
 	return body
 }
 
+// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
+// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
+// context on Anthropic's API, but Copilot's Claude models are limited to
+// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
+// it from the translated body avoids confusing downstream translators.
+var copilotUnsupportedBetas = []string{
+	"context-1m-2025-08-07",
+}
+
+// stripUnsupportedBetas removes Anthropic-specific beta entries from the
+// translated request body. In OpenAI format the betas may appear under
+// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
+// "betas". This function checks all known locations.
+func stripUnsupportedBetas(body []byte) []byte { + betaPaths := []string{"betas", "metadata.betas"} + for _, path := range betaPaths { + arr := gjson.GetBytes(body, path) + if !arr.Exists() || !arr.IsArray() { + continue + } + var filtered []string + changed := false + for _, item := range arr.Array() { + beta := item.String() + if isCopilotUnsupportedBeta(beta) { + changed = true + continue + } + filtered = append(filtered, beta) + } + if !changed { + continue + } + if len(filtered) == 0 { + body, _ = sjson.DeleteBytes(body, path) + } else { + body, _ = sjson.SetBytes(body, path, filtered) + } + } + return body +} + +func isCopilotUnsupportedBeta(beta string) bool { + return slices.Contains(copilotUnsupportedBetas, beta) +} + +// normalizeGitHubCopilotReasoningField maps Copilot's non-standard +// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field +// that the SDK translator expects. This handles both streaming deltas +// (choices[].delta.reasoning_text) and non-streaming messages +// (choices[].message.reasoning_text). The field is only renamed when +// 'reasoning_content' is absent or null, preserving standard responses. +// All choices are processed to support n>1 requests. 
+func normalizeGitHubCopilotReasoningField(data []byte) []byte { + choices := gjson.GetBytes(data, "choices") + if !choices.Exists() || !choices.IsArray() { + return data + } + for i := range choices.Array() { + // Non-streaming: choices[i].message.reasoning_text + msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i) + msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i) + if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" { + if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" { + data, _ = sjson.SetBytes(data, msgRC, rt.String()) + } + } + // Streaming: choices[i].delta.reasoning_text + deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i) + deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i) + if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" { + if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" { + data, _ = sjson.SetBytes(data, deltaRC, rt.String()) + } + } + } + return data +} + func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool { if sourceFormat.String() == "openai-response" { return true @@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo { } func containsEndpoint(endpoints []string, endpoint string) bool { - for _, item := range endpoints { - if item == endpoint { - return true - } - } - return false + return slices.Contains(endpoints, endpoint) } // flattenAssistantContent converts assistant message content from array format @@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte { return body } +// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API +// that both vscode-copilot-chat and pi-ai always include. 
+// +// References: +// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts +// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts +func applyGitHubCopilotResponsesDefaults(body []byte) []byte { + // store: false — prevents request/response storage + if !gjson.GetBytes(body, "store").Exists() { + body, _ = sjson.SetBytes(body, "store", false) + } + + // include: ["reasoning.encrypted_content"] — enables reasoning content + // reuse across turns, avoiding redundant computation + if !gjson.GetBytes(body, "include").Exists() { + body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`)) + } + + // If reasoning.effort is set but reasoning.summary is not, default to "auto" + if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() { + body, _ = sjson.SetBytes(body, "reasoning.summary", "auto") + } + + return body +} + func normalizeGitHubCopilotResponsesTools(body []byte) []byte { tools := gjson.GetBytes(body, "tools") if tools.Exists() { @@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens } + // Override with real limits from the Copilot API when available. + // The API returns per-account limits (individual vs business) under + // capabilities.limits, which are more accurate than our static + // fallback values. We use max_prompt_tokens as ContextLength because + // that's the hard limit the Copilot API enforces on prompt size — + // exceeding it triggers "prompt token count exceeds the limit" errors. 
+ if limits := entry.Limits(); limits != nil { + if limits.MaxPromptTokens > 0 { + m.ContextLength = limits.MaxPromptTokens + } + if limits.MaxOutputTokens > 0 { + m.MaxCompletionTokens = limits.MaxOutputTokens + } + } + models = append(models, m) } diff --git a/internal/runtime/executor/github_copilot_executor_test.go b/internal/runtime/executor/github_copilot_executor_test.go index c320bccc..774a3cae 100644 --- a/internal/runtime/executor/github_copilot_executor_test.go +++ b/internal/runtime/executor/github_copilot_executor_test.go @@ -1,11 +1,14 @@ package executor import ( + "context" "net/http" "strings" "testing" + copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" ) @@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) { } func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) { - t.Parallel() + // Not parallel: shares global model registry with DynamicRegistryWinsOverStatic. if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") { t.Fatal("expected responses-only registry model to use /responses") } @@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing } func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) { - t.Parallel() + // Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel. 
reg := registry.GetGlobalRegistry() clientID := "github-copilot-test-client" @@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing t.Parallel() resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`) out := translateGitHubCopilotResponsesNonStreamToClaude(resp) - if gjson.Get(out, "type").String() != "message" { - t.Fatalf("type = %q, want message", gjson.Get(out, "type").String()) + if gjson.GetBytes(out, "type").String() != "message" { + t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String()) } - if gjson.Get(out, "content.0.type").String() != "text" { - t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String()) + if gjson.GetBytes(out, "content.0.type").String() != "text" { + t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String()) } - if gjson.Get(out, "content.0.text").String() != "hello" { - t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String()) + if gjson.GetBytes(out, "content.0.text").String() != "hello" { + t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String()) } } @@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test t.Parallel() resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`) out := translateGitHubCopilotResponsesNonStreamToClaude(resp) - if gjson.Get(out, "content.0.type").String() != "tool_use" { - t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String()) + if gjson.GetBytes(out, "content.0.type").String() != "tool_use" { + t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, 
"content.0.type").String()) } - if gjson.Get(out, "content.0.name").String() != "sum" { - t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String()) + if gjson.GetBytes(out, "content.0.name").String() != "sum" { + t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String()) } - if gjson.Get(out, "stop_reason").String() != "tool_use" { - t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String()) + if gjson.GetBytes(out, "stop_reason").String() != "tool_use" { + t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String()) } } @@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing. var param any created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), ¶m) - if len(created) == 0 || !strings.Contains(created[0], "message_start") { + if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") { t.Fatalf("created events = %#v, want message_start", created) } delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), ¶m) - joinedDelta := strings.Join(delta, "") + var joinedDelta string + for _, d := range delta { + joinedDelta += string(d) + } if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") { t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta) } completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), ¶m) - joinedCompleted := strings.Join(completed, "") + var joinedCompleted string + for _, c := range completed { + joinedCompleted += string(c) + } if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") { 
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed) } @@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) { } } -func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) { +func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) { t.Parallel() e := &GitHubCopilotExecutor{} req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) - // Last role governs the initiator decision. - body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`) + // When the last role is "user" and the message contains tool_result content, + // the request is a continuation (e.g. Claude tool result translated to a + // synthetic user message). Should be "agent". + body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`) e.applyHeaders(req, "token", body) - if got := req.Header.Get("X-Initiator"); got != "user" { - t.Fatalf("X-Initiator = %q, want user (last role is user)", got) + if got := req.Header.Get("X-Initiator"); got != "agent" { + t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got) } } @@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) { t.Parallel() e := &GitHubCopilotExecutor{} req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) + // When the last message has role "tool", it's clearly agent-initiated. 
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`) e.applyHeaders(req, "token", body) if got := req.Header.Get("X-Initiator"); got != "agent" { - t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got) + t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got) } } @@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) { } } -func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) { +func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) { t.Parallel() e := &GitHubCopilotExecutor{} req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) + // Responses API: last item is user-role but history contains assistant → agent. body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`) e.applyHeaders(req, "token", body) - if got := req.Header.Get("X-Initiator"); got != "user" { - t.Fatalf("X-Initiator = %q, want user (last role is user)", got) + if got := req.Header.Get("X-Initiator"); got != "agent" { + t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got) } } @@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T) } } +func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) { + t.Parallel() + e := &GitHubCopilotExecutor{} + req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) + // Genuine multi-turn: user → assistant (plain text) → user follow-up. + // No tool messages → should be "user" (not a false-positive). 
+ body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`) + e.applyHeaders(req, "token", body) + if got := req.Header.Get("X-Initiator"); got != "user" { + t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got) + } +} + +func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) { + t.Parallel() + e := &GitHubCopilotExecutor{} + req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) + // User follow-up after a completed tool-use conversation. + // The last message is a genuine user question — should be "user", not "agent". + // This aligns with opencode's behavior: only active tool loops are agent-initiated. + body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`) + e.applyHeaders(req, "token", body) + if got := req.Header.Get("X-Initiator"); got != "user" { + t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got) + } +} + // --- Tests for x-github-api-version header (Problem M) --- func TestApplyHeaders_GitHubAPIVersion(t *testing.T) { @@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) { t.Fatal("expected no vision content when messages field is absent") } } + +// --- Tests for applyGitHubCopilotResponsesDefaults --- + +func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) { + t.Parallel() + body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`) + got := applyGitHubCopilotResponsesDefaults(body) + + if gjson.GetBytes(got, "store").Bool() != false { + t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw) + } + inc := gjson.GetBytes(got, "include") + if 
!inc.IsArray() || len(inc.Array()) != 1 || inc.Array()[0].String() != "reasoning.encrypted_content" {
		t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
	}
	if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
		t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
	}
}

func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
	t.Parallel()
	body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
	got := applyGitHubCopilotResponsesDefaults(body)

	if gjson.GetBytes(got, "store").Bool() != true {
		t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
	}
	// Guard the index: an empty include array would otherwise panic instead of failing.
	inc := gjson.GetBytes(got, "include")
	if len(inc.Array()) != 1 || inc.Array()[0].String() != "other" {
		t.Fatalf("include should not be overridden, got %s", inc.Raw)
	}
	if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
		t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
	}
}

func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
	t.Parallel()
	body := []byte(`{"input":"hello"}`)
	got := applyGitHubCopilotResponsesDefaults(body)

	// Require the key to exist: a missing "store" also yields Bool() == false.
	store := gjson.GetBytes(got, "store")
	if !store.Exists() || store.Bool() {
		t.Fatalf("store = %v, want false", store.Raw)
	}
	// reasoning.summary should NOT be set when reasoning.effort is absent
	if gjson.GetBytes(got, "reasoning.summary").Exists() {
		t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
	}
}

// --- Tests for normalizeGitHubCopilotReasoningField ---

func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
	t.Parallel()
	data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
	got := normalizeGitHubCopilotReasoningField(data)
	rc := 
gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
	if rc != "I think..." {
		t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
	}
}

func TestNormalizeReasoningField_Streaming(t *testing.T) {
	t.Parallel()
	data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
	got := normalizeGitHubCopilotReasoningField(data)
	rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
	if rc != "thinking delta" {
		t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
	}
}

func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
	t.Parallel()
	data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
	got := normalizeGitHubCopilotReasoningField(data)
	rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
	if rc != "existing" {
		t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
	}
}

func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
	t.Parallel()
	data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
	got := normalizeGitHubCopilotReasoningField(data)
	rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
	rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
	if rc0 != "thought-0" {
		t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
	}
	if rc1 != "thought-1" {
		t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
	}
}

func TestNormalizeReasoningField_NoChoices(t *testing.T) {
	t.Parallel()
	data := []byte(`{"id":"chatcmpl-123"}`)
	got := normalizeGitHubCopilotReasoningField(data)
	if string(got) != string(data) {
		t.Fatalf("expected no change, got %s", string(got))
	}
}

func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
	t.Parallel()
	e := &GitHubCopilotExecutor{}
	req, _ 
:= http.NewRequest(http.MethodPost, "https://example.com", nil)
	e.applyHeaders(req, "token", nil)
	if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
		t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
	}
}

// --- Tests for CountTokens (local tiktoken estimation) ---

func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
	t.Parallel()
	e := &GitHubCopilotExecutor{}
	body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
		Model:   "gpt-4o",
		Payload: body,
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai"),
	})
	if err != nil {
		t.Fatalf("CountTokens() error: %v", err)
	}
	if len(resp.Payload) == 0 {
		t.Fatal("CountTokens() returned empty payload")
	}
	// The response should contain a positive token count.
	tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
	if tokens <= 0 {
		t.Fatalf("expected positive token count, got %d", tokens)
	}
}

func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
	t.Parallel()
	e := &GitHubCopilotExecutor{}
	body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
		Model:   "claude-sonnet-4",
		Payload: body,
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("claude"),
	})
	if err != nil {
		t.Fatalf("CountTokens() error: %v", err)
	}
	// Claude source format → should get input_tokens in response
	inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
	if inputTokens <= 0 {
		// Fallback: check usage.prompt_tokens (depends on translator registration)
		promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
		if promptTokens <= 0 {
			t.Fatalf("expected positive token count, got payload: %s", 
resp.Payload)
		}
	}
}

func TestCountTokens_EmptyPayload(t *testing.T) {
	t.Parallel()
	e := &GitHubCopilotExecutor{}
	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
		Model:   "gpt-4o",
		Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai"),
	})
	if err != nil {
		t.Fatalf("CountTokens() error: %v", err)
	}
	tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
	// Empty messages should return 0 tokens.
	if tokens != 0 {
		t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
	}
}

func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
	t.Parallel()

	body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
	result := stripUnsupportedBetas(body)

	betas := gjson.GetBytes(result, "betas")
	if !betas.Exists() {
		t.Fatal("betas field should still exist after stripping")
	}
	for _, item := range betas.Array() {
		if item.String() == "context-1m-2025-08-07" {
			t.Fatal("context-1m-2025-08-07 should have been stripped")
		}
	}
	// Other betas should be preserved
	found := false
	for _, item := range betas.Array() {
		if item.String() == "interleaved-thinking-2025-05-14" {
			found = true
		}
	}
	if !found {
		t.Fatal("other betas should be preserved")
	}
}

func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
	t.Parallel()

	body := []byte(`{"model":"gpt-4o","messages":[]}`)
	result := stripUnsupportedBetas(body)

	// Should be unchanged
	if string(result) != string(body) {
		t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
	}
}

func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
	t.Parallel()

	body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
	result := 
stripUnsupportedBetas(body)

	betas := gjson.GetBytes(result, "metadata.betas")
	if !betas.Exists() {
		t.Fatal("metadata.betas field should still exist after stripping")
	}
	for _, item := range betas.Array() {
		if item.String() == "context-1m-2025-08-07" {
			t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
		}
	}
	// Guard the length: indexing [0] on an empty array would panic instead of failing.
	items := betas.Array()
	if len(items) != 1 || items[0].String() != "other-beta" {
		t.Fatal("other betas in metadata.betas should be preserved")
	}
}

func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
	t.Parallel()

	body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
	result := stripUnsupportedBetas(body)

	betas := gjson.GetBytes(result, "betas")
	if betas.Exists() {
		t.Fatal("betas field should be deleted when all betas are stripped")
	}
}

func TestCopilotModelEntry_Limits(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name         string
		capabilities map[string]any
		wantNil      bool
		wantPrompt   int
		wantOutput   int
		wantContext  int
	}{
		{
			name:         "nil capabilities",
			capabilities: nil,
			wantNil:      true,
		},
		{
			name:         "no limits key",
			capabilities: map[string]any{"family": "claude-opus-4.6"},
			wantNil:      true,
		},
		{
			name:         "limits is not a map",
			capabilities: map[string]any{"limits": "invalid"},
			wantNil:      true,
		},
		{
			name: "all zero values",
			capabilities: map[string]any{
				"limits": map[string]any{
					"max_context_window_tokens": float64(0),
					"max_prompt_tokens":         float64(0),
					"max_output_tokens":         float64(0),
				},
			},
			wantNil: true,
		},
		{
			name: "individual account limits (128K prompt)",
			capabilities: map[string]any{
				"limits": map[string]any{
					"max_context_window_tokens": float64(144000),
					"max_prompt_tokens":         float64(128000),
					"max_output_tokens":         float64(64000),
				},
			},
			wantNil:     false,
			wantPrompt:  128000,
			wantOutput:  64000,
			wantContext: 144000,
		},
		{
			name: "business account limits (168K prompt)",
			capabilities: 
map[string]any{
				"limits": map[string]any{
					"max_context_window_tokens": float64(200000),
					"max_prompt_tokens":         float64(168000),
					"max_output_tokens":         float64(32000),
				},
			},
			wantNil:     false,
			wantPrompt:  168000,
			wantOutput:  32000,
			wantContext: 200000,
		},
		{
			name: "partial limits (only prompt)",
			capabilities: map[string]any{
				"limits": map[string]any{
					"max_prompt_tokens": float64(128000),
				},
			},
			wantNil:    false,
			wantPrompt: 128000,
			wantOutput: 0,
		},
	}

	for _, tt := range tests {
		// Shadow the loop variable: parallel subtests run after the loop advances,
		// so on pre-Go-1.22 toolchains every subtest would otherwise see the last
		// table entry. Harmless on 1.22+ (per-iteration loop variables).
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			entry := copilotauth.CopilotModelEntry{
				ID:           "claude-opus-4.6",
				Capabilities: tt.capabilities,
			}
			limits := entry.Limits()
			if tt.wantNil {
				if limits != nil {
					t.Fatalf("expected nil limits, got %+v", limits)
				}
				return
			}
			if limits == nil {
				t.Fatal("expected non-nil limits, got nil")
			}
			if limits.MaxPromptTokens != tt.wantPrompt {
				t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
			}
			if limits.MaxOutputTokens != tt.wantOutput {
				t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
			}
			if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
				t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
			}
		})
	}
}