diff --git a/internal/auth/copilot/copilot_auth.go b/internal/auth/copilot/copilot_auth.go
index 72f5e4e1..663b6de1 100644
--- a/internal/auth/copilot/copilot_auth.go
+++ b/internal/auth/copilot/copilot_auth.go
@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
 	Capabilities map[string]any `json:"capabilities,omitempty"`
 }
 
+// CopilotModelLimits holds the token limits returned by the Copilot /models API
+// under capabilities.limits. These limits vary by account type (individual vs
+// business) and are the authoritative source for enforcing prompt size.
+type CopilotModelLimits struct {
+	// MaxContextWindowTokens is the total context window (prompt + output).
+	MaxContextWindowTokens int
+	// MaxPromptTokens is the hard limit on input/prompt tokens.
+	// Exceeding this triggers a 400 error from the Copilot API.
+	MaxPromptTokens int
+	// MaxOutputTokens is the maximum number of output/completion tokens.
+	MaxOutputTokens int
+}
+
+// Limits extracts the token limits from the model's capabilities map.
+// Returns nil if no limits are available or the structure is unexpected.
+//
+// Expected Copilot API shape:
+//
+//	"capabilities": {
+//	  "limits": {
+//	    "max_context_window_tokens": 200000,
+//	    "max_prompt_tokens": 168000,
+//	    "max_output_tokens": 32000
+//	  }
+//	}
+func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
+	if e.Capabilities == nil {
+		return nil
+	}
+	limitsRaw, ok := e.Capabilities["limits"]
+	if !ok {
+		return nil
+	}
+	limitsMap, ok := limitsRaw.(map[string]any)
+	if !ok {
+		return nil
+	}
+
+	result := &CopilotModelLimits{
+		MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
+		MaxPromptTokens:        anyToInt(limitsMap["max_prompt_tokens"]),
+		MaxOutputTokens:        anyToInt(limitsMap["max_output_tokens"]),
+	}
+
+	// Only return if at least one field is populated.
+	if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
+		return nil
+	}
+	return result
+}
+
+// anyToInt converts a JSON-decoded numeric value to int.
+// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
+func anyToInt(v any) int {
+	switch n := v.(type) {
+	case float64:
+		return int(n)
+	case float32:
+		return int(n)
+	case int:
+		return n
+	case int64:
+		return int(n)
+	default:
+		return 0
+	}
+}
+
 // CopilotModelsResponse represents the response from the Copilot /models endpoint.
 type CopilotModelsResponse struct {
 	Data []CopilotModelEntry `json:"data"`
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 56c2c540..bb6aa282 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -827,6 +827,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 			hasClaude1MHeader = true
 		}
 	}
+	// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
+	// when routing through the Anthropic gateway, but the gin headers won't have
+	// X-CPA-CLAUDE-1M because the request is internally constructed.
+	if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
+		if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
+			hasClaude1MHeader = true
+		}
+	}
 
 	// Merge extra betas from request body and request flags.
 	if len(extraBetas) > 0 || hasClaude1MHeader {
diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go
index 2c640f93..22d343fe 100644
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -1626,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
 			m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
 		}
 
+		// Override with real limits from the Copilot API when available.
+		// The API returns per-account limits (individual vs business) under
+		// capabilities.limits, which are more accurate than our static
+		// fallback values. We use max_prompt_tokens as ContextLength because
+		// that's the hard limit the Copilot API enforces on prompt size —
+		// exceeding it triggers "prompt token count exceeds the limit" errors.
+		if limits := entry.Limits(); limits != nil {
+			if limits.MaxPromptTokens > 0 {
+				m.ContextLength = limits.MaxPromptTokens
+			}
+			if limits.MaxOutputTokens > 0 {
+				m.MaxCompletionTokens = limits.MaxOutputTokens
+			}
+		}
+
 		models = append(models, m)
 	}
 
diff --git a/internal/runtime/executor/github_copilot_executor_test.go b/internal/runtime/executor/github_copilot_executor_test.go
index 90a5b26f..774a3cae 100644
--- a/internal/runtime/executor/github_copilot_executor_test.go
+++ b/internal/runtime/executor/github_copilot_executor_test.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"testing"
 
+	copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -74,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
 }
 
 func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
-	t.Parallel()
+	// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
 	if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
 		t.Fatal("expected responses-only registry model to use /responses")
 	}
@@ -84,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
 }
 
 func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
-	t.Parallel()
+	// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
 	reg := registry.GetGlobalRegistry()
 
 	clientID := "github-copilot-test-client"
@@ -706,3 +707,111 @@ func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
 		t.Fatal("betas field should be deleted when all betas are stripped")
 	}
 }
+
+func TestCopilotModelEntry_Limits(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name         string
+		capabilities map[string]any
+		wantNil      bool
+		wantPrompt   int
+		wantOutput   int
+		wantContext  int
+	}{
+		{
+			name:         "nil capabilities",
+			capabilities: nil,
+			wantNil:      true,
+		},
+		{
+			name:         "no limits key",
+			capabilities: map[string]any{"family": "claude-opus-4.6"},
+			wantNil:      true,
+		},
+		{
+			name:         "limits is not a map",
+			capabilities: map[string]any{"limits": "invalid"},
+			wantNil:      true,
+		},
+		{
+			name: "all zero values",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(0),
+					"max_prompt_tokens":         float64(0),
+					"max_output_tokens":         float64(0),
+				},
+			},
+			wantNil: true,
+		},
+		{
+			name: "individual account limits (128K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(144000),
+					"max_prompt_tokens":         float64(128000),
+					"max_output_tokens":         float64(64000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  128000,
+			wantOutput:  64000,
+			wantContext: 144000,
+		},
+		{
+			name: "business account limits (168K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(200000),
+					"max_prompt_tokens":         float64(168000),
+					"max_output_tokens":         float64(32000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  168000,
+			wantOutput:  32000,
+			wantContext: 200000,
+		},
+		{
+			name: "partial limits (only prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_prompt_tokens": float64(128000),
+				},
+			},
+			wantNil:    false,
+			wantPrompt: 128000,
+			wantOutput: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			entry := copilotauth.CopilotModelEntry{
+				ID:           "claude-opus-4.6",
+				Capabilities: tt.capabilities,
+			}
+			limits := entry.Limits()
+			if tt.wantNil {
+				if limits != nil {
+					t.Fatalf("expected nil limits, got %+v", limits)
+				}
+				return
+			}
+			if limits == nil {
+				t.Fatal("expected non-nil limits, got nil")
+			}
+			if limits.MaxPromptTokens != tt.wantPrompt {
+				t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
+			}
+			if limits.MaxOutputTokens != tt.wantOutput {
+				t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
+			}
+			if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
+				t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
+			}
+		})
+	}
+}