Merge pull request #485 from kunish/fix/copilot-premium-request-inflation

fix(copilot): reduce premium request inflation, enable thinking, and use dynamic API limits
2026-04-08 05:47:16 +00:00 · 2026-04-04 02:19:56 +08:00
parent e7a66ae504 87bf0b73d5
commit 98509f615c
6 changed files with 773 additions and 54 deletions
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -573,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
 		mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)

+		mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
+
 		mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
 		mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
 		mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
--- a/internal/auth/copilot/copilot_auth.go
+++ b/internal/auth/copilot/copilot_auth.go
@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
 	Capabilities map[string]any `json:"capabilities,omitempty"`
 }

+// CopilotModelLimits holds the token limits returned by the Copilot /models API
+// under capabilities.limits. These limits vary by account type (individual vs
+// business) and are the authoritative source for enforcing prompt size.
+type CopilotModelLimits struct {
+	// MaxContextWindowTokens is the total context window (prompt + output).
+	MaxContextWindowTokens int
+	// MaxPromptTokens is the hard limit on input/prompt tokens.
+	// Exceeding this triggers a 400 error from the Copilot API.
+	MaxPromptTokens int
+	// MaxOutputTokens is the maximum number of output/completion tokens.
+	MaxOutputTokens int
+}
+
+// Limits extracts the token limits from the model's capabilities map.
+// Returns nil if no limits are available or the structure is unexpected.
+//
+// Expected Copilot API shape:
+//
+//	"capabilities": {
+//	    "limits": {
+//	        "max_context_window_tokens": 200000,
+//	        "max_prompt_tokens": 168000,
+//	        "max_output_tokens": 32000
+//	    }
+//	}
+func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
+	if e.Capabilities == nil {
+		return nil
+	}
+	limitsRaw, ok := e.Capabilities["limits"]
+	if !ok {
+		return nil
+	}
+	limitsMap, ok := limitsRaw.(map[string]any)
+	if !ok {
+		return nil
+	}
+
+	result := &CopilotModelLimits{
+		MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
+		MaxPromptTokens:        anyToInt(limitsMap["max_prompt_tokens"]),
+		MaxOutputTokens:        anyToInt(limitsMap["max_output_tokens"]),
+	}
+
+	// An all-zero result means no usable limit was decoded; report it as nil.
+	if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
+		return nil
+	}
+	return result
+}
+
+// anyToInt converts a JSON-decoded numeric value to int, truncating floats
+// (encoding/json decodes numbers into float64 for any); other types yield 0.
+func anyToInt(v any) int {
+	switch n := v.(type) {
+	case float64:
+		return int(n)
+	case float32:
+		return int(n)
+	case int:
+		return n
+	case int64:
+		return int(n)
+	default:
+		return 0
+	}
+}
+
 // CopilotModelsResponse represents the response from the Copilot /models endpoint.
 type CopilotModelsResponse struct {
 	Data   []CopilotModelEntry `json:"data"`
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -549,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-opus-4.6",
@@ -561,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4",
@@ -573,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4.5",
@@ -585,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4.6",
@@ -597,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gemini-2.5-pro",
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -848,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 			hasClaude1MHeader = true
 		}
 	}
+	// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
+	// when routing through the Anthropic gateway, but the gin headers won't have
+	// X-CPA-CLAUDE-1M because the request is internally constructed.
+	if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
+		if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
+			hasClaude1MHeader = true
+		}
+	}

 	// Merge extra betas from request body and request flags.
 	if len(extraBetas) > 0 || hasClaude1MHeader {
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"slices"
 	"strings"
 	"sync"
 	"time"
@@ -17,6 +18,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	log "github.com/sirupsen/logrus"
@@ -40,7 +42,7 @@ const (
 	copilotEditorVersion = "vscode/1.107.0"
 	copilotPluginVersion = "copilot-chat/0.35.0"
 	copilotIntegrationID = "vscode-chat"
-	copilotOpenAIIntent  = "conversation-panel"
+	copilotOpenAIIntent  = "conversation-edits"
 	copilotGitHubAPIVer  = "2025-04-01"
 )

@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = e.normalizeModel(req.Model, body)
 	body = flattenAssistantContent(body)
+	body = stripUnsupportedBetas(body)

 	// Detect vision content before input normalization removes messages
 	hasVision := detectVisionContent(body)
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	if useResponses {
 		body = normalizeGitHubCopilotResponsesInput(body)
 		body = normalizeGitHubCopilotResponsesTools(body)
+		body = applyGitHubCopilotResponsesDefaults(body)
 	} else {
 		body = normalizeGitHubCopilotChatTools(body)
 	}
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	if useResponses && from.String() == "claude" {
 		converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
 	} else {
+		data = normalizeGitHubCopilotReasoningField(data)
 		converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	}
-	resp = cliproxyexecutor.Response{Payload: converted}
+	resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
 	reporter.ensurePublished(ctx)
 	return resp, nil
 }
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = e.normalizeModel(req.Model, body)
 	body = flattenAssistantContent(body)
+	body = stripUnsupportedBetas(body)

 	// Detect vision content before input normalization removes messages
 	hasVision := detectVisionContent(body)
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	if useResponses {
 		body = normalizeGitHubCopilotResponsesInput(body)
 		body = normalizeGitHubCopilotResponsesTools(body)
+		body = applyGitHubCopilotResponsesDefaults(body)
 	} else {
 		body = normalizeGitHubCopilotChatTools(body)
 	}
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 			if useResponses && from.String() == "claude" {
 				chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
 			} else {
-				chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+				// Strip SSE "data: " prefix before reasoning field normalization,
+				// since normalizeGitHubCopilotReasoningField expects pure JSON.
+				// Re-wrap with the prefix afterward for the translator.
+				normalizedLine := bytes.Clone(line)
+				if bytes.HasPrefix(line, dataTag) {
+					sseData := bytes.TrimSpace(line[len(dataTag):])
+					if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
+						normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
+						if !bytes.Equal(normalized, sseData) {
+							normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
+						}
+					}
+				}
+				chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
 			}
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	}, nil
 }

-// CountTokens is not supported for GitHub Copilot.
-func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
+// CountTokens estimates the token count locally (the GitHub Copilot API has no
+// token counting endpoint); the payload is cloned before translation, as in Execute.
+func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	baseModel := thinking.ParseSuffix(req.Model).ModelName
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("openai")
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
+
+	enc, err := helps.TokenizerForModel(baseModel)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
+	}
+
+	count, err := helps.CountOpenAIChatTokens(enc, translated)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
+	}
+
+	usageJSON := helps.BuildOpenAIUsageJSON(count)
+	translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
+	return cliproxyexecutor.Response{Payload: translatedUsage}, nil
 }

 // Refresh validates the GitHub token is still working.
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
 	r.Header.Set("X-Request-Id", uuid.NewString())

 	initiator := "user"
-	if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
+	if isAgentInitiated(body) {
 		initiator = "agent"
 	}
 	r.Header.Set("X-Initiator", initiator)
 }

-func detectLastConversationRole(body []byte) string {
+// isAgentInitiated determines whether the current request is agent-initiated
+// (tool callbacks, continuations) rather than user-initiated (new user prompt).
+//
+// GitHub Copilot uses the X-Initiator header for billing:
+//   - "user"  → consumes premium request quota
+//   - "agent" → free (tool loops, continuations)
+//
+// The challenge: Claude Code sends tool results as role:"user" messages with
+// content type "tool_result". After translation to OpenAI format, the tool_result
+// part becomes a separate role:"tool" message, but if the original Claude message
+// also contained text content (e.g. skill invocations, attachment descriptions),
+// a role:"user" message is emitted AFTER the tool message, making the last message
+// appear user-initiated when it's actually part of an agent tool loop.
+//
+// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
+// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
+// we infer agent status from the payload: Chat Completions looks for tool
+// parts around the last user message; Responses checks for any prior agent item.
+//
+// References:
+//   - opencode#8030, opencode#15824: same root cause and fix approach
+//   - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
+//   - pi-ai: github-copilot-headers.ts (last message role check)
+func isAgentInitiated(body []byte) bool {
 	if len(body) == 0 {
-		return ""
+		return false
 	}

+	// Chat Completions API: check messages array
 	if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
 		arr := messages.Array()
+		if len(arr) == 0 {
+			return false
+		}
+
+		lastRole, lastIdx := "", -1
 		for i := len(arr) - 1; i >= 0; i-- {
-			if role := arr[i].Get("role").String(); role != "" {
-				return role
+			if r := arr[i].Get("role").String(); r != "" {
+				lastRole, lastIdx = r, i
+				break
 			}
 		}
+
+		// If the last role-bearing message is assistant or tool, clearly agent-initiated.
+		if lastRole == "assistant" || lastRole == "tool" {
+			return true
+		}
+
+		// If that message is "user", check whether it contains tool results
+		// (indicating a tool-loop continuation) or if the preceding message
+		// is an assistant tool_use. This is more precise than checking for
+		// any prior assistant message, which would false-positive on genuine
+		// multi-turn follow-ups.
+		if lastRole == "user" {
+			// Inspect the same message the role came from, not blindly the final element.
+			lastContent := arr[lastIdx].Get("content")
+			if lastContent.Exists() && lastContent.IsArray() {
+				for _, part := range lastContent.Array() {
+					if part.Get("type").String() == "tool_result" {
+						return true
+					}
+				}
+			}
+			// Check if the message just before it is an assistant with tool_use
+			if lastIdx >= 1 {
+				prev := arr[lastIdx-1]
+				if prev.Get("role").String() == "assistant" {
+					prevContent := prev.Get("content")
+					if prevContent.Exists() && prevContent.IsArray() {
+						for _, part := range prevContent.Array() {
+							if part.Get("type").String() == "tool_use" {
+								return true
+							}
+						}
+					}
+				}
+			}
+		}
+
+		return false
 	}

+	// Responses API: check input array
 	if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
 		arr := inputs.Array()
-		for i := len(arr) - 1; i >= 0; i-- {
-			item := arr[i]
+		if len(arr) == 0 {
+			return false
+		}

-			// Most Responses input items carry a top-level role.
-			if role := item.Get("role").String(); role != "" {
-				return role
+		// Check last item
+		last := arr[len(arr)-1]
+		if role := last.Get("role").String(); role == "assistant" {
+			return true
+		}
+		switch last.Get("type").String() {
+		case "function_call", "function_call_arguments", "computer_call":
+			return true
+		case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
+			return true
+		}
+
+		// Otherwise any assistant or tool-call item anywhere in the input marks a continuation.
+		for _, item := range arr {
+			if role := item.Get("role").String(); role == "assistant" {
+				return true
 			}
-
 			switch item.Get("type").String() {
-			case "function_call", "function_call_arguments", "computer_call":
-				return "assistant"
-			case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
-				return "tool"
+			case "function_call", "function_call_output", "function_call_response",
+				"function_call_arguments", "computer_call", "computer_call_output":
+				return true
 			}
 		}
 	}

-	return ""
+	return false
 }

 // detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
 	return body
 }

+// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
+// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
+// context on Anthropic's API, but Copilot's Claude models are limited to
+// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
+// it from the translated body avoids confusing downstream translators.
+var copilotUnsupportedBetas = []string{
+	"context-1m-2025-08-07",
+}
+
+// stripUnsupportedBetas removes Anthropic-specific beta entries from the
+// translated request body. It inspects the top-level "betas" array and
+// "metadata.betas"; a path whose entries are all stripped is deleted, and
+// paths that are missing, non-array, or unaffected are left untouched.
+func stripUnsupportedBetas(body []byte) []byte {
+	betaPaths := []string{"betas", "metadata.betas"}
+	for _, path := range betaPaths {
+		arr := gjson.GetBytes(body, path)
+		if !arr.Exists() || !arr.IsArray() {
+			continue
+		}
+		var filtered []string
+		changed := false
+		for _, item := range arr.Array() {
+			beta := item.String()
+			if isCopilotUnsupportedBeta(beta) {
+				changed = true
+				continue
+			}
+			filtered = append(filtered, beta)
+		}
+		if !changed {
+			continue
+		}
+		if len(filtered) == 0 {
+			body, _ = sjson.DeleteBytes(body, path)
+		} else {
+			body, _ = sjson.SetBytes(body, path, filtered)
+		}
+	}
+	return body
+}
+
+func isCopilotUnsupportedBeta(beta string) bool {
+	return slices.Contains(copilotUnsupportedBetas, beta)
+}
+
+// normalizeGitHubCopilotReasoningField copies Copilot's non-standard
+// 'reasoning_text' value into the standard OpenAI 'reasoning_content' field
+// that the SDK translator expects. This handles both streaming deltas
+// (choices[].delta.reasoning_text) and non-streaming messages
+// (choices[].message.reasoning_text). The copy happens only when
+// 'reasoning_content' is absent, null, or empty; 'reasoning_text' is kept.
+// All choices are processed to support n>1 requests.
+func normalizeGitHubCopilotReasoningField(data []byte) []byte {
+	choices := gjson.GetBytes(data, "choices")
+	if !choices.Exists() || !choices.IsArray() {
+		return data
+	}
+	for i := range choices.Array() {
+		// Non-streaming: choices[i].message.reasoning_text
+		msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
+		msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
+		if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
+			if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
+				data, _ = sjson.SetBytes(data, msgRC, rt.String())
+			}
+		}
+		// Streaming: choices[i].delta.reasoning_text
+		deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
+		deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
+		if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
+			if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
+				data, _ = sjson.SetBytes(data, deltaRC, rt.String())
+			}
+		}
+	}
+	return data
+}
+
 func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
 	if sourceFormat.String() == "openai-response" {
 		return true
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
 }

 func containsEndpoint(endpoints []string, endpoint string) bool {
-	for _, item := range endpoints {
-		if item == endpoint {
-			return true
-		}
-	}
-	return false
+	return slices.Contains(endpoints, endpoint)
 }

 // flattenAssistantContent converts assistant message content from array format
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
 	return body
 }

+// applyGitHubCopilotResponsesDefaults fills in Responses API fields that both
+// vscode-copilot-chat and pi-ai always send, leaving caller-set values intact.
+//
+// References:
+//   - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
+//   - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
+func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
+	// store: false — prevents request/response storage
+	if !gjson.GetBytes(body, "store").Exists() {
+		body, _ = sjson.SetBytes(body, "store", false)
+	}
+
+	// include: ["reasoning.encrypted_content"] — enables reasoning content
+	// reuse across turns, avoiding redundant computation
+	if !gjson.GetBytes(body, "include").Exists() {
+		body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
+	}
+
+	// If reasoning.effort is set but reasoning.summary is not, default to "auto"
+	if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
+		body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
+	}
+
+	return body
+}
+
 func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
 	tools := gjson.GetBytes(body, "tools")
 	if tools.Exists() {
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
 			m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
 		}

+		// Override with real limits from the Copilot API when available.
+		// The API returns per-account limits (individual vs business) under
+		// capabilities.limits, which are more accurate than our static
+		// fallback values. We use max_prompt_tokens as ContextLength because
+		// that's the hard limit the Copilot API enforces on prompt size —
+		// exceeding it triggers "prompt token count exceeds the limit" errors.
+		if limits := entry.Limits(); limits != nil {
+			if limits.MaxPromptTokens > 0 {
+				m.ContextLength = limits.MaxPromptTokens
+			}
+			if limits.MaxOutputTokens > 0 {
+				m.MaxCompletionTokens = limits.MaxOutputTokens
+			}
+		}
+
 		models = append(models, m)
 	}

--- a/internal/runtime/executor/github_copilot_executor_test.go
+++ b/internal/runtime/executor/github_copilot_executor_test.go
@@ -1,11 +1,14 @@
 package executor

 import (
+	"context"
 	"net/http"
 	"strings"
 	"testing"

+	copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
 )
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
 }

 func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
-	t.Parallel()
+	// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
 	if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
 		t.Fatal("expected responses-only registry model to use /responses")
 	}
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
 }

 func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
-	t.Parallel()
+	// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.

 	reg := registry.GetGlobalRegistry()
 	clientID := "github-copilot-test-client"
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
 	t.Parallel()
 	resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
 	out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
-	if gjson.Get(out, "type").String() != "message" {
-		t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
+	if gjson.GetBytes(out, "type").String() != "message" {
+		t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
 	}
-	if gjson.Get(out, "content.0.type").String() != "text" {
-		t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
+	if gjson.GetBytes(out, "content.0.type").String() != "text" {
+		t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
 	}
-	if gjson.Get(out, "content.0.text").String() != "hello" {
-		t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
+	if gjson.GetBytes(out, "content.0.text").String() != "hello" {
+		t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
 	}
 }

@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
 	t.Parallel()
 	resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
 	out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
-	if gjson.Get(out, "content.0.type").String() != "tool_use" {
-		t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
+	if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
+		t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
 	}
-	if gjson.Get(out, "content.0.name").String() != "sum" {
-		t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
+	if gjson.GetBytes(out, "content.0.name").String() != "sum" {
+		t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
 	}
-	if gjson.Get(out, "stop_reason").String() != "tool_use" {
-		t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
+	if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
+		t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
 	}
 }

@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
 	var param any

 	created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
-	if len(created) == 0 || !strings.Contains(created[0], "message_start") {
+	if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
 		t.Fatalf("created events = %#v, want message_start", created)
 	}

 	delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
-	joinedDelta := strings.Join(delta, "")
+	var joinedDelta string
+	for _, d := range delta {
+		joinedDelta += string(d)
+	}
 	if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
 		t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
 	}

 	completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
-	joinedCompleted := strings.Join(completed, "")
+	var joinedCompleted string
+	for _, c := range completed {
+		joinedCompleted += string(c)
+	}
 	if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
 		t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
 	}
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
 	}
 }

-func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
+func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
-	// Last role governs the initiator decision.
-	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
+	// When the last role is "user" and the message contains tool_result content,
+	// the request is a continuation (e.g. Claude tool result translated to a
+	// synthetic user message). Should be "agent".
+	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
 	e.applyHeaders(req, "token", body)
-	if got := req.Header.Get("X-Initiator"); got != "user" {
-		t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
+	if got := req.Header.Get("X-Initiator"); got != "agent" {
+		t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
 	}
 }

@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// When the last message has role "tool", it's clearly agent-initiated.
 	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
 	e.applyHeaders(req, "token", body)
 	if got := req.Header.Get("X-Initiator"); got != "agent" {
-		t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
+		t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
 	}
 }

@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
 	}
 }

-func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
+func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// Responses API: last item is user-role but history contains assistant → agent.
 	body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
 	e.applyHeaders(req, "token", body)
-	if got := req.Header.Get("X-Initiator"); got != "user" {
-		t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
+	if got := req.Header.Get("X-Initiator"); got != "agent" {
+		t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
 	}
 }

@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
 	}
 }

+// TestApplyHeaders_XInitiator_UserInMultiTurnNoTools checks that a plain-text
+// multi-turn chat (user → assistant → user) containing no tool messages is
+// still labelled "user" in X-Initiator, guarding against a false "agent".
+func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
+	t.Parallel()
+	payload := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
+	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	executor := &GitHubCopilotExecutor{}
+	executor.applyHeaders(request, "token", payload)
+
+	initiator := request.Header.Get("X-Initiator")
+	if initiator == "user" {
+		return
+	}
+	t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", initiator)
+}
+
+// TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory checks that a real
+// user question asked after a finished tool-use exchange is labelled "user".
+// Only an active tool loop counts as agent-initiated (matching opencode).
+func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
+	t.Parallel()
+	payload := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
+	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	executor := &GitHubCopilotExecutor{}
+	executor.applyHeaders(request, "token", payload)
+
+	initiator := request.Header.Get("X-Initiator")
+	if initiator == "user" {
+		return
+	}
+	t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", initiator)
+}
+
 // --- Tests for x-github-api-version header (Problem M) ---

 func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
 		t.Fatal("expected no vision content when messages field is absent")
 	}
 }
+
+// --- Tests for applyGitHubCopilotResponsesDefaults ---
+
+// TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults checks that a body
+// carrying only input and reasoning.effort comes back with store, include and
+// reasoning.summary filled in with their default values.
+func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	// Bool() is also false for a missing key, so require the key to exist;
+	// otherwise the test passes even when no default was written.
+	if store := gjson.GetBytes(got, "store"); !store.Exists() || store.Bool() {
+		t.Fatalf("store = %q, want false", store.Raw)
+	}
+	// Check the length before indexing so an empty array fails the test
+	// with a message instead of panicking.
+	inc := gjson.GetBytes(got, "include")
+	if items := inc.Array(); !inc.IsArray() || len(items) == 0 || items[0].String() != "reasoning.encrypted_content" {
+		t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
+	}
+	if summary := gjson.GetBytes(got, "reasoning.summary").String(); summary != "auto" {
+		t.Fatalf("reasoning.summary = %q, want auto", summary)
+	}
+}
+
+// TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting checks that
+// caller-supplied store, include and reasoning.summary values survive the
+// defaults pass unchanged.
+func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	if store := gjson.GetBytes(got, "store"); !store.Bool() {
+		t.Fatalf("store should not be overridden, got %s", store.Raw)
+	}
+	// Array() is empty for a missing key, so check the length before
+	// indexing; a dropped include then fails the test instead of panicking.
+	inc := gjson.GetBytes(got, "include")
+	if items := inc.Array(); len(items) == 0 || items[0].String() != "other" {
+		t.Fatalf("include should not be overridden, got %s", inc.Raw)
+	}
+	if summary := gjson.GetBytes(got, "reasoning.summary").String(); summary != "concise" {
+		t.Fatalf("reasoning.summary should not be overridden, got %q", summary)
+	}
+}
+
+// TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort checks that store
+// is still defaulted to false when the body has no reasoning.effort, and that
+// reasoning.summary is left unset in that case.
+func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello"}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	// Bool() is also false for a missing key, so require the key to exist.
+	if store := gjson.GetBytes(got, "store"); !store.Exists() || store.Bool() {
+		t.Fatalf("store = %q, want false", store.Raw)
+	}
+	// reasoning.summary should NOT be set when reasoning.effort is absent
+	if summary := gjson.GetBytes(got, "reasoning.summary"); summary.Exists() {
+		t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", summary.String())
+	}
+}
+
+// --- Tests for normalizeGitHubCopilotReasoningField ---
+
+// TestNormalizeReasoningField_NonStreaming expects the reasoning_text value of
+// a non-streaming choice message to be readable as reasoning_content.
+func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
+	t.Parallel()
+	payload := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
+	normalized := normalizeGitHubCopilotReasoningField(payload)
+
+	reasoning := gjson.GetBytes(normalized, "choices.0.message.reasoning_content").String()
+	if reasoning == "I think..." {
+		return
+	}
+	t.Fatalf("reasoning_content = %q, want %q", reasoning, "I think...")
+}
+
+// TestNormalizeReasoningField_Streaming expects the reasoning_text value of a
+// streaming delta chunk to be readable as reasoning_content.
+func TestNormalizeReasoningField_Streaming(t *testing.T) {
+	t.Parallel()
+	chunk := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
+	normalized := normalizeGitHubCopilotReasoningField(chunk)
+
+	reasoning := gjson.GetBytes(normalized, "choices.0.delta.reasoning_content").String()
+	if reasoning == "thinking delta" {
+		return
+	}
+	t.Fatalf("reasoning_content = %q, want %q", reasoning, "thinking delta")
+}
+
+// TestNormalizeReasoningField_PreservesExistingReasoningContent expects an
+// already-present reasoning_content to win over reasoning_text.
+func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
+	t.Parallel()
+	payload := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
+	normalized := normalizeGitHubCopilotReasoningField(payload)
+
+	reasoning := gjson.GetBytes(normalized, "choices.0.message.reasoning_content").String()
+	if reasoning == "existing" {
+		return
+	}
+	t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", reasoning, "existing")
+}
+
+// TestNormalizeReasoningField_MultiChoice expects every entry of choices, not
+// only the first, to expose its reasoning_text as reasoning_content.
+func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
+	t.Parallel()
+	payload := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
+	normalized := normalizeGitHubCopilotReasoningField(payload)
+
+	if first := gjson.GetBytes(normalized, "choices.0.message.reasoning_content").String(); first != "thought-0" {
+		t.Fatalf("choices[0].reasoning_content = %q, want %q", first, "thought-0")
+	}
+	if second := gjson.GetBytes(normalized, "choices.1.message.reasoning_content").String(); second != "thought-1" {
+		t.Fatalf("choices[1].reasoning_content = %q, want %q", second, "thought-1")
+	}
+}
+
+// TestNormalizeReasoningField_NoChoices expects a payload without a choices
+// field to come back byte-for-byte unchanged.
+func TestNormalizeReasoningField_NoChoices(t *testing.T) {
+	t.Parallel()
+	input := []byte(`{"id":"chatcmpl-123"}`)
+	output := normalizeGitHubCopilotReasoningField(input)
+	if string(output) == string(input) {
+		return
+	}
+	t.Fatalf("expected no change, got %s", string(output))
+}
+
+// TestApplyHeaders_OpenAIIntentValue expects applyHeaders to set the
+// Openai-Intent header to "conversation-edits" even when the body is nil.
+func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
+	t.Parallel()
+	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	executor := &GitHubCopilotExecutor{}
+	executor.applyHeaders(request, "token", nil)
+
+	intent := request.Header.Get("Openai-Intent")
+	if intent == "conversation-edits" {
+		return
+	}
+	t.Fatalf("Openai-Intent = %q, want conversation-edits", intent)
+}
+
+// --- Tests for CountTokens (local tiktoken estimation) ---
+
+// TestCountTokens_ReturnsPositiveCount expects CountTokens to answer an
+// OpenAI-format request with a non-empty payload whose usage.prompt_tokens is
+// greater than zero.
+func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
+	t.Parallel()
+	request := cliproxyexecutor.Request{
+		Model:   "gpt-4o",
+		Payload: []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`),
+	}
+	options := cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	}
+	executor := &GitHubCopilotExecutor{}
+
+	resp, err := executor.CountTokens(context.Background(), nil, request, options)
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	if len(resp.Payload) == 0 {
+		t.Fatal("CountTokens() returned empty payload")
+	}
+	// A one-message prompt must yield a strictly positive count.
+	if count := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int(); count <= 0 {
+		t.Fatalf("expected positive token count, got %d", count)
+	}
+}
+
+// TestCountTokens_ClaudeSourceFormatTranslates expects CountTokens to report a
+// positive count for a Claude-format request. The count may appear either as
+// input_tokens or as usage.prompt_tokens (the original test notes this
+// depends on translator registration).
+func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
+	t.Parallel()
+	request := cliproxyexecutor.Request{
+		Model:   "claude-sonnet-4",
+		Payload: []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`),
+	}
+	options := cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("claude"),
+	}
+	executor := &GitHubCopilotExecutor{}
+
+	resp, err := executor.CountTokens(context.Background(), nil, request, options)
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	// Preferred shape: Claude-style input_tokens.
+	if gjson.GetBytes(resp.Payload, "input_tokens").Int() > 0 {
+		return
+	}
+	// Fallback shape: OpenAI-style usage.prompt_tokens.
+	if gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int() > 0 {
+		return
+	}
+	t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
+}
+
+// TestCountTokens_EmptyPayload expects CountTokens to report exactly zero
+// usage.prompt_tokens when the messages array is empty.
+func TestCountTokens_EmptyPayload(t *testing.T) {
+	t.Parallel()
+	request := cliproxyexecutor.Request{
+		Model:   "gpt-4o",
+		Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
+	}
+	options := cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	}
+	executor := &GitHubCopilotExecutor{}
+
+	resp, err := executor.CountTokens(context.Background(), nil, request, options)
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	// No messages means nothing to count.
+	if count := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int(); count != 0 {
+		t.Fatalf("expected 0 tokens for empty messages, got %d", count)
+	}
+}
+
+// TestStripUnsupportedBetas_RemovesContext1M expects the top-level betas list
+// to lose "context-1m-2025-08-07" while the field itself and the
+// "interleaved-thinking-2025-05-14" entry remain.
+func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
+	t.Parallel()
+
+	payload := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
+	stripped := stripUnsupportedBetas(payload)
+
+	betas := gjson.GetBytes(stripped, "betas")
+	if !betas.Exists() {
+		t.Fatal("betas field should still exist after stripping")
+	}
+	// One pass: reject the stripped beta and note whether a supported one survived.
+	kept := false
+	for _, entry := range betas.Array() {
+		switch entry.String() {
+		case "context-1m-2025-08-07":
+			t.Fatal("context-1m-2025-08-07 should have been stripped")
+		case "interleaved-thinking-2025-05-14":
+			kept = true
+		}
+	}
+	if !kept {
+		t.Fatal("other betas should be preserved")
+	}
+}
+
+// TestStripUnsupportedBetas_NoBetasField expects a body without any betas
+// field to be returned byte-for-byte unchanged.
+func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
+	t.Parallel()
+
+	input := []byte(`{"model":"gpt-4o","messages":[]}`)
+	output := stripUnsupportedBetas(input)
+
+	if string(output) == string(input) {
+		return
+	}
+	t.Fatalf("body should be unchanged when no betas field exists, got %s", string(output))
+}
+
+// TestStripUnsupportedBetas_MetadataBetas expects "context-1m-2025-08-07" to
+// be removed from metadata.betas while the field and "other-beta" remain.
+func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
+	t.Parallel()
+
+	body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
+	result := stripUnsupportedBetas(body)
+
+	betas := gjson.GetBytes(result, "metadata.betas")
+	if !betas.Exists() {
+		t.Fatal("metadata.betas field should still exist after stripping")
+	}
+	items := betas.Array()
+	for _, item := range items {
+		if item.String() == "context-1m-2025-08-07" {
+			t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
+		}
+	}
+	// Check the length before indexing: if every entry was stripped, the
+	// test must fail with a message rather than panic on items[0].
+	if len(items) == 0 || items[0].String() != "other-beta" {
+		t.Fatal("other betas in metadata.betas should be preserved")
+	}
+}
+
+// TestStripUnsupportedBetas_AllBetasStripped expects the betas key to be
+// removed entirely when its only entry is the unsupported one.
+func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
+	t.Parallel()
+
+	payload := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
+	stripped := stripUnsupportedBetas(payload)
+
+	if gjson.GetBytes(stripped, "betas").Exists() {
+		t.Fatal("betas field should be deleted when all betas are stripped")
+	}
+}
+
+// TestCopilotModelEntry_Limits is a table-driven check of
+// CopilotModelEntry.Limits. It covers capabilities maps that must yield nil
+// (missing, malformed or all-zero limits) and maps that must yield populated
+// limits, including a partial map where only the prompt limit is present.
+func TestCopilotModelEntry_Limits(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name         string
+		capabilities map[string]any
+		wantNil      bool
+		wantPrompt   int
+		wantOutput   int
+		wantContext  int
+	}{
+		{
+			name:         "nil capabilities",
+			capabilities: nil,
+			wantNil:      true,
+		},
+		{
+			name:         "no limits key",
+			capabilities: map[string]any{"family": "claude-opus-4.6"},
+			wantNil:      true,
+		},
+		{
+			name:         "limits is not a map",
+			capabilities: map[string]any{"limits": "invalid"},
+			wantNil:      true,
+		},
+		{
+			name: "all zero values",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(0),
+					"max_prompt_tokens":         float64(0),
+					"max_output_tokens":         float64(0),
+				},
+			},
+			wantNil: true,
+		},
+		{
+			name: "individual account limits (128K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(144000),
+					"max_prompt_tokens":         float64(128000),
+					"max_output_tokens":         float64(64000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  128000,
+			wantOutput:  64000,
+			wantContext: 144000,
+		},
+		{
+			name: "business account limits (168K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(200000),
+					"max_prompt_tokens":         float64(168000),
+					"max_output_tokens":         float64(32000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  168000,
+			wantOutput:  32000,
+			wantContext: 200000,
+		},
+		{
+			// Keys missing from the limits map are expected to stay zero.
+			name: "partial limits (only prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_prompt_tokens": float64(128000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  128000,
+			wantOutput:  0,
+			wantContext: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			entry := copilotauth.CopilotModelEntry{
+				ID:           "claude-opus-4.6",
+				Capabilities: tt.capabilities,
+			}
+			limits := entry.Limits()
+			if tt.wantNil {
+				if limits != nil {
+					t.Fatalf("expected nil limits, got %+v", limits)
+				}
+				return
+			}
+			if limits == nil {
+				t.Fatal("expected non-nil limits, got nil")
+			}
+			if limits.MaxPromptTokens != tt.wantPrompt {
+				t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
+			}
+			if limits.MaxOutputTokens != tt.wantOutput {
+				t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
+			}
+			// Compare unconditionally so a case expecting zero is also
+			// verified, the same way the prompt and output limits are.
+			if limits.MaxContextWindowTokens != tt.wantContext {
+				t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
+			}
+		})
+	}
+}