Merge branch 'pr-488'

# Conflicts: # README.md # README_CN.md # README_JA.md
feat(api): enhance proxy resolution for API key-based auth
2026-04-04 19:51:18 +00:00 · 2026-04-05 02:08:45 +08:00 · 2026-04-05 01:56:34 +08:00 · 2026-04-05 01:20:50 +08:00 · 2026-04-04 22:09:11 +08:00 · 2026-04-04 21:51:02 +08:00
17 changed files with 1394 additions and 80 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -54,4 +54,10 @@ _bmad-output/*
 # macOS
 .DS_Store
 ._*
+
+# Opencode
+.beads/
+.opencode/
+.cli-proxy-api/
+.venv/
 *.bak
--- a/cmd/fetch_antigravity_models/main.go
+++ b/cmd/fetch_antigravity_models/main.go
@@ -26,6 +26,7 @@ import (
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
@@ -188,7 +189,7 @@ func fetchModels(ctx context.Context, auth *coreauth.Auth) []modelEntry {
 		httpReq.Close = true
 		httpReq.Header.Set("Content-Type", "application/json")
 		httpReq.Header.Set("Authorization", "Bearer "+accessToken)
-		httpReq.Header.Set("User-Agent", "antigravity/1.19.6 darwin/arm64")
+		httpReq.Header.Set("User-Agent", misc.AntigravityUserAgent())

 		httpClient := &http.Client{Timeout: 30 * time.Second}
 		if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil {
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -99,6 +99,7 @@ func main() {
 	var codeBuddyLogin bool
 	var projectID string
 	var vertexImport string
+	var vertexImportPrefix string
 	var configPath string
 	var password string
 	var tuiMode bool
@@ -139,6 +140,7 @@ func main() {
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
 	flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
+	flag.StringVar(&vertexImportPrefix, "vertex-import-prefix", "", "Prefix for Vertex model namespacing (use with -vertex-import)")
 	flag.StringVar(&password, "password", "", "")
 	flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
 	flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
@@ -510,7 +512,7 @@ func main() {

 	if vertexImport != "" {
 		// Handle Vertex service account import
-		cmd.DoVertexImport(cfg, vertexImport)
+		cmd.DoVertexImport(cfg, vertexImport, vertexImportPrefix)
 	} else if login {
 		// Handle Google/Gemini login
 		cmd.DoLogin(cfg, projectID, options)
@@ -596,6 +598,7 @@ func main() {
 			if standalone {
 				// Standalone mode: start an embedded local server and connect TUI client to it.
 				managementasset.StartAutoUpdater(context.Background(), configFilePath)
+				misc.StartAntigravityVersionUpdater(context.Background())
 				if !localModel {
 					registry.StartModelsUpdater(context.Background())
 				}
@@ -671,6 +674,7 @@ func main() {
 		} else {
 			// Start the main proxy service
 			managementasset.StartAutoUpdater(context.Background(), configFilePath)
+			misc.StartAntigravityVersionUpdater(context.Background())
 			if !localModel {
 				registry.StartModelsUpdater(context.Background())
 			}
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -13,6 +13,7 @@ import (

 	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
@@ -700,6 +701,11 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
 		if proxyStr := strings.TrimSpace(auth.ProxyURL); proxyStr != "" {
 			proxyCandidates = append(proxyCandidates, proxyStr)
 		}
+		if h != nil && h.cfg != nil {
+			if proxyStr := strings.TrimSpace(proxyURLFromAPIKeyConfig(h.cfg, auth)); proxyStr != "" {
+				proxyCandidates = append(proxyCandidates, proxyStr)
+			}
+		}
 	}
 	if h != nil && h.cfg != nil {
 		if proxyStr := strings.TrimSpace(h.cfg.ProxyURL); proxyStr != "" {
@@ -722,6 +728,123 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
 	return clone
 }

+type apiKeyConfigEntry interface { // type constraint for the entries resolveAPIKeyConfig searches
+	GetAPIKey() string  // compared (trimmed, case-insensitively) with auth.Attributes["api_key"]
+	GetBaseURL() string // compared (trimmed, case-insensitively) with auth.Attributes["base_url"]
+}
+
+// resolveAPIKeyConfig picks the entry that belongs to auth, judged by the auth's
+// "api_key" and "base_url" attributes (trimmed, compared case-insensitively).
+// It returns nil when auth is nil, entries is empty, or nothing matches.
+func resolveAPIKeyConfig[T apiKeyConfigEntry](entries []T, auth *coreauth.Auth) *T {
+	if len(entries) == 0 || auth == nil {
+		return nil
+	}
+	// Indexing a nil Attributes map yields "", so no separate nil check is needed.
+	wantKey := strings.TrimSpace(auth.Attributes["api_key"])
+	wantBase := strings.TrimSpace(auth.Attributes["base_url"])
+	// First pass: honour whichever of the two attributes are present.
+	for idx := range entries {
+		candidate := &entries[idx]
+		keyOK := strings.EqualFold(strings.TrimSpace(entries[idx].GetAPIKey()), wantKey)
+		base := strings.TrimSpace(entries[idx].GetBaseURL())
+		baseOK := strings.EqualFold(base, wantBase)
+		switch {
+		case wantKey != "" && wantBase != "":
+			if keyOK && baseOK {
+				return candidate
+			}
+		case wantKey != "":
+			// The auth has no base URL here, so only an entry without one can match.
+			if keyOK && (base == "" || baseOK) {
+				return candidate
+			}
+		case wantBase != "" && baseOK:
+			return candidate
+		}
+	}
+	// Second pass: settle for the first entry whose key matches, whatever its base URL.
+	for idx := range entries {
+		if wantKey != "" && strings.EqualFold(strings.TrimSpace(entries[idx].GetAPIKey()), wantKey) {
+			return &entries[idx]
+		}
+	}
+	return nil
+}
+
+// proxyURLFromAPIKeyConfig looks up the proxy URL configured for an API-key auth.
+// It returns "" when cfg or auth is nil, when auth.AccountInfo does not report the
+// "api_key" kind, or when no config entry for the auth's provider matches.
+func proxyURLFromAPIKeyConfig(cfg *config.Config, auth *coreauth.Auth) string {
+	if auth == nil || cfg == nil {
+		return ""
+	}
+	kind, account := auth.AccountInfo()
+	if !strings.EqualFold(strings.TrimSpace(kind), "api_key") {
+		return ""
+	}
+	provider := strings.TrimSpace(auth.Provider)
+	compatName := strings.TrimSpace(auth.Attributes["compat_name"])
+	providerKey := strings.TrimSpace(auth.Attributes["provider_key"])
+
+	// OpenAI-compatible providers are resolved against cfg.OpenAICompatibility instead.
+	if compatName != "" || strings.EqualFold(provider, "openai-compatibility") {
+		return resolveOpenAICompatAPIKeyProxyURL(cfg, auth, strings.TrimSpace(account), providerKey, compatName)
+	}
+	var proxyURL string
+	switch strings.ToLower(provider) {
+	case "gemini":
+		if match := resolveAPIKeyConfig(cfg.GeminiKey, auth); match != nil {
+			proxyURL = match.ProxyURL
+		}
+	case "claude":
+		if match := resolveAPIKeyConfig(cfg.ClaudeKey, auth); match != nil {
+			proxyURL = match.ProxyURL
+		}
+	case "codex":
+		if match := resolveAPIKeyConfig(cfg.CodexKey, auth); match != nil {
+			proxyURL = match.ProxyURL
+		}
+	}
+	return strings.TrimSpace(proxyURL)
+}
+
+// resolveOpenAICompatAPIKeyProxyURL returns the proxy URL stored next to apiKey in the
+// first cfg.OpenAICompatibility entry whose Name equals compatName, providerKey or
+// auth.Provider (case-insensitively). It returns "" when nothing matches.
+func resolveOpenAICompatAPIKeyProxyURL(cfg *config.Config, auth *coreauth.Auth, apiKey, providerKey, compatName string) string {
+	if auth == nil || cfg == nil {
+		return ""
+	}
+	if apiKey = strings.TrimSpace(apiKey); apiKey == "" {
+		return ""
+	}
+	// Collect the non-blank names a compatibility entry may be registered under.
+	names := make([]string, 0, 3)
+	for _, raw := range [...]string{compatName, providerKey, auth.Provider} {
+		if name := strings.TrimSpace(raw); name != "" {
+			names = append(names, name)
+		}
+	}
+	for i := range cfg.OpenAICompatibility {
+		compat := &cfg.OpenAICompatibility[i]
+		nameMatches := false
+		for _, name := range names {
+			nameMatches = nameMatches || strings.EqualFold(name, compat.Name)
+		}
+		if !nameMatches {
+			continue
+		}
+		for j := range compat.APIKeyEntries {
+			if strings.EqualFold(strings.TrimSpace(compat.APIKeyEntries[j].APIKey), apiKey) {
+				return strings.TrimSpace(compat.APIKeyEntries[j].ProxyURL)
+			}
+		}
+		return ""
+	}
+	return ""
+}
+
 func buildProxyTransport(proxyStr string) *http.Transport {
 	transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr)
 	if errBuild != nil {
--- a/internal/api/handlers/management/api_tools_test.go
+++ b/internal/api/handlers/management/api_tools_test.go
@@ -58,6 +58,105 @@ func TestAPICallTransportInvalidAuthFallsBackToGlobalProxy(t *testing.T) {
 	}
 }

+// TestAPICallTransportAPIKeyAuthFallsBackToConfigProxyURL verifies that an API-key auth
+// with no ProxyURL of its own is routed through the proxy of its matching config entry
+// (Gemini, Claude, Codex or OpenAI-compatibility) instead of the global proxy.
+func TestAPICallTransportAPIKeyAuthFallsBackToConfigProxyURL(t *testing.T) {
+	t.Parallel()
+
+	const (
+		geminiProxy = "http://gemini-proxy.example.com:8080"
+		claudeProxy = "http://claude-proxy.example.com:8080"
+		codexProxy  = "http://codex-proxy.example.com:8080"
+		compatProxy = "http://compat-proxy.example.com:8080"
+	)
+
+	// One entry per provider family, each with its own proxy, next to a global proxy.
+	cfg := &config.Config{
+		SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"},
+		GeminiKey: []config.GeminiKey{{APIKey: "gemini-key", ProxyURL: geminiProxy}},
+		ClaudeKey: []config.ClaudeKey{{APIKey: "claude-key", ProxyURL: claudeProxy}},
+		CodexKey:  []config.CodexKey{{APIKey: "codex-key", ProxyURL: codexProxy}},
+		OpenAICompatibility: []config.OpenAICompatibility{{
+			Name:    "bohe",
+			BaseURL: "https://bohe.example.com",
+			APIKeyEntries: []config.OpenAICompatibilityAPIKey{{
+				APIKey:   "compat-key",
+				ProxyURL: compatProxy,
+			}},
+		}},
+	}
+	handler := &Handler{cfg: cfg}
+
+	// Each case names the provider and attributes of an auth and the proxy it must resolve to.
+	tests := []struct {
+		name     string
+		provider string
+		attrs    map[string]string
+		want     string
+	}{
+		{
+			name:     "gemini",
+			provider: "gemini",
+			attrs:    map[string]string{"api_key": "gemini-key"},
+			want:     geminiProxy,
+		},
+		{
+			name:     "claude",
+			provider: "claude",
+			attrs:    map[string]string{"api_key": "claude-key"},
+			want:     claudeProxy,
+		},
+		{
+			name:     "codex",
+			provider: "codex",
+			attrs:    map[string]string{"api_key": "codex-key"},
+			want:     codexProxy,
+		},
+		{
+			// The provider is the compatibility entry's name, not "openai-compatibility".
+			name:     "openai-compatibility",
+			provider: "bohe",
+			attrs: map[string]string{
+				"api_key":      "compat-key",
+				"compat_name":  "bohe",
+				"provider_key": "bohe",
+			},
+			want: compatProxy,
+		},
+	}
+
+	for _, tt := range tests {
+		tt := tt // capture per iteration for the parallel subtest
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			auth := &coreauth.Auth{Provider: tt.provider, Attributes: tt.attrs}
+
+			roundTripper := handler.apiCallTransport(auth)
+			proxied, isHTTPTransport := roundTripper.(*http.Transport)
+			if !isHTTPTransport {
+				t.Fatalf("transport type = %T, want *http.Transport", roundTripper)
+			}
+
+			probe, errProbe := http.NewRequest(http.MethodGet, "https://example.com", nil)
+			if errProbe != nil {
+				t.Fatalf("http.NewRequest returned error: %v", errProbe)
+			}
+
+			// Ask the transport which proxy it would use for the probe request.
+			got, errProxy := proxied.Proxy(probe)
+			switch {
+			case errProxy != nil:
+				t.Fatalf("httpTransport.Proxy returned error: %v", errProxy)
+			case got == nil || got.String() != tt.want:
+				t.Fatalf("proxy URL = %v, want %s", got, tt.want)
+			}
+		})
+	}
+}
+
 func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) {
 	t.Parallel()

--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -573,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
 		mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)

+		mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
+
 		mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
 		mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
 		mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
--- a/internal/auth/copilot/copilot_auth.go
+++ b/internal/auth/copilot/copilot_auth.go
@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
 	Capabilities map[string]any `json:"capabilities,omitempty"`
 }

+// CopilotModelLimits holds the token limits the Copilot /models API returns under
+// capabilities.limits. These limits vary by account type (individual vs business)
+// and are the authoritative source for enforcing prompt size.
+type CopilotModelLimits struct {
+	// MaxContextWindowTokens is the total context window (prompt + output); JSON key "max_context_window_tokens".
+	MaxContextWindowTokens int
+	// MaxPromptTokens is the hard limit on input/prompt tokens; JSON key "max_prompt_tokens".
+	// Exceeding this triggers a 400 error from the Copilot API.
+	MaxPromptTokens int
+	// MaxOutputTokens is the maximum number of output/completion tokens; JSON key "max_output_tokens".
+	MaxOutputTokens int
+}
+
+// Limits reads the token limits found under the "limits" key of the model's capabilities map.
+// It returns nil when that key is missing, does not hold a map[string]any, or yields
+// zero for all three known fields.
+//
+// Expected Copilot API shape:
+//
+//	"capabilities": {
+//	    "limits": {
+//	        "max_context_window_tokens": 200000,
+//	        "max_prompt_tokens": 168000,
+//	        "max_output_tokens": 32000
+//	    }
+//	}
+func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
+	if len(e.Capabilities) == 0 {
+		return nil
+	}
+
+	// A missing key yields a nil interface, which fails the type assertion as well.
+	raw, isMap := e.Capabilities["limits"].(map[string]any)
+	if !isMap {
+		return nil
+	}
+
+	limits := CopilotModelLimits{
+		MaxContextWindowTokens: anyToInt(raw["max_context_window_tokens"]),
+		MaxPromptTokens:        anyToInt(raw["max_prompt_tokens"]),
+		MaxOutputTokens:        anyToInt(raw["max_output_tokens"]),
+	}
+
+	// Hand back a value only if at least one limit was populated.
+	populated := limits.MaxContextWindowTokens != 0 || limits.MaxPromptTokens != 0 || limits.MaxOutputTokens != 0
+	if !populated {
+		return nil
+	}
+	return &limits
+}
+
+// anyToInt narrows a loosely typed numeric value to int and yields 0 for every other type.
+// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
+func anyToInt(v any) int {
+	if f64, ok := v.(float64); ok {
+		return int(f64)
+	}
+	if f32, ok := v.(float32); ok {
+		return int(f32)
+	}
+	if i64, ok := v.(int64); ok {
+		return int(i64)
+	}
+	// Anything that is not an int at this point leaves i at its zero value.
+	i, _ := v.(int)
+	return i
+}
+
 // CopilotModelsResponse represents the response from the Copilot /models endpoint.
 type CopilotModelsResponse struct {
 	Data   []CopilotModelEntry `json:"data"`
--- a/internal/auth/vertex/vertex_credentials.go
+++ b/internal/auth/vertex/vertex_credentials.go
@@ -30,6 +30,10 @@ type VertexCredentialStorage struct {

 	// Type is the provider identifier stored alongside credentials. Always "vertex".
 	Type string `json:"type"`
+
+	// Prefix optionally namespaces models for this credential (e.g., "teamA").
+	// This results in model names like "teamA/gemini-2.0-flash".
+	Prefix string `json:"prefix,omitempty"`
 }

 // SaveTokenToFile writes the credential payload to the given file path in JSON format.
--- a/internal/cmd/vertex_import.go
+++ b/internal/cmd/vertex_import.go
@@ -20,7 +20,7 @@ import (
 // DoVertexImport imports a Google Cloud service account key JSON and persists
 // it as a "vertex" provider credential. The file content is embedded in the auth
 // file to allow portable deployment across stores.
-func DoVertexImport(cfg *config.Config, keyPath string) {
+func DoVertexImport(cfg *config.Config, keyPath string, prefix string) {
 	if cfg == nil {
 		cfg = &config.Config{}
 	}
@@ -62,13 +62,28 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
 	// Default location if not provided by user. Can be edited in the saved file later.
 	location := "us-central1"

-	fileName := fmt.Sprintf("vertex-%s.json", sanitizeFilePart(projectID))
+	// Normalize and validate prefix: must be a single segment (no "/" allowed).
+	prefix = strings.TrimSpace(prefix)
+	prefix = strings.Trim(prefix, "/")
+	if prefix != "" && strings.Contains(prefix, "/") {
+		log.Errorf("vertex-import: prefix must be a single segment (no '/' allowed): %q", prefix)
+		return
+	}
+
+	// Include prefix in filename so importing the same project with different
+	// prefixes creates separate credential files instead of overwriting.
+	baseName := sanitizeFilePart(projectID)
+	if prefix != "" {
+		baseName = sanitizeFilePart(prefix) + "-" + baseName
+	}
+	fileName := fmt.Sprintf("vertex-%s.json", baseName)
 	// Build auth record
 	storage := &vertex.VertexCredentialStorage{
 		ServiceAccount: sa,
 		ProjectID:      projectID,
 		Email:          email,
 		Location:       location,
+		Prefix:         prefix,
 	}
 	metadata := map[string]any{
 		"service_account": sa,
@@ -76,6 +91,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
 		"email":           email,
 		"location":        location,
 		"type":            "vertex",
+		"prefix":          prefix,
 		"label":           labelForVertex(projectID, email),
 	}
 	record := &coreauth.Auth{
--- a/internal/misc/antigravity_version.go
+++ b/internal/misc/antigravity_version.go
@@ -0,0 +1,151 @@
+// Package misc provides miscellaneous utility functions for the CLI Proxy API server.
+package misc
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+const ( // settings for the background antigravity version lookup
+	antigravityReleasesURL     = "https://antigravity-auto-updater-974169037036.us-central1.run.app/releases"
+	antigravityFallbackVersion = "1.21.9"         // served while no fresh fetched version is cached
+	antigravityVersionCacheTTL = 6 * time.Hour    // lifetime of a cached version; the updater ticks every TTL/2
+	antigravityFetchTimeout    = 10 * time.Second // http.Client timeout for one releases request
+)
+
+type antigravityRelease struct { // element of the JSON array decoded from antigravityReleasesURL
+	Version     string `json:"version"`      // the only field fetchAntigravityLatestVersion reads
+	ExecutionID string `json:"execution_id"` // decoded but not read anywhere in this file
+}
+
+var ( // cache state; functions in this file hold antigravityVersionMu while touching the two cache fields
+	cachedAntigravityVersion = antigravityFallbackVersion
+	antigravityVersionMu     sync.RWMutex
+	antigravityVersionExpiry time.Time // instant after which the cached version counts as stale
+	antigravityUpdaterOnce   sync.Once // ensures the updater goroutine is started only once
+)
+
+// StartAntigravityVersionUpdater launches, at most once, the goroutine that periodically refreshes the cached antigravity version.
+// Refreshing in the background keeps version lookups out of request execution, so executors are not blocked on them.
+func StartAntigravityVersionUpdater(ctx context.Context) {
+	launch := func() { go runAntigravityVersionUpdater(ctx) }
+	// Later calls are no-ops: sync.Once runs launch only the first time.
+	antigravityUpdaterOnce.Do(launch)
+}
+
+// runAntigravityVersionUpdater refreshes the cached version immediately and then on
+// every tick (half the cache TTL) until ctx is done. A nil ctx is replaced by Background.
+func runAntigravityVersionUpdater(ctx context.Context) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	const interval = antigravityVersionCacheTTL / 2
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	log.Infof("periodic antigravity version refresh started (interval=%s)", interval)
+	for {
+		refreshAntigravityVersion(ctx)
+		select {
+		case <-ticker.C:
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// refreshAntigravityVersion fetches the latest version and updates the cache under the write lock.
+// On a failed fetch the cached value is kept while it is still fresh; otherwise the fallback is stored.
+func refreshAntigravityVersion(ctx context.Context) {
+	latest, errFetch := fetchAntigravityLatestVersion(ctx)
+	// The lock is taken only after the network round trip has finished.
+
+	antigravityVersionMu.Lock()
+	defer antigravityVersionMu.Unlock()
+
+	now := time.Now()
+	switch {
+	case errFetch == nil:
+		cachedAntigravityVersion = latest
+		antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
+		log.WithField("version", latest).Info("fetched latest antigravity version")
+	case cachedAntigravityVersion == "" || now.After(antigravityVersionExpiry):
+		// Nothing usable is cached any more, so start a new TTL window on the fallback.
+		cachedAntigravityVersion = antigravityFallbackVersion
+		antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
+		log.WithError(errFetch).Warn("failed to refresh antigravity version, using fallback version")
+	default:
+		log.WithError(errFetch).Debug("failed to refresh antigravity version, keeping cached value")
+	}
+}
+
+// AntigravityLatestVersion reports the antigravity version most recently stored by the background updater.
+// While nothing is cached, or once the cached value has reached its expiry, antigravityFallbackVersion is returned.
+func AntigravityLatestVersion() string {
+	antigravityVersionMu.RLock()
+	defer antigravityVersionMu.RUnlock()
+
+	fresh := time.Now().Before(antigravityVersionExpiry)
+	if !fresh || cachedAntigravityVersion == "" {
+		return antigravityFallbackVersion
+	}
+
+	return cachedAntigravityVersion
+}
+
+// AntigravityUserAgent returns the antigravity User-Agent value built around AntigravityLatestVersion.
+func AntigravityUserAgent() string {
+	version := AntigravityLatestVersion()
+	return fmt.Sprintf("antigravity/%s darwin/arm64", version)
+}
+
+// fetchAntigravityLatestVersion GETs antigravityReleasesURL and returns the Version of the
+// first element of the decoded JSON array. A transport failure, a non-200 status, an
+// undecodable body, an empty list and an empty version each yield an error.
+func fetchAntigravityLatestVersion(ctx context.Context) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	request, errReq := http.NewRequestWithContext(ctx, http.MethodGet, antigravityReleasesURL, nil)
+	if errReq != nil {
+		return "", fmt.Errorf("build antigravity releases request: %w", errReq)
+	}
+
+	httpClient := http.Client{Timeout: antigravityFetchTimeout}
+	response, errDo := httpClient.Do(request)
+	if errDo != nil {
+		return "", fmt.Errorf("fetch antigravity releases: %w", errDo)
+	}
+	defer func() {
+		errClose := response.Body.Close()
+		if errClose != nil {
+			log.WithError(errClose).Warn("antigravity releases response body close error")
+		}
+	}()
+
+	if status := response.StatusCode; status != http.StatusOK {
+		return "", fmt.Errorf("antigravity releases API returned status %d", status)
+	}
+
+	var list []antigravityRelease
+	if errDecode := json.NewDecoder(response.Body).Decode(&list); errDecode != nil {
+		return "", fmt.Errorf("decode antigravity releases response: %w", errDecode)
+	}
+
+	switch {
+	case len(list) == 0:
+		return "", errors.New("antigravity releases API returned empty list")
+	case list[0].Version == "":
+		return "", errors.New("antigravity releases API returned empty version")
+	}
+	return list[0].Version, nil
+}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -549,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-opus-4.6",
@@ -561,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4",
@@ -573,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4.5",
@@ -585,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "claude-sonnet-4.6",
@@ -597,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gemini-2.5-pro",
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -24,6 +24,7 @@ import (

 	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -45,7 +46,7 @@ const (
 	antigravityGeneratePath        = "/v1internal:generateContent"
 	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret        = "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"
-	defaultAntigravityAgent        = "antigravity/1.19.6 darwin/arm64"
+	defaultAntigravityAgent        = "antigravity/1.21.9 darwin/arm64" // fallback only; overridden at runtime by misc.AntigravityUserAgent()
 	antigravityAuthType            = "antigravity"
 	refreshSkew                    = 3000 * time.Second
 	antigravityCreditsRetryTTL     = 5 * time.Hour
@@ -1739,7 +1740,7 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
 			}
 		}
 	}
-	return defaultAntigravityAgent
+	return misc.AntigravityUserAgent()
 }

 func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -848,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 			hasClaude1MHeader = true
 		}
 	}
+	// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
+	// when routing through the Anthropic gateway, but the gin headers won't have
+	// X-CPA-CLAUDE-1M because the request is internally constructed.
+	if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
+		if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
+			hasClaude1MHeader = true
+		}
+	}

 	// Merge extra betas from request body and request flags.
 	if len(extraBetas) > 0 || hasClaude1MHeader {
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"slices"
 	"strings"
 	"sync"
 	"time"
@@ -17,6 +18,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	log "github.com/sirupsen/logrus"
@@ -40,7 +42,7 @@ const (
 	copilotEditorVersion = "vscode/1.107.0"
 	copilotPluginVersion = "copilot-chat/0.35.0"
 	copilotIntegrationID = "vscode-chat"
-	copilotOpenAIIntent  = "conversation-panel"
+	copilotOpenAIIntent  = "conversation-edits"
 	copilotGitHubAPIVer  = "2025-04-01"
 )

@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = e.normalizeModel(req.Model, body)
 	body = flattenAssistantContent(body)
+	body = stripUnsupportedBetas(body)

 	// Detect vision content before input normalization removes messages
 	hasVision := detectVisionContent(body)
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	if useResponses {
 		body = normalizeGitHubCopilotResponsesInput(body)
 		body = normalizeGitHubCopilotResponsesTools(body)
+		body = applyGitHubCopilotResponsesDefaults(body)
 	} else {
 		body = normalizeGitHubCopilotChatTools(body)
 	}
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	if useResponses && from.String() == "claude" {
 		converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
 	} else {
+		data = normalizeGitHubCopilotReasoningField(data)
 		converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
 	}
-	resp = cliproxyexecutor.Response{Payload: converted}
+	resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
 	reporter.ensurePublished(ctx)
 	return resp, nil
 }
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = e.normalizeModel(req.Model, body)
 	body = flattenAssistantContent(body)
+	body = stripUnsupportedBetas(body)

 	// Detect vision content before input normalization removes messages
 	hasVision := detectVisionContent(body)
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	if useResponses {
 		body = normalizeGitHubCopilotResponsesInput(body)
 		body = normalizeGitHubCopilotResponsesTools(body)
+		body = applyGitHubCopilotResponsesDefaults(body)
 	} else {
 		body = normalizeGitHubCopilotChatTools(body)
 	}
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 			if useResponses && from.String() == "claude" {
 				chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
 			} else {
-				chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
+				// Strip SSE "data: " prefix before reasoning field normalization,
+				// since normalizeGitHubCopilotReasoningField expects pure JSON.
+				// Re-wrap with the prefix afterward for the translator.
+				normalizedLine := bytes.Clone(line)
+				if bytes.HasPrefix(line, dataTag) {
+					sseData := bytes.TrimSpace(line[len(dataTag):])
+					if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
+						normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
+						if !bytes.Equal(normalized, sseData) {
+							normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
+						}
+					}
+				}
+				chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
 			}
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	}, nil
 }

-// CountTokens is not supported for GitHub Copilot.
-func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
+// CountTokens estimates the token count locally, since the GitHub Copilot API does
+// not expose a dedicated token counting endpoint.
+func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	var (
+		model        = thinking.ParseSuffix(req.Model).ModelName
+		sourceFormat = opts.SourceFormat
+		openAIFormat = sdktranslator.FromString("openai")
+	)
+	// Counting is done on the OpenAI-shaped form of the request.
+	openAIBody := sdktranslator.TranslateRequest(sourceFormat, openAIFormat, model, req.Payload, false)
+
+	tokenizer, errTokenizer := helps.TokenizerForModel(model)
+	if errTokenizer != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", errTokenizer)
+	}
+	total, errCount := helps.CountOpenAIChatTokens(tokenizer, openAIBody)
+	if errCount != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", errCount)
+	}
+	// Report the total in the caller's original format.
+	usage := sdktranslator.TranslateTokenCount(ctx, openAIFormat, sourceFormat, total, helps.BuildOpenAIUsageJSON(total))
+	return cliproxyexecutor.Response{Payload: usage}, nil
+}

 // Refresh validates the GitHub token is still working.
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
 	r.Header.Set("X-Request-Id", uuid.NewString())

 	initiator := "user"
-	if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
+	if isAgentInitiated(body) {
 		initiator = "agent"
 	}
 	r.Header.Set("X-Initiator", initiator)
 }

-func detectLastConversationRole(body []byte) string {
+// isAgentInitiated determines whether the current request is agent-initiated
+// (tool callbacks, continuations) rather than user-initiated (new user prompt).
+//
+// GitHub Copilot uses the X-Initiator header for billing:
+//   - "user"  → consumes premium request quota
+//   - "agent" → free (tool loops, continuations)
+//
+// The challenge: Claude Code sends tool results as role:"user" messages with
+// content type "tool_result". After translation to OpenAI format, the tool_result
+// part becomes a separate role:"tool" message, but if the original Claude message
+// also contained text content (e.g. skill invocations, attachment descriptions),
+// a role:"user" message is emitted AFTER the tool message, making the last message
+// appear user-initiated when it's actually part of an agent tool loop.
+//
+// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
+// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
+// we infer agent status from the payload. For "messages": the last role, a
+// tool_result part, or a preceding assistant tool_use. For "input": any prior assistant or call item.
+//
+// References:
+//   - opencode#8030, opencode#15824: same root cause and fix approach
+//   - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
+//   - pi-ai: github-copilot-headers.ts (last message role check)
+func isAgentInitiated(body []byte) bool {
 	if len(body) == 0 {
-		return ""
+		return false
 	}

+	// Chat Completions API: check messages array
 	if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
 		arr := messages.Array()
+		if len(arr) == 0 {
+			return false
+		}
+
+		lastRole := ""
 		for i := len(arr) - 1; i >= 0; i-- {
-			if role := arr[i].Get("role").String(); role != "" {
-				return role
+			if r := arr[i].Get("role").String(); r != "" {
+				lastRole = r
+				break
 			}
 		}
+
+		// If last message is assistant or tool, clearly agent-initiated.
+		if lastRole == "assistant" || lastRole == "tool" {
+			return true
+		}
+
+		// If last message is "user", check whether it contains tool results
+		// (indicating a tool-loop continuation) or if the preceding message
+		// is an assistant tool_use. This is more precise than checking for
+		// any prior assistant message, which would false-positive on genuine
+		// multi-turn follow-ups.
+		if lastRole == "user" {
+			// Check if the last user message contains tool_result content
+			lastContent := arr[len(arr)-1].Get("content")
+			if lastContent.Exists() && lastContent.IsArray() {
+				for _, part := range lastContent.Array() {
+					if part.Get("type").String() == "tool_result" {
+						return true
+					}
+				}
+			}
+			// Check if the second-to-last message is an assistant with tool_use
+			if len(arr) >= 2 {
+				prev := arr[len(arr)-2]
+				if prev.Get("role").String() == "assistant" {
+					prevContent := prev.Get("content")
+					if prevContent.Exists() && prevContent.IsArray() {
+						for _, part := range prevContent.Array() {
+							if part.Get("type").String() == "tool_use" {
+								return true
+							}
+						}
+					}
+				}
+			}
+		}
+
+		return false
 	}

+	// Responses API: check input array
 	if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
 		arr := inputs.Array()
-		for i := len(arr) - 1; i >= 0; i-- {
-			item := arr[i]
+		if len(arr) == 0 {
+			return false
+		}

-			// Most Responses input items carry a top-level role.
-			if role := item.Get("role").String(); role != "" {
-				return role
+		// Check last item
+		last := arr[len(arr)-1]
+		if role := last.Get("role").String(); role == "assistant" {
+			return true
+		}
+		switch last.Get("type").String() {
+		case "function_call", "function_call_arguments", "computer_call":
+			return true
+		case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
+			return true
+		}
+
+		// Otherwise, any assistant-role or function/computer call item in the input marks a continuation
+		for _, item := range arr {
+			if role := item.Get("role").String(); role == "assistant" {
+				return true
 			}
-
 			switch item.Get("type").String() {
-			case "function_call", "function_call_arguments", "computer_call":
-				return "assistant"
-			case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
-				return "tool"
+			case "function_call", "function_call_output", "function_call_response",
+				"function_call_arguments", "computer_call", "computer_call_output":
+				return true
 			}
 		}
 	}

-	return ""
+	return false
 }

 // detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
 	return body
 }

+// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
+// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
+// context on Anthropic's API, but Copilot's Claude models are limited to
+// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
+// it from the translated body avoids confusing downstream translators.
+var copilotUnsupportedBetas = []string{
+	"context-1m-2025-08-07",
+}
+
+// stripUnsupportedBetas removes Anthropic-specific beta entries from the
+// translated request body. In OpenAI format the betas may appear under
+// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
+// "betas". This function checks all known locations.
+func stripUnsupportedBetas(body []byte) []byte {
+	betaPaths := []string{"betas", "metadata.betas"}
+	for _, path := range betaPaths {
+		arr := gjson.GetBytes(body, path)
+		if !arr.Exists() || !arr.IsArray() {
+			continue
+		}
+		var filtered []string
+		changed := false
+		for _, item := range arr.Array() {
+			beta := item.String()
+			if isCopilotUnsupportedBeta(beta) {
+				changed = true
+				continue
+			}
+			filtered = append(filtered, beta)
+		}
+		if !changed {
+			continue
+		}
+		if len(filtered) == 0 {
+			body, _ = sjson.DeleteBytes(body, path)
+		} else {
+			body, _ = sjson.SetBytes(body, path, filtered)
+		}
+	}
+	return body
+}
+
+func isCopilotUnsupportedBeta(beta string) bool {
+	return slices.Contains(copilotUnsupportedBetas, beta)
+}
+
+// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
+// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
+// that the SDK translator expects. This handles both streaming deltas
+// (choices[].delta.reasoning_text) and non-streaming messages
+// (choices[].message.reasoning_text). The value is copied (not renamed) only
+// when 'reasoning_content' is absent, null, or empty, preserving standard
+// responses. All choices are processed to support n>1 requests.
+func normalizeGitHubCopilotReasoningField(data []byte) []byte {
+	choices := gjson.GetBytes(data, "choices")
+	if !choices.Exists() || !choices.IsArray() {
+		return data
+	}
+	for i := range choices.Array() {
+		// Non-streaming: choices[i].message.reasoning_text
+		msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
+		msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
+		if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
+			if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
+				data, _ = sjson.SetBytes(data, msgRC, rt.String())
+			}
+		}
+		// Streaming: choices[i].delta.reasoning_text
+		deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
+		deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
+		if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
+			if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
+				data, _ = sjson.SetBytes(data, deltaRC, rt.String())
+			}
+		}
+	}
+	return data
+}
+
 func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
 	if sourceFormat.String() == "openai-response" {
 		return true
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
 }

 func containsEndpoint(endpoints []string, endpoint string) bool {
-	for _, item := range endpoints {
-		if item == endpoint {
-			return true
-		}
-	}
-	return false
+	return slices.Contains(endpoints, endpoint)
 }

 // flattenAssistantContent converts assistant message content from array format
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
 	return body
 }

+// applyGitHubCopilotResponsesDefaults fills in required Responses API fields
+// (only when absent) that both vscode-copilot-chat and pi-ai always include.
+//
+// References:
+//   - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
+//   - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
+func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
+	// store: false — prevents request/response storage
+	if !gjson.GetBytes(body, "store").Exists() {
+		body, _ = sjson.SetBytes(body, "store", false)
+	}
+
+	// include: ["reasoning.encrypted_content"] — enables reasoning content
+	// reuse across turns, avoiding redundant computation
+	if !gjson.GetBytes(body, "include").Exists() {
+		body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
+	}
+
+	// If reasoning.effort is set but reasoning.summary is not, default to "auto"
+	if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
+		body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
+	}
+
+	return body
+}
+
 func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
 	tools := gjson.GetBytes(body, "tools")
 	if tools.Exists() {
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
 			m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
 		}

+		// Override with real limits from the Copilot API when available.
+		// The API returns per-account limits (individual vs business) under
+		// capabilities.limits, which are more accurate than our static
+		// fallback values. We use max_prompt_tokens as ContextLength because
+		// that's the hard limit the Copilot API enforces on prompt size —
+		// exceeding it triggers "prompt token count exceeds the limit" errors.
+		if limits := entry.Limits(); limits != nil {
+			if limits.MaxPromptTokens > 0 {
+				m.ContextLength = limits.MaxPromptTokens
+			}
+			if limits.MaxOutputTokens > 0 {
+				m.MaxCompletionTokens = limits.MaxOutputTokens
+			}
+		}
+
 		models = append(models, m)
 	}

--- a/internal/runtime/executor/github_copilot_executor_test.go
+++ b/internal/runtime/executor/github_copilot_executor_test.go
@@ -1,11 +1,14 @@
 package executor

 import (
+	"context"
 	"net/http"
 	"strings"
 	"testing"

+	copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
 )
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
 }

 func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
-	t.Parallel()
+	// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
 	if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
 		t.Fatal("expected responses-only registry model to use /responses")
 	}
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
 }

 func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
-	t.Parallel()
+	// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.

 	reg := registry.GetGlobalRegistry()
 	clientID := "github-copilot-test-client"
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
 	t.Parallel()
 	resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
 	out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
-	if gjson.Get(out, "type").String() != "message" {
-		t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
+	if gjson.GetBytes(out, "type").String() != "message" {
+		t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
 	}
-	if gjson.Get(out, "content.0.type").String() != "text" {
-		t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
+	if gjson.GetBytes(out, "content.0.type").String() != "text" {
+		t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
 	}
-	if gjson.Get(out, "content.0.text").String() != "hello" {
-		t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
+	if gjson.GetBytes(out, "content.0.text").String() != "hello" {
+		t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
 	}
 }

@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
 	t.Parallel()
 	resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
 	out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
-	if gjson.Get(out, "content.0.type").String() != "tool_use" {
-		t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
+	if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
+		t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
 	}
-	if gjson.Get(out, "content.0.name").String() != "sum" {
-		t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
+	if gjson.GetBytes(out, "content.0.name").String() != "sum" {
+		t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
 	}
-	if gjson.Get(out, "stop_reason").String() != "tool_use" {
-		t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
+	if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
+		t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
 	}
 }

@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
 	var param any

 	created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
-	if len(created) == 0 || !strings.Contains(created[0], "message_start") {
+	if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
 		t.Fatalf("created events = %#v, want message_start", created)
 	}

 	delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
-	joinedDelta := strings.Join(delta, "")
+	var joinedDelta string
+	for _, d := range delta {
+		joinedDelta += string(d)
+	}
 	if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
 		t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
 	}

 	completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
-	joinedCompleted := strings.Join(completed, "")
+	var joinedCompleted string
+	for _, c := range completed {
+		joinedCompleted += string(c)
+	}
 	if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
 		t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
 	}
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
 	}
 }

-func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
+func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
-	// Last role governs the initiator decision.
-	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
+	// When the last role is "user" and the message contains tool_result content,
+	// the request is a continuation (e.g. Claude tool result translated to a
+	// synthetic user message). Should be "agent".
+	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
 	e.applyHeaders(req, "token", body)
-	if got := req.Header.Get("X-Initiator"); got != "user" {
-		t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
+	if got := req.Header.Get("X-Initiator"); got != "agent" {
+		t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
 	}
 }

@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// When the last message has role "tool", it's clearly agent-initiated.
 	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
 	e.applyHeaders(req, "token", body)
 	if got := req.Header.Get("X-Initiator"); got != "agent" {
-		t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
+		t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
 	}
 }

@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
 	}
 }

-func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
+func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
 	t.Parallel()
 	e := &GitHubCopilotExecutor{}
 	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// Responses API: last item is user-role but history contains assistant → agent.
 	body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
 	e.applyHeaders(req, "token", body)
-	if got := req.Header.Get("X-Initiator"); got != "user" {
-		t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
+	if got := req.Header.Get("X-Initiator"); got != "agent" {
+		t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
 	}
 }

@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
 	}
 }

+func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// Genuine multi-turn: user → assistant (plain text) → user follow-up.
+	// No tool messages → should be "user" (not a false-positive).
+	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
+	e.applyHeaders(req, "token", body)
+	if got := req.Header.Get("X-Initiator"); got != "user" {
+		t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
+	}
+}
+
+func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	// User follow-up after a completed tool-use conversation.
+	// The last message is a genuine user question — should be "user", not "agent".
+	// This aligns with opencode's behavior: only active tool loops are agent-initiated.
+	body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
+	e.applyHeaders(req, "token", body)
+	if got := req.Header.Get("X-Initiator"); got != "user" {
+		t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
+	}
+}
+
 // --- Tests for x-github-api-version header (Problem M) ---

 func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
 		t.Fatal("expected no vision content when messages field is absent")
 	}
 }
+
+// --- Tests for applyGitHubCopilotResponsesDefaults ---
+
+func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	if gjson.GetBytes(got, "store").Bool() != false {
+		t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
+	}
+	inc := gjson.GetBytes(got, "include")
+	if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
+		t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
+	}
+	if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
+		t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
+	}
+}
+
+func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	if gjson.GetBytes(got, "store").Bool() != true {
+		t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
+	}
+	if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
+		t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
+	}
+	if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
+		t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
+	}
+}
+
+func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
+	t.Parallel()
+	body := []byte(`{"input":"hello"}`)
+	got := applyGitHubCopilotResponsesDefaults(body)
+
+	if gjson.GetBytes(got, "store").Bool() != false {
+		t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
+	}
+	// reasoning.summary should NOT be set when reasoning.effort is absent
+	if gjson.GetBytes(got, "reasoning.summary").Exists() {
+		t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
+	}
+}
+
+// --- Tests for normalizeGitHubCopilotReasoningField ---
+
+func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
+	got := normalizeGitHubCopilotReasoningField(data)
+	rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
+	if rc != "I think..." {
+		t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
+	}
+}
+
+func TestNormalizeReasoningField_Streaming(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
+	got := normalizeGitHubCopilotReasoningField(data)
+	rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
+	if rc != "thinking delta" {
+		t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
+	}
+}
+
+func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
+	got := normalizeGitHubCopilotReasoningField(data)
+	rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
+	if rc != "existing" {
+		t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
+	}
+}
+
+func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
+	got := normalizeGitHubCopilotReasoningField(data)
+	rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
+	rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
+	if rc0 != "thought-0" {
+		t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
+	}
+	if rc1 != "thought-1" {
+		t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
+	}
+}
+
+func TestNormalizeReasoningField_NoChoices(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"id":"chatcmpl-123"}`)
+	got := normalizeGitHubCopilotReasoningField(data)
+	if string(got) != string(data) {
+		t.Fatalf("expected no change, got %s", string(got))
+	}
+}
+
+func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
+	e.applyHeaders(req, "token", nil)
+	if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
+		t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
+	}
+}
+
+// --- Tests for CountTokens (local tiktoken estimation) ---
+
+func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
+	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
+		Model:   "gpt-4o",
+		Payload: body,
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	if len(resp.Payload) == 0 {
+		t.Fatal("CountTokens() returned empty payload")
+	}
+	// The response should contain a positive token count.
+	tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
+	if tokens <= 0 {
+		t.Fatalf("expected positive token count, got %d", tokens)
+	}
+}
+
+func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
+	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
+		Model:   "claude-sonnet-4",
+		Payload: body,
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("claude"),
+	})
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	// Claude source format → should get input_tokens in response
+	inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
+	if inputTokens <= 0 {
+		// Fallback: check usage.prompt_tokens (depends on translator registration)
+		promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
+		if promptTokens <= 0 {
+			t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
+		}
+	}
+}
+
+func TestCountTokens_EmptyPayload(t *testing.T) {
+	t.Parallel()
+	e := &GitHubCopilotExecutor{}
+	resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
+		Model:   "gpt-4o",
+		Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err != nil {
+		t.Fatalf("CountTokens() error: %v", err)
+	}
+	tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
+	// Empty messages should return 0 tokens.
+	if tokens != 0 {
+		t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
+	}
+}
+
+func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
+	t.Parallel()
+
+	body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
+	result := stripUnsupportedBetas(body)
+
+	betas := gjson.GetBytes(result, "betas")
+	if !betas.Exists() {
+		t.Fatal("betas field should still exist after stripping")
+	}
+	for _, item := range betas.Array() {
+		if item.String() == "context-1m-2025-08-07" {
+			t.Fatal("context-1m-2025-08-07 should have been stripped")
+		}
+	}
+	// Other betas should be preserved
+	found := false
+	for _, item := range betas.Array() {
+		if item.String() == "interleaved-thinking-2025-05-14" {
+			found = true
+		}
+	}
+	if !found {
+		t.Fatal("other betas should be preserved")
+	}
+}
+
+func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
+	t.Parallel()
+
+	body := []byte(`{"model":"gpt-4o","messages":[]}`)
+	result := stripUnsupportedBetas(body)
+
+	// Should be unchanged
+	if string(result) != string(body) {
+		t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
+	}
+}
+
+func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
+	t.Parallel()
+
+	body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
+	result := stripUnsupportedBetas(body)
+
+	betas := gjson.GetBytes(result, "metadata.betas")
+	if !betas.Exists() {
+		t.Fatal("metadata.betas field should still exist after stripping")
+	}
+	for _, item := range betas.Array() {
+		if item.String() == "context-1m-2025-08-07" {
+			t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
+		}
+	}
+	if betas.Array()[0].String() != "other-beta" {
+		t.Fatal("other betas in metadata.betas should be preserved")
+	}
+}
+
+func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
+	t.Parallel()
+
+	body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
+	result := stripUnsupportedBetas(body)
+
+	betas := gjson.GetBytes(result, "betas")
+	if betas.Exists() {
+		t.Fatal("betas field should be deleted when all betas are stripped")
+	}
+}
+
+func TestCopilotModelEntry_Limits(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name         string
+		capabilities map[string]any
+		wantNil      bool
+		wantPrompt   int
+		wantOutput   int
+		wantContext  int
+	}{
+		{
+			name:         "nil capabilities",
+			capabilities: nil,
+			wantNil:      true,
+		},
+		{
+			name:         "no limits key",
+			capabilities: map[string]any{"family": "claude-opus-4.6"},
+			wantNil:      true,
+		},
+		{
+			name:         "limits is not a map",
+			capabilities: map[string]any{"limits": "invalid"},
+			wantNil:      true,
+		},
+		{
+			name: "all zero values",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(0),
+					"max_prompt_tokens":         float64(0),
+					"max_output_tokens":         float64(0),
+				},
+			},
+			wantNil: true,
+		},
+		{
+			name: "individual account limits (128K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(144000),
+					"max_prompt_tokens":         float64(128000),
+					"max_output_tokens":         float64(64000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  128000,
+			wantOutput:  64000,
+			wantContext: 144000,
+		},
+		{
+			name: "business account limits (168K prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_context_window_tokens": float64(200000),
+					"max_prompt_tokens":         float64(168000),
+					"max_output_tokens":         float64(32000),
+				},
+			},
+			wantNil:     false,
+			wantPrompt:  168000,
+			wantOutput:  32000,
+			wantContext: 200000,
+		},
+		{
+			name: "partial limits (only prompt)",
+			capabilities: map[string]any{
+				"limits": map[string]any{
+					"max_prompt_tokens": float64(128000),
+				},
+			},
+			wantNil:    false,
+			wantPrompt: 128000,
+			wantOutput: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			entry := copilotauth.CopilotModelEntry{
+				ID:           "claude-opus-4.6",
+				Capabilities: tt.capabilities,
+			}
+			limits := entry.Limits()
+			if tt.wantNil {
+				if limits != nil {
+					t.Fatalf("expected nil limits, got %+v", limits)
+				}
+				return
+			}
+			if limits == nil {
+				t.Fatal("expected non-nil limits, got nil")
+			}
+			if limits.MaxPromptTokens != tt.wantPrompt {
+				t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
+			}
+			if limits.MaxOutputTokens != tt.wantOutput {
+				t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
+			}
+			if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
+				t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
+			}
+		})
+	}
+}
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -172,32 +172,101 @@ func timeUntilNextDay() time.Duration {
 	return tomorrow.Sub(now)
 }

-// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
+// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
+// It always injects the default system prompt and merges any user-provided system messages
+// into the injected system message content to satisfy Qwen's strict message ordering rules.
 func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
-	messages := gjson.GetBytes(payload, "messages")
-	if messages.Exists() && messages.IsArray() {
-		var buf bytes.Buffer
-		buf.WriteByte('[')
-		buf.Write(qwenDefaultSystemMessage)
-		for _, msg := range messages.Array() {
-			buf.WriteByte(',')
-			buf.WriteString(msg.Raw)
+	isInjectedSystemPart := func(part gjson.Result) bool {
+		if !part.Exists() || !part.IsObject() {
+			return false
 		}
-		buf.WriteByte(']')
-		updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
-		if errSet != nil {
-			return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+		if !strings.EqualFold(part.Get("type").String(), "text") {
+			return false
 		}
-		return updated, nil
+		if !strings.EqualFold(part.Get("cache_control.type").String(), "ephemeral") {
+			return false
+		}
+		text := part.Get("text").String()
+		return text == "" || text == "You are Qwen Code."
 	}

-	var buf bytes.Buffer
-	buf.WriteByte('[')
-	buf.Write(qwenDefaultSystemMessage)
-	buf.WriteByte(']')
-	updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
+	defaultParts := gjson.ParseBytes(qwenDefaultSystemMessage).Get("content")
+	var systemParts []any
+	if defaultParts.Exists() && defaultParts.IsArray() {
+		for _, part := range defaultParts.Array() {
+			systemParts = append(systemParts, part.Value())
+		}
+	}
+	if len(systemParts) == 0 {
+		systemParts = append(systemParts, map[string]any{
+			"type": "text",
+			"text": "You are Qwen Code.",
+			"cache_control": map[string]any{
+				"type": "ephemeral",
+			},
+		})
+	}
+
+	appendSystemContent := func(content gjson.Result) {
+		makeTextPart := func(text string) map[string]any {
+			return map[string]any{
+				"type": "text",
+				"text": text,
+			}
+		}
+
+		if !content.Exists() || content.Type == gjson.Null {
+			return
+		}
+		if content.IsArray() {
+			for _, part := range content.Array() {
+				if part.Type == gjson.String {
+					systemParts = append(systemParts, makeTextPart(part.String()))
+					continue
+				}
+				if isInjectedSystemPart(part) {
+					continue
+				}
+				systemParts = append(systemParts, part.Value())
+			}
+			return
+		}
+		if content.Type == gjson.String {
+			systemParts = append(systemParts, makeTextPart(content.String()))
+			return
+		}
+		if content.IsObject() {
+			if isInjectedSystemPart(content) {
+				return
+			}
+			systemParts = append(systemParts, content.Value())
+			return
+		}
+		systemParts = append(systemParts, makeTextPart(content.String()))
+	}
+
+	messages := gjson.GetBytes(payload, "messages")
+	var nonSystemMessages []any
+	if messages.Exists() && messages.IsArray() {
+		for _, msg := range messages.Array() {
+			if strings.EqualFold(msg.Get("role").String(), "system") {
+				appendSystemContent(msg.Get("content"))
+				continue
+			}
+			nonSystemMessages = append(nonSystemMessages, msg.Value())
+		}
+	}
+
+	newMessages := make([]any, 0, 1+len(nonSystemMessages))
+	newMessages = append(newMessages, map[string]any{
+		"role":    "system",
+		"content": systemParts,
+	})
+	newMessages = append(newMessages, nonSystemMessages...)
+
+	updated, errSet := sjson.SetBytes(payload, "messages", newMessages)
 	if errSet != nil {
-		return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+		return nil, fmt.Errorf("qwen executor: set system message failed: %w", errSet)
 	}
 	return updated, nil
 }
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -4,6 +4,7 @@ import (
 	"testing"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
 )

 func TestQwenExecutorParseSuffix(t *testing.T) {
@@ -28,3 +29,123 @@ func TestQwenExecutorParseSuffix(t *testing.T) {
 		})
 	}
 }
+
+func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
+	payload := []byte(`{
+		"model": "qwen3.6-plus",
+		"stream": true,
+		"messages": [
+			{ "role": "system", "content": "ABCDEFG" },
+			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
+		]
+	}`)
+
+	out, err := ensureQwenSystemMessage(payload)
+	if err != nil {
+		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
+	}
+
+	msgs := gjson.GetBytes(out, "messages").Array()
+	if len(msgs) != 2 {
+		t.Fatalf("messages length = %d, want 2", len(msgs))
+	}
+	if msgs[0].Get("role").String() != "system" {
+		t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
+	}
+	parts := msgs[0].Get("content").Array()
+	if len(parts) != 2 {
+		t.Fatalf("messages[0].content length = %d, want 2", len(parts))
+	}
+	if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
+		t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
+	}
+	if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
+		t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
+	}
+	if msgs[1].Get("role").String() != "user" {
+		t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
+	}
+}
+
+func TestEnsureQwenSystemMessage_MergeObjectSystem(t *testing.T) {
+	payload := []byte(`{
+		"messages": [
+			{ "role": "system", "content": { "type": "text", "text": "ABCDEFG" } },
+			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
+		]
+	}`)
+
+	out, err := ensureQwenSystemMessage(payload)
+	if err != nil {
+		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
+	}
+
+	msgs := gjson.GetBytes(out, "messages").Array()
+	if len(msgs) != 2 {
+		t.Fatalf("messages length = %d, want 2", len(msgs))
+	}
+	parts := msgs[0].Get("content").Array()
+	if len(parts) != 2 {
+		t.Fatalf("messages[0].content length = %d, want 2", len(parts))
+	}
+	if parts[1].Get("text").String() != "ABCDEFG" {
+		t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "ABCDEFG")
+	}
+}
+
+func TestEnsureQwenSystemMessage_PrependsWhenMissing(t *testing.T) {
+	payload := []byte(`{
+		"messages": [
+			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
+		]
+	}`)
+
+	out, err := ensureQwenSystemMessage(payload)
+	if err != nil {
+		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
+	}
+
+	msgs := gjson.GetBytes(out, "messages").Array()
+	if len(msgs) != 2 {
+		t.Fatalf("messages length = %d, want 2", len(msgs))
+	}
+	if msgs[0].Get("role").String() != "system" {
+		t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
+	}
+	if !msgs[0].Get("content").IsArray() || len(msgs[0].Get("content").Array()) == 0 {
+		t.Fatalf("messages[0].content = %s, want non-empty array", msgs[0].Get("content").Raw)
+	}
+	if msgs[1].Get("role").String() != "user" {
+		t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
+	}
+}
+
+func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
+	payload := []byte(`{
+		"messages": [
+			{ "role": "system", "content": "A" },
+			{ "role": "user", "content": [ { "type": "text", "text": "hi" } ] },
+			{ "role": "system", "content": "B" }
+		]
+	}`)
+
+	out, err := ensureQwenSystemMessage(payload)
+	if err != nil {
+		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
+	}
+
+	msgs := gjson.GetBytes(out, "messages").Array()
+	if len(msgs) != 2 {
+		t.Fatalf("messages length = %d, want 2", len(msgs))
+	}
+	parts := msgs[0].Get("content").Array()
+	if len(parts) != 3 {
+		t.Fatalf("messages[0].content length = %d, want 3", len(parts))
+	}
+	if parts[1].Get("text").String() != "A" {
+		t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "A")
+	}
+	if parts[2].Get("text").String() != "B" {
+		t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
+	}
+}
Author	SHA1	Message	Date
Luis Pater	7223fee2de	Merge branch 'pr-488' # Conflicts: # README.md # README_CN.md # README_JA.md	2026-04-05 02:08:45 +08:00
Luis Pater	ada8e2905e	feat(api): enhance proxy resolution for API key-based auth Added comprehensive support for resolving proxy URLs from configuration based on API key and provider attributes. Introduced new helper functions and extended the test suite to validate fallback mechanisms and compatibility cases.	2026-04-05 01:56:34 +08:00
Luis Pater	4ba10531da	feat(docs): add Poixe AI sponsorship details to README files Added Poixe AI sponsorship information, including referral bonuses and platform capabilities, to README files in English, Japanese, and Chinese. Updated assets to include Poixe AI logo.	2026-04-05 01:20:50 +08:00
Luis Pater	3774b56e9f	feat(misc): add background updater for Antigravity version caching Introduce `StartAntigravityVersionUpdater` to periodically refresh the cached Antigravity version using a non-blocking background process. Updated main server flow to initialize the updater.	2026-04-04 22:09:11 +08:00
Luis Pater	c2d4137fb9	feat(executor): enhance Qwen system message handling with strict injection and merging rules Closes: #2537	2026-04-04 21:51:02 +08:00
Luis Pater	2ee938acaf	Merge pull request #2535 from rensumo/main feat: 动态获取 Antigravity User-Agent 版本号	2026-04-04 21:00:47 +08:00
rensumo	8d5e470e1f	feat: dynamically fetch antigravity UA version from releases API Fetch the latest version from the antigravity auto-updater releases endpoint and cache it for 6 hours. Falls back to 1.21.9 if the API is unreachable or returns unexpected data.	2026-04-04 14:52:59 +08:00
Luis Pater	3882494878	Merge pull request #486 from router-for-me/plus v6.9.14	2026-04-04 11:40:13 +08:00
Luis Pater	088c1d07f4	Merge branch 'main' into plus	2026-04-04 11:40:03 +08:00
Luis Pater	8430b28cfa	Merge pull request #2526 from rensumo/main feat: 升级反重力 (antigravity) UA 版本为 1.21.9	2026-04-04 11:32:16 +08:00
rensumo	f3ab8f4bc5	chore: update antigravity UA version to 1.21.9	2026-04-04 07:35:08 +08:00
Luis Pater	0e4f189c2e	Merge pull request #1302 from dinhkarate/feat(vertex)/add-prefix-field Feat(vertex): add prefix field	2026-04-04 04:17:12 +08:00
Luis Pater	98509f615c	Merge pull request #485 from kunish/fix/copilot-premium-request-inflation fix(copilot): reduce premium request inflation, enable thinking, and use dynamic API limits	2026-04-04 02:19:56 +08:00
kunish	87bf0b73d5	fix(copilot): use dynamic API limits to prevent prompt token overflow The Copilot API enforces per-account prompt token limits (128K individual, 168K business) that differ from the static 200K context length advertised by the proxy. This mismatch caused Claude Code to accumulate context beyond the actual limit, triggering "prompt token count exceeds the limit of 128000" errors. Changes: - Extract max_prompt_tokens and max_output_tokens from the Copilot /models API response (capabilities.limits) and use them as the authoritative ContextLength and MaxCompletionTokens values - Add CopilotModelLimits struct and Limits() helper to parse limits from the existing Capabilities map - Fix GitLab Duo context-1m beta header not being set when routing through the Anthropic gateway (gitlab_duo_force_context_1m attr was set but only gin headers were checked) - Fix flaky parallel tests that shared global model registry state	2026-04-03 23:54:17 +08:00
kunish	b849bf79d6	fix(copilot): address code review — SSE reasoning, multi-choice, agent detection - Strip SSE `data:` prefix before normalizing reasoning_text→reasoning_content in streaming mode; re-wrap afterward for the translator - Iterate all choices in normalizeGitHubCopilotReasoningField (not just choices[0]) to support n>1 requests - Remove over-broad tool-role fallback in isAgentInitiated that scanned all messages for role:"tool", aligning with opencode's approach of only detecting active tool loops — genuine user follow-ups after tool use are no longer mis-classified as agent-initiated - Add 5 reasoning normalization tests; update 2 X-Initiator tests to match refined semantics	2026-04-03 20:51:19 +08:00
kunish	59af2c57b1	fix(copilot): reduce premium request inflation and enable thinking This commit addresses three issues with Claude Code through GitHub Copilot: 1. Premium request inflation: Responses API requests were missing Openai-Intent headers and proper defaults, causing Copilot to bill each tool-loop continuation as a new premium request. Fixed by adding isAgentInitiated() heuristic (checks for tool_result content or preceding assistant tool_use), applying Responses API defaults (store, include, reasoning.summary), and local tiktoken-based token counting to avoid extra API calls. 2. Context overflow: Claude Code's modelSupports1M() hardcodes opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K. Fixed by stripping the context-1m-2025-08-07 beta from translated request bodies. Also forwards response headers in non-streaming Execute() and registers the GET /copilot-quota management API route. 3. Thinking not working: Add ThinkingSupport with level-based reasoning to Claude models in the static definitions. Normalize Copilot's non-standard 'reasoning_text' response field to 'reasoning_content' before passing to the SDK translator. Use caller-provided context in CountTokens instead of Background().	2026-04-03 20:24:30 +08:00
dinhkarate	36efcc6e28	fix(vertex): include prefix in auth filename and validate at import Address two blocking issues from PR review: - Auth file now named vertex-{prefix}-{project}.json so importing the same project with different prefixes no longer overwrites credentials - Prefix containing "/" is rejected at import time instead of being silently ignored at runtime - Add prefix to in-memory metadata map for consistency Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-03-17 15:06:04 +07:00
Pham Quang Dinh	a337ecf35c	Merge branch 'router-for-me:main' into feat(vertex)/add-prefix-field	2026-03-17 11:48:40 +07:00
dinhkarate	14cb2b95c6	feat(vertex): add --vertex-import-prefix flag for model namespacing	2026-01-29 13:32:38 +07:00
dinhkarate	fdeef48498	feat(vertex): Add Prefix field to VertexCredentialStorage for per-file model namespacing	2026-01-29 13:32:38 +07:00