From 0ffcce3ec8e6e1eed869309afaef7b4e6d765949 Mon Sep 17 00:00:00 2001
From: ChrAlpha <53332481+ChrAlpha@users.noreply.github.com>
Date: Thu, 15 Jan 2026 16:32:28 +0800
Subject: [PATCH] feat(registry): add supported endpoints for GitHub Copilot
 models

Enhance model definitions by including supported API endpoints for each model. This allows for better integration and usage tracking with the GitHub Copilot API.
---
 internal/registry/model_definitions.go        | 49 ++++++++++++++++---
 internal/registry/model_registry.go           |  8 +++
 .../executor/github_copilot_executor.go       | 36 ++++++++++++--
 internal/runtime/executor/usage_helpers.go    | 48 ++++++++++++++++++
 4 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 8359279c..8ec49304 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -834,6 +834,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-mini",
@@ -845,6 +846,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-codex",
@@ -856,6 +858,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1",
@@ -867,6 +870,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex",
@@ -878,6 +882,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
@@ -889,6 +894,19 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/responses"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.1 Codex Max",
+			Description:         "OpenAI GPT-5.1 Codex Max via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.2",
@@ -900,6 +918,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.2 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5.2-codex",
@@ -911,6 +930,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.2 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "claude-haiku-4.5",
@@ -922,6 +942,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Haiku 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.1",
@@ -933,6 +954,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.5",
@@ -944,6 +966,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4",
@@ -955,6 +978,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4.5",
@@ -966,6 +990,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-2.5-pro",
@@ -979,13 +1004,24 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 65536,
 		},
 		{
-			ID:                  "gemini-3-pro",
+			ID:                  "gemini-3-pro-preview",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Gemini 3 Pro",
-			Description:         "Google Gemini 3 Pro via GitHub Copilot",
+			DisplayName:         "Gemini 3 Pro (Preview)",
+			Description:         "Google Gemini 3 Pro Preview via GitHub Copilot",
+			ContextLength:       1048576,
+			MaxCompletionTokens: 65536,
+		},
+		{
+			ID:                  "gemini-3-flash-preview",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "Gemini 3 Flash (Preview)",
+			Description:         "Google Gemini 3 Flash Preview via GitHub Copilot",
 			ContextLength:       1048576,
 			MaxCompletionTokens: 65536,
 		},
@@ -1001,15 +1037,16 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 16384,
 		},
 		{
-			ID:                  "raptor-mini",
+			ID:                  "oswe-vscode-prime",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Raptor Mini",
-			Description:         "Raptor Mini via GitHub Copilot",
+			DisplayName:         "Raptor mini (Preview)",
+			Description:         "Raptor mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 	}
 }
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 537b03c2..13e2e699 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,6 +47,8 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
+	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
 
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
@@ -456,6 +458,9 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedParameters) > 0 {
 		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
+	if len(model.SupportedEndpoints) > 0 {
+		copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
+	}
 	return &copyModel
 }
 
@@ -968,6 +973,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if len(model.SupportedParameters) > 0 {
 			result["supported_parameters"] = model.SupportedParameters
 		}
+		if len(model.SupportedEndpoints) > 0 {
+			result["supported_endpoints"] = model.SupportedEndpoints
+		}
 		return result
 
 	case "claude", "kiro", "antigravity":
diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go
index f29af146..74e3fa6c 100644
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -23,6 +23,7 @@ import (
 const (
 	githubCopilotBaseURL       = "https://api.githubcopilot.com"
 	githubCopilotChatPath      = "/chat/completions"
+	githubCopilotResponsesPath = "/responses"
 	githubCopilotAuthType      = "github-copilot"
 	githubCopilotTokenCacheTTL = 25 * time.Minute
 	// tokenExpiryBuffer is the time before expiry when we should refresh the token.
@@ -106,7 +107,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	defer reporter.trackFailure(ctx, &err)
 
 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -117,7 +122,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", false)
 
-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return resp, err
@@ -172,6 +181,9 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	appendAPIResponseChunk(ctx, e.cfg, data)
 
 	detail := parseOpenAIUsage(data)
+	if useResponses && detail.TotalTokens == 0 {
+		detail = parseOpenAIResponsesUsage(data)
+	}
 	if detail.TotalTokens > 0 {
 		reporter.publish(ctx, detail)
 	}
@@ -194,7 +206,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	defer reporter.trackFailure(ctx, &err)
 
 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -205,9 +221,15 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
-	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	if !useResponses {
+		body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	}
 
-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
@@ -283,6 +305,10 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 				}
 				if detail, ok := parseOpenAIStreamUsage(line); ok {
 					reporter.publish(ctx, detail)
+				} else if useResponses {
+					if detail, ok := parseOpenAIResponsesStreamUsage(line); ok {
+						reporter.publish(ctx, detail)
+					}
 				}
 			}
 
@@ -393,6 +419,10 @@ func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
 	return body
 }
 
+func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format) bool {
+	return sourceFormat.String() == "openai-response"
+}
+
 // isHTTPSuccess checks if the status code indicates success (2xx).
 func isHTTPSuccess(statusCode int) bool {
 	return statusCode >= 200 && statusCode < 300
diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go
index a3ce270c..7d8d345e 100644
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -236,6 +236,54 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
 	return detail, true
 }
 
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+	usageNode := gjson.ParseBytes(data).Get("usage")
+	if !usageNode.Exists() {
+		return usage.Detail{}
+	}
+	detail := usage.Detail{
+		InputTokens:  usageNode.Get("input_tokens").Int(),
+		OutputTokens: usageNode.Get("output_tokens").Int(),
+		TotalTokens:  usageNode.Get("total_tokens").Int(),
+	}
+	if detail.TotalTokens == 0 {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+	}
+	if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+		detail.CachedTokens = cached.Int()
+	}
+	if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+		detail.ReasoningTokens = reasoning.Int()
+	}
+	return detail
+}
+
+func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
+	payload := jsonPayload(line)
+	if len(payload) == 0 || !gjson.ValidBytes(payload) {
+		return usage.Detail{}, false
+	}
+	usageNode := gjson.GetBytes(payload, "usage")
+	if !usageNode.Exists() {
+		return usage.Detail{}, false
+	}
+	detail := usage.Detail{
+		InputTokens:  usageNode.Get("input_tokens").Int(),
+		OutputTokens: usageNode.Get("output_tokens").Int(),
+		TotalTokens:  usageNode.Get("total_tokens").Int(),
+	}
+	if detail.TotalTokens == 0 {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+	}
+	if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+		detail.CachedTokens = cached.Int()
+	}
+	if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+		detail.ReasoningTokens = reasoning.Int()
+	}
+	return detail, true
+}
+
 func parseClaudeUsage(data []byte) usage.Detail {
 	usageNode := gjson.ParseBytes(data).Get("usage")
 	if !usageNode.Exists() {