mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-18 04:14:28 +00:00
fix(copilot): reduce premium request inflation and enable thinking
This commit addresses three issues with Claude Code through GitHub Copilot: 1. **Premium request inflation**: Responses API requests were missing Openai-Intent headers and proper defaults, causing Copilot to bill each tool-loop continuation as a new premium request. Fixed by adding isAgentInitiated() heuristic (checks for tool_result content or preceding assistant tool_use), applying Responses API defaults (store, include, reasoning.summary), and local tiktoken-based token counting to avoid extra API calls. 2. **Context overflow**: Claude Code's modelSupports1M() hardcodes opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K. Fixed by stripping the context-1m-2025-08-07 beta from translated request bodies. Also forwards response headers in non-streaming Execute() and registers the GET /copilot-quota management API route. 3. **Thinking not working**: Add ThinkingSupport with level-based reasoning to Claude models in the static definitions. Normalize Copilot's non-standard 'reasoning_text' response field to 'reasoning_content' before passing to the SDK translator. Use caller-provided context in CountTokens instead of Background().
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -40,7 +42,7 @@ const (
|
||||
copilotEditorVersion = "vscode/1.107.0"
|
||||
copilotPluginVersion = "copilot-chat/0.35.0"
|
||||
copilotIntegrationID = "vscode-chat"
|
||||
copilotOpenAIIntent = "conversation-panel"
|
||||
copilotOpenAIIntent = "conversation-edits"
|
||||
copilotGitHubAPIVer = "2025-04-01"
|
||||
)
|
||||
|
||||
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = e.normalizeModel(req.Model, body)
|
||||
body = flattenAssistantContent(body)
|
||||
body = stripUnsupportedBetas(body)
|
||||
|
||||
// Detect vision content before input normalization removes messages
|
||||
hasVision := detectVisionContent(body)
|
||||
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
||||
if useResponses {
|
||||
body = normalizeGitHubCopilotResponsesInput(body)
|
||||
body = normalizeGitHubCopilotResponsesTools(body)
|
||||
body = applyGitHubCopilotResponsesDefaults(body)
|
||||
} else {
|
||||
body = normalizeGitHubCopilotChatTools(body)
|
||||
}
|
||||
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
||||
if useResponses && from.String() == "claude" {
|
||||
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
|
||||
} else {
|
||||
data = normalizeGitHubCopilotReasoningField(data)
|
||||
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
||||
}
|
||||
resp = cliproxyexecutor.Response{Payload: converted}
|
||||
resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
|
||||
reporter.ensurePublished(ctx)
|
||||
return resp, nil
|
||||
}
|
||||
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
body = e.normalizeModel(req.Model, body)
|
||||
body = flattenAssistantContent(body)
|
||||
body = stripUnsupportedBetas(body)
|
||||
|
||||
// Detect vision content before input normalization removes messages
|
||||
hasVision := detectVisionContent(body)
|
||||
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
||||
if useResponses {
|
||||
body = normalizeGitHubCopilotResponsesInput(body)
|
||||
body = normalizeGitHubCopilotResponsesTools(body)
|
||||
body = applyGitHubCopilotResponsesDefaults(body)
|
||||
} else {
|
||||
body = normalizeGitHubCopilotChatTools(body)
|
||||
}
|
||||
@@ -378,7 +385,8 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
||||
if useResponses && from.String() == "claude" {
|
||||
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), ¶m)
|
||||
} else {
|
||||
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), ¶m)
|
||||
normalizedLine := normalizeGitHubCopilotReasoningField(bytes.Clone(line))
|
||||
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, ¶m)
|
||||
}
|
||||
for i := range chunks {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
|
||||
@@ -400,9 +408,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CountTokens is not supported for GitHub Copilot.
|
||||
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
|
||||
// CountTokens estimates token count locally using tiktoken, since the GitHub
|
||||
// Copilot API does not expose a dedicated token counting endpoint.
|
||||
func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
|
||||
|
||||
enc, err := helps.TokenizerForModel(baseModel)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
|
||||
}
|
||||
|
||||
count, err := helps.CountOpenAIChatTokens(enc, translated)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
|
||||
}
|
||||
|
||||
usageJSON := helps.BuildOpenAIUsageJSON(count)
|
||||
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
|
||||
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
|
||||
}
|
||||
|
||||
// Refresh validates the GitHub token is still working.
|
||||
@@ -491,46 +518,135 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
|
||||
r.Header.Set("X-Request-Id", uuid.NewString())
|
||||
|
||||
initiator := "user"
|
||||
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
|
||||
if isAgentInitiated(body) {
|
||||
initiator = "agent"
|
||||
}
|
||||
r.Header.Set("X-Initiator", initiator)
|
||||
}
|
||||
|
||||
func detectLastConversationRole(body []byte) string {
|
||||
// isAgentInitiated determines whether the current request is agent-initiated
|
||||
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
|
||||
//
|
||||
// GitHub Copilot uses the X-Initiator header for billing:
|
||||
// - "user" → consumes premium request quota
|
||||
// - "agent" → free (tool loops, continuations)
|
||||
//
|
||||
// The challenge: Claude Code sends tool results as role:"user" messages with
|
||||
// content type "tool_result". After translation to OpenAI format, the tool_result
|
||||
// part becomes a separate role:"tool" message, but if the original Claude message
|
||||
// also contained text content (e.g. skill invocations, attachment descriptions),
|
||||
// a role:"user" message is emitted AFTER the tool message, making the last message
|
||||
// appear user-initiated when it's actually part of an agent tool loop.
|
||||
//
|
||||
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
|
||||
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
|
||||
// we infer agent status by checking whether the conversation contains prior
|
||||
// assistant/tool messages — if it does, the current request is a continuation.
|
||||
//
|
||||
// References:
|
||||
// - opencode#8030, opencode#15824: same root cause and fix approach
|
||||
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
|
||||
// - pi-ai: github-copilot-headers.ts (last message role check)
|
||||
func isAgentInitiated(body []byte) bool {
|
||||
if len(body) == 0 {
|
||||
return ""
|
||||
return false
|
||||
}
|
||||
|
||||
// Chat Completions API: check messages array
|
||||
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
|
||||
arr := messages.Array()
|
||||
if len(arr) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
lastRole := ""
|
||||
for i := len(arr) - 1; i >= 0; i-- {
|
||||
if role := arr[i].Get("role").String(); role != "" {
|
||||
return role
|
||||
if r := arr[i].Get("role").String(); r != "" {
|
||||
lastRole = r
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If last message is assistant or tool, clearly agent-initiated.
|
||||
if lastRole == "assistant" || lastRole == "tool" {
|
||||
return true
|
||||
}
|
||||
|
||||
// If last message is "user", check whether it contains tool results
|
||||
// (indicating a tool-loop continuation) or if the preceding message
|
||||
// is an assistant tool_use. This is more precise than checking for
|
||||
// any prior assistant message, which would false-positive on genuine
|
||||
// multi-turn follow-ups.
|
||||
if lastRole == "user" {
|
||||
// Check if the last user message contains tool_result content
|
||||
lastContent := arr[len(arr)-1].Get("content")
|
||||
if lastContent.Exists() && lastContent.IsArray() {
|
||||
for _, part := range lastContent.Array() {
|
||||
if part.Get("type").String() == "tool_result" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check if the second-to-last message is an assistant with tool_use
|
||||
if len(arr) >= 2 {
|
||||
prev := arr[len(arr)-2]
|
||||
if prev.Get("role").String() == "assistant" {
|
||||
prevContent := prev.Get("content")
|
||||
if prevContent.Exists() && prevContent.IsArray() {
|
||||
for _, part := range prevContent.Array() {
|
||||
if part.Get("type").String() == "tool_use" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: if ANY message has role "tool", this conversation
|
||||
// involves tool use — mark as agent to cover compaction and
|
||||
// multi-turn agent continuations.
|
||||
for _, msg := range arr {
|
||||
if msg.Get("role").String() == "tool" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Responses API: check input array
|
||||
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
|
||||
arr := inputs.Array()
|
||||
for i := len(arr) - 1; i >= 0; i-- {
|
||||
item := arr[i]
|
||||
if len(arr) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Most Responses input items carry a top-level role.
|
||||
if role := item.Get("role").String(); role != "" {
|
||||
return role
|
||||
// Check last item
|
||||
last := arr[len(arr)-1]
|
||||
if role := last.Get("role").String(); role == "assistant" {
|
||||
return true
|
||||
}
|
||||
switch last.Get("type").String() {
|
||||
case "function_call", "function_call_arguments", "computer_call":
|
||||
return true
|
||||
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
||||
return true
|
||||
}
|
||||
|
||||
// If last item is user-role, check for prior non-user items
|
||||
for _, item := range arr {
|
||||
if role := item.Get("role").String(); role == "assistant" {
|
||||
return true
|
||||
}
|
||||
|
||||
switch item.Get("type").String() {
|
||||
case "function_call", "function_call_arguments", "computer_call":
|
||||
return "assistant"
|
||||
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
||||
return "tool"
|
||||
case "function_call", "function_call_output", "function_call_response",
|
||||
"function_call_arguments", "computer_call", "computer_call_output":
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
return false
|
||||
}
|
||||
|
||||
// detectVisionContent checks if the request body contains vision/image content.
|
||||
@@ -572,6 +688,76 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
|
||||
return body
|
||||
}
|
||||
|
||||
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
|
||||
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
|
||||
// context on Anthropic's API, but Copilot's Claude models are limited to
|
||||
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
|
||||
// it from the translated body avoids confusing downstream translators.
|
||||
var copilotUnsupportedBetas = []string{
|
||||
"context-1m-2025-08-07",
|
||||
}
|
||||
|
||||
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
|
||||
// translated request body. In OpenAI format the betas may appear under
|
||||
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
|
||||
// "betas". This function checks all known locations.
|
||||
func stripUnsupportedBetas(body []byte) []byte {
|
||||
betaPaths := []string{"betas", "metadata.betas"}
|
||||
for _, path := range betaPaths {
|
||||
arr := gjson.GetBytes(body, path)
|
||||
if !arr.Exists() || !arr.IsArray() {
|
||||
continue
|
||||
}
|
||||
var filtered []string
|
||||
changed := false
|
||||
for _, item := range arr.Array() {
|
||||
beta := item.String()
|
||||
if isCopilotUnsupportedBeta(beta) {
|
||||
changed = true
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, beta)
|
||||
}
|
||||
if !changed {
|
||||
continue
|
||||
}
|
||||
if len(filtered) == 0 {
|
||||
body, _ = sjson.DeleteBytes(body, path)
|
||||
} else {
|
||||
body, _ = sjson.SetBytes(body, path, filtered)
|
||||
}
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
func isCopilotUnsupportedBeta(beta string) bool {
|
||||
return slices.Contains(copilotUnsupportedBetas, beta)
|
||||
}
|
||||
|
||||
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
|
||||
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
|
||||
// that the SDK translator expects. This handles both streaming deltas
|
||||
// (choices[].delta.reasoning_text) and non-streaming messages
|
||||
// (choices[].message.reasoning_text). The field is only renamed when
|
||||
// 'reasoning_content' is absent or null, preserving standard responses.
|
||||
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
|
||||
// Non-streaming: choices[].message.reasoning_text
|
||||
if rt := gjson.GetBytes(data, "choices.0.message.reasoning_text"); rt.Exists() && rt.String() != "" {
|
||||
rc := gjson.GetBytes(data, "choices.0.message.reasoning_content")
|
||||
if !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||
data, _ = sjson.SetBytes(data, "choices.0.message.reasoning_content", rt.String())
|
||||
}
|
||||
}
|
||||
// Streaming: choices[].delta.reasoning_text
|
||||
if rt := gjson.GetBytes(data, "choices.0.delta.reasoning_text"); rt.Exists() && rt.String() != "" {
|
||||
rc := gjson.GetBytes(data, "choices.0.delta.reasoning_content")
|
||||
if !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||
data, _ = sjson.SetBytes(data, "choices.0.delta.reasoning_content", rt.String())
|
||||
}
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
|
||||
if sourceFormat.String() == "openai-response" {
|
||||
return true
|
||||
@@ -596,12 +782,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
|
||||
}
|
||||
|
||||
func containsEndpoint(endpoints []string, endpoint string) bool {
|
||||
for _, item := range endpoints {
|
||||
if item == endpoint {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
return slices.Contains(endpoints, endpoint)
|
||||
}
|
||||
|
||||
// flattenAssistantContent converts assistant message content from array format
|
||||
@@ -856,6 +1037,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
|
||||
return body
|
||||
}
|
||||
|
||||
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
|
||||
// that both vscode-copilot-chat and pi-ai always include.
|
||||
//
|
||||
// References:
|
||||
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
|
||||
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
|
||||
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
|
||||
// store: false — prevents request/response storage
|
||||
if !gjson.GetBytes(body, "store").Exists() {
|
||||
body, _ = sjson.SetBytes(body, "store", false)
|
||||
}
|
||||
|
||||
// include: ["reasoning.encrypted_content"] — enables reasoning content
|
||||
// reuse across turns, avoiding redundant computation
|
||||
if !gjson.GetBytes(body, "include").Exists() {
|
||||
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
|
||||
}
|
||||
|
||||
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
|
||||
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
|
||||
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
|
||||
tools := gjson.GetBytes(body, "tools")
|
||||
if tools.Exists() {
|
||||
|
||||
Reference in New Issue
Block a user