fix(copilot): reduce premium request inflation and enable thinking

This commit addresses three issues with Claude Code through GitHub
Copilot:

1. **Premium request inflation**: Responses API requests were missing
   Openai-Intent headers and proper defaults, causing Copilot to bill
   each tool-loop continuation as a new premium request. Fixed by adding
   isAgentInitiated() heuristic (checks for tool_result content or
   preceding assistant tool_use), applying Responses API defaults
   (store, include, reasoning.summary), and local tiktoken-based token
   counting to avoid extra API calls.

2. **Context overflow**: Claude Code's modelSupports1M() hardcodes
   opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K.
   Fixed by stripping the context-1m-2025-08-07 beta from translated
   request bodies. Also forwards response headers in non-streaming
   Execute() and registers the GET /copilot-quota management API route.

3. **Thinking not working**: Add ThinkingSupport with level-based
   reasoning to Claude models in the static definitions. Normalize
   Copilot's non-standard 'reasoning_text' response field to
   'reasoning_content' before passing to the SDK translator. Use
   caller-provided context in CountTokens instead of Background().
This commit is contained in:
kunish
2026-04-03 19:23:36 +08:00
parent 516d22c695
commit 59af2c57b1
4 changed files with 501 additions and 51 deletions

View File

@@ -1,11 +1,13 @@
package executor
import (
"context"
"net/http"
"strings"
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson"
)
@@ -251,14 +253,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
t.Parallel()
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
if gjson.GetBytes(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
}
if gjson.Get(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
if gjson.GetBytes(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
}
}
@@ -266,14 +268,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
t.Parallel()
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
if gjson.GetBytes(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
}
if gjson.Get(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
}
}
@@ -282,18 +284,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
var param any
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
if len(created) == 0 || !strings.Contains(created[0], "message_start") {
if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
t.Fatalf("created events = %#v, want message_start", created)
}
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
joinedDelta := strings.Join(delta, "")
var joinedDelta string
for _, d := range delta {
joinedDelta += string(d)
}
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
}
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
joinedCompleted := strings.Join(completed, "")
var joinedCompleted string
for _, c := range completed {
joinedCompleted += string(c)
}
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
}
@@ -312,15 +320,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Last role governs the initiator decision.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
// When the last role is "user" but the conversation contains tool messages,
// the request is a continuation (e.g. tool result with attached text
// translated to a synthetic user message). Should be "agent".
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"tool","content":"file contents..."},{"role":"user","content":"tool result here"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (history has tool role)", got)
}
}
@@ -346,14 +356,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Responses API: last item is user-role but history contains assistant → agent.
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
}
}
@@ -368,6 +379,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
}
}
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
// No tool messages → should be "user" (not a false-positive).
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
}
}
func TestApplyHeaders_XInitiator_AgentCompactionWithToolHistory(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Compaction scenario: user prompt after a conversation with tool use history.
// The last message is a plain "user" message (compaction summary request),
// but the conversation contains tool messages → should be "agent".
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (compaction with tool history)", got)
}
}
// --- Tests for x-github-api-version header (Problem M) ---
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +452,201 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
t.Fatal("expected no vision content when messages field is absent")
}
}
// --- Tests for applyGitHubCopilotResponsesDefaults ---
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
inc := gjson.GetBytes(got, "include")
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != true {
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
}
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello"}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
// reasoning.summary should NOT be set when reasoning.effort is absent
if gjson.GetBytes(got, "reasoning.summary").Exists() {
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
e.applyHeaders(req, "token", nil)
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
}
}
// --- Tests for CountTokens (local tiktoken estimation) ---
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
if len(resp.Payload) == 0 {
t.Fatal("CountTokens() returned empty payload")
}
// The response should contain a positive token count.
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if tokens <= 0 {
t.Fatalf("expected positive token count, got %d", tokens)
}
}
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "claude-sonnet-4",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("claude"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
// Claude source format → should get input_tokens in response
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
if inputTokens <= 0 {
// Fallback: check usage.prompt_tokens (depends on translator registration)
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if promptTokens <= 0 {
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
}
}
}
func TestCountTokens_EmptyPayload(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
// Empty messages should return 0 tokens.
if tokens != 0 {
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
}
}
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if !betas.Exists() {
t.Fatal("betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped")
}
}
// Other betas should be preserved
found := false
for _, item := range betas.Array() {
if item.String() == "interleaved-thinking-2025-05-14" {
found = true
}
}
if !found {
t.Fatal("other betas should be preserved")
}
}
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"gpt-4o","messages":[]}`)
result := stripUnsupportedBetas(body)
// Should be unchanged
if string(result) != string(body) {
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
}
}
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "metadata.betas")
if !betas.Exists() {
t.Fatal("metadata.betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
}
}
if betas.Array()[0].String() != "other-beta" {
t.Fatal("other betas in metadata.betas should be preserved")
}
}
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if betas.Exists() {
t.Fatal("betas field should be deleted when all betas are stripped")
}
}