diff --git a/internal/api/server.go b/internal/api/server.go index 723aca0a..15ddefdf 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -332,6 +332,7 @@ func (s *Server) setupRoutes() { v1.POST("/messages", claudeCodeHandlers.ClaudeMessages) v1.POST("/messages/count_tokens", claudeCodeHandlers.ClaudeCountTokens) v1.POST("/responses", openaiResponsesHandlers.Responses) + v1.POST("/responses/compact", openaiResponsesHandlers.Compact) } // Gemini compatible API routes diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index e08492fd..317090d0 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -111,6 +111,9 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A // Execute performs a non-streaming request to the AI Studio API. func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) @@ -167,6 +170,9 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, // ExecuteStream performs a streaming request to the AI Studio API. 
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 64d19951..b4ca3275 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -109,6 +109,9 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut // Execute performs a non-streaming request to the Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName isClaude := strings.Contains(strings.ToLower(baseModel), "claude") @@ -641,6 +644,9 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte { // ExecuteStream performs a streaming request to the Antigravity API. 
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName ctx = context.WithValue(ctx, "alt", "") diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 83c231bd..9ef7a2df 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -84,6 +84,9 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut } func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := claudeCreds(auth) @@ -221,6 +224,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := claudeCreds(auth) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 1f368b84..01ba2175 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -73,6 +73,9 @@ 
func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth } func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return e.executeCompact(ctx, auth, req, opts) + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := codexCreds(auth) @@ -117,7 +120,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re if err != nil { return resp, err } - applyCodexHeaders(httpReq, auth, apiKey) + applyCodexHeaders(httpReq, auth, apiKey, true) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID @@ -185,7 +188,96 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re return resp, err } +func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + + apiKey, baseURL := codexCreds(auth) + if baseURL == "" { + baseURL = "https://chatgpt.com/backend-api/codex" + } + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) + + from := opts.SourceFormat + to := sdktranslator.FromString("openai-response") + originalPayload := bytes.Clone(req.Payload) + if len(opts.OriginalRequest) > 0 { + originalPayload = bytes.Clone(opts.OriginalRequest) + } + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier()) + if err != nil { + return resp, err + } + + requestedModel := payloadRequestedModel(opts, req.Model) + body = 
applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + body, _ = sjson.SetBytes(body, "model", baseModel) + body, _ = sjson.DeleteBytes(body, "stream") + + url := strings.TrimSuffix(baseURL, "/") + "/responses/compact" + httpReq, err := e.cacheHelper(ctx, from, url, req, body) + if err != nil { + return resp, err + } + applyCodexHeaders(httpReq, auth, apiKey, false) + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: body, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("codex executor: close response body error: %v", errClose) + } + }() + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + err = statusErr{code: httpResp.StatusCode, msg: string(b)} + return resp, err + } + data, err := io.ReadAll(httpResp.Body) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + appendAPIResponseChunk(ctx, e.cfg, data) + reporter.publish(ctx, parseOpenAIUsage(data)) + reporter.ensurePublished(ctx) + var param any + out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, 
bytes.Clone(originalPayload), body, data, &param) + resp = cliproxyexecutor.Response{Payload: []byte(out)} + return resp, nil +} + func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusBadRequest, msg: "streaming not supported for /responses/compact"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := codexCreds(auth) @@ -229,7 +321,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if err != nil { return nil, err } - applyCodexHeaders(httpReq, auth, apiKey) + applyCodexHeaders(httpReq, auth, apiKey, true) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID @@ -530,17 +622,21 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form } } - rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID) + if cache.ID != "" { + rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID) + } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(rawJSON)) if err != nil { return nil, err } - httpReq.Header.Set("Conversation_id", cache.ID) - httpReq.Header.Set("Session_id", cache.ID) + if cache.ID != "" { + httpReq.Header.Set("Conversation_id", cache.ID) + httpReq.Header.Set("Session_id", cache.ID) + } return httpReq, nil } -func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) { +func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, stream bool) { r.Header.Set("Content-Type", "application/json") r.Header.Set("Authorization", "Bearer "+token) @@ -554,7 +650,11 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string) { misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString()) misc.EnsureHeader(r.Header, 
ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464") - r.Header.Set("Accept", "text/event-stream") + if stream { + r.Header.Set("Accept", "text/event-stream") + } else { + r.Header.Set("Accept", "application/json") + } r.Header.Set("Connection", "Keep-Alive") isAPIKey := false diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index e8a244ab..16ff0158 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -103,6 +103,9 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth. // Execute performs a non-streaming request to the Gemini CLI API. func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) @@ -253,6 +256,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth // ExecuteStream performs a streaming request to the Gemini CLI API. 
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 58bd71a2..8f729f5b 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -103,6 +103,9 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut // - cliproxyexecutor.Response: The response from the API // - error: An error if the request fails func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, bearer := geminiCreds(auth) @@ -207,6 +210,9 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // ExecuteStream performs a streaming request to the Gemini API. 
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, bearer := geminiCreds(auth) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index ceea42ff..83456a86 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -233,6 +233,9 @@ func (e *GeminiVertexExecutor) HttpRequest(ctx context.Context, auth *cliproxyau // Execute performs a non-streaming request to the Vertex AI API. func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } // Try API key authentication first apiKey, baseURL := vertexAPICreds(auth) @@ -251,6 +254,9 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A // ExecuteStream performs a streaming request to the Vertex AI API. 
func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } // Try API key authentication first apiKey, baseURL := vertexAPICreds(auth) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 270f5aa4..08a0a5af 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -68,6 +68,9 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth // Execute performs a non-streaming chat completion request. func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := iflowCreds(auth) @@ -167,6 +170,9 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re // ExecuteStream performs a streaming chat completion request. 
func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName apiKey, baseURL := iflowCreds(auth) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 85df21b1..ee61556e 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -81,9 +81,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A return } - // Translate inbound request to OpenAI format from := opts.SourceFormat to := sdktranslator.FromString("openai") + endpoint := "/chat/completions" + if opts.Alt == "responses/compact" { + to = sdktranslator.FromString("openai-response") + endpoint = "/responses/compact" + } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) @@ -92,13 +96,18 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) requestedModel := payloadRequestedModel(opts, req.Model) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel) + if opts.Alt == "responses/compact" { + if updated, errDelete := sjson.DeleteBytes(translated, "stream"); errDelete == nil { + translated = updated + } + } translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier()) if err != nil { return resp, err } - url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" + url := 
strings.TrimSuffix(baseURL, "/") + endpoint httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) if err != nil { return resp, err diff --git a/internal/runtime/executor/openai_compat_executor_compact_test.go b/internal/runtime/executor/openai_compat_executor_compact_test.go new file mode 100644 index 00000000..fe281262 --- /dev/null +++ b/internal/runtime/executor/openai_compat_executor_compact_test.go @@ -0,0 +1,58 @@ +package executor + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" + "github.com/tidwall/gjson" +) + +func TestOpenAICompatExecutorCompactPassthrough(t *testing.T) { + var gotPath string + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + body, _ := io.ReadAll(r.Body) + gotBody = body + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`)) + })) + defer server.Close() + + executor := NewOpenAICompatExecutor("openai-compatibility", &config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "base_url": server.URL + "/v1", + "api_key": "test", + }} + payload := []byte(`{"model":"gpt-5.1-codex-max","input":[{"role":"user","content":"hi"}]}`) + resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gpt-5.1-codex-max", + Payload: payload, + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + Alt: "responses/compact", + Stream: false, + }) + if err != nil { + t.Fatalf("Execute 
error: %v", err) + } + if gotPath != "/v1/responses/compact" { + t.Fatalf("path = %q, want %q", gotPath, "/v1/responses/compact") + } + if !gjson.GetBytes(gotBody, "input").Exists() { + t.Fatalf("expected input in body") + } + if gjson.GetBytes(gotBody, "messages").Exists() { + t.Fatalf("unexpected messages in body") + } + if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` { + t.Fatalf("payload = %s", string(resp.Payload)) + } +} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d05579d4..8df359e9 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -66,6 +66,9 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, } func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + if opts.Alt == "responses/compact" { + return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName token, baseURL := qwenCreds(auth) @@ -153,6 +156,9 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req } func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + if opts.Alt == "responses/compact" { + return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} + } baseModel := thinking.ParseSuffix(req.Model).ModelName token, baseURL := qwenCreds(auth) diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 3aa1e7ff..a642fac2 100644 --- a/internal/runtime/executor/usage_helpers.go +++ 
b/internal/runtime/executor/usage_helpers.go @@ -199,15 +199,31 @@ func parseOpenAIUsage(data []byte) usage.Detail { if !usageNode.Exists() { return usage.Detail{} } + inputNode := usageNode.Get("prompt_tokens") + if !inputNode.Exists() { + inputNode = usageNode.Get("input_tokens") + } + outputNode := usageNode.Get("completion_tokens") + if !outputNode.Exists() { + outputNode = usageNode.Get("output_tokens") + } detail := usage.Detail{ - InputTokens: usageNode.Get("prompt_tokens").Int(), - OutputTokens: usageNode.Get("completion_tokens").Int(), + InputTokens: inputNode.Int(), + OutputTokens: outputNode.Int(), TotalTokens: usageNode.Get("total_tokens").Int(), } - if cached := usageNode.Get("prompt_tokens_details.cached_tokens"); cached.Exists() { + cached := usageNode.Get("prompt_tokens_details.cached_tokens") + if !cached.Exists() { + cached = usageNode.Get("input_tokens_details.cached_tokens") + } + if cached.Exists() { detail.CachedTokens = cached.Int() } - if reasoning := usageNode.Get("completion_tokens_details.reasoning_tokens"); reasoning.Exists() { + reasoning := usageNode.Get("completion_tokens_details.reasoning_tokens") + if !reasoning.Exists() { + reasoning = usageNode.Get("output_tokens_details.reasoning_tokens") + } + if reasoning.Exists() { detail.ReasoningTokens = reasoning.Int() } return detail diff --git a/internal/runtime/executor/usage_helpers_test.go b/internal/runtime/executor/usage_helpers_test.go new file mode 100644 index 00000000..337f108a --- /dev/null +++ b/internal/runtime/executor/usage_helpers_test.go @@ -0,0 +1,43 @@ +package executor + +import "testing" + +func TestParseOpenAIUsageChatCompletions(t *testing.T) { + data := []byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3,"prompt_tokens_details":{"cached_tokens":4},"completion_tokens_details":{"reasoning_tokens":5}}}`) + detail := parseOpenAIUsage(data) + if detail.InputTokens != 1 { + t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 1) + } + if 
detail.OutputTokens != 2 { + t.Fatalf("output tokens = %d, want %d", detail.OutputTokens, 2) + } + if detail.TotalTokens != 3 { + t.Fatalf("total tokens = %d, want %d", detail.TotalTokens, 3) + } + if detail.CachedTokens != 4 { + t.Fatalf("cached tokens = %d, want %d", detail.CachedTokens, 4) + } + if detail.ReasoningTokens != 5 { + t.Fatalf("reasoning tokens = %d, want %d", detail.ReasoningTokens, 5) + } +} + +func TestParseOpenAIUsageResponses(t *testing.T) { + data := []byte(`{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30,"input_tokens_details":{"cached_tokens":7},"output_tokens_details":{"reasoning_tokens":9}}}`) + detail := parseOpenAIUsage(data) + if detail.InputTokens != 10 { + t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 10) + } + if detail.OutputTokens != 20 { + t.Fatalf("output tokens = %d, want %d", detail.OutputTokens, 20) + } + if detail.TotalTokens != 30 { + t.Fatalf("total tokens = %d, want %d", detail.TotalTokens, 30) + } + if detail.CachedTokens != 7 { + t.Fatalf("cached tokens = %d, want %d", detail.CachedTokens, 7) + } + if detail.ReasoningTokens != 9 { + t.Fatalf("reasoning tokens = %d, want %d", detail.ReasoningTokens, 9) + } +} diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index b6e0d005..ca20c848 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -347,7 +347,7 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams) // If we haven't sent message_delta yet (no usage info was received), send it now if param.FinishReason != "" && !param.MessageDeltaSent { - messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null}}` + messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` messageDeltaJSON, _ = 
sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason)) results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n") param.MessageDeltaSent = true diff --git a/sdk/api/handlers/openai/openai_responses_compact_test.go b/sdk/api/handlers/openai/openai_responses_compact_test.go new file mode 100644 index 00000000..a62a9682 --- /dev/null +++ b/sdk/api/handlers/openai/openai_responses_compact_test.go @@ -0,0 +1,120 @@ +package openai + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" + coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" +) + +type compactCaptureExecutor struct { + alt string + sourceFormat string + calls int +} + +func (e *compactCaptureExecutor) Identifier() string { return "test-provider" } + +func (e *compactCaptureExecutor) Execute(ctx context.Context, auth *coreauth.Auth, req coreexecutor.Request, opts coreexecutor.Options) (coreexecutor.Response, error) { + e.calls++ + e.alt = opts.Alt + e.sourceFormat = opts.SourceFormat.String() + return coreexecutor.Response{Payload: []byte(`{"ok":true}`)}, nil +} + +func (e *compactCaptureExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) { + return nil, errors.New("not implemented") +} + +func (e *compactCaptureExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) { + return auth, nil +} + +func (e *compactCaptureExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) { + return 
coreexecutor.Response{}, errors.New("not implemented") +} + +func (e *compactCaptureExecutor) HttpRequest(context.Context, *coreauth.Auth, *http.Request) (*http.Response, error) { + return nil, errors.New("not implemented") +} + +func TestOpenAIResponsesCompactRejectsStream(t *testing.T) { + gin.SetMode(gin.TestMode) + executor := &compactCaptureExecutor{} + manager := coreauth.NewManager(nil, nil, nil) + manager.RegisterExecutor(executor) + + auth := &coreauth.Auth{ID: "auth1", Provider: executor.Identifier(), Status: coreauth.StatusActive} + if _, err := manager.Register(context.Background(), auth); err != nil { + t.Fatalf("Register auth: %v", err) + } + registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(auth.ID) + }) + + base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager) + h := NewOpenAIResponsesAPIHandler(base) + router := gin.New() + router.POST("/v1/responses/compact", h.Compact) + + req := httptest.NewRequest(http.MethodPost, "/v1/responses/compact", strings.NewReader(`{"model":"test-model","stream":true}`)) + req.Header.Set("Content-Type", "application/json") + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d", resp.Code, http.StatusBadRequest) + } + if executor.calls != 0 { + t.Fatalf("executor calls = %d, want 0", executor.calls) + } +} + +func TestOpenAIResponsesCompactExecute(t *testing.T) { + gin.SetMode(gin.TestMode) + executor := &compactCaptureExecutor{} + manager := coreauth.NewManager(nil, nil, nil) + manager.RegisterExecutor(executor) + + auth := &coreauth.Auth{ID: "auth2", Provider: executor.Identifier(), Status: coreauth.StatusActive} + if _, err := manager.Register(context.Background(), auth); err != nil { + t.Fatalf("Register auth: %v", err) + } + registry.GetGlobalRegistry().RegisterClient(auth.ID, 
auth.Provider, []*registry.ModelInfo{{ID: "test-model"}}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(auth.ID) + }) + + base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager) + h := NewOpenAIResponsesAPIHandler(base) + router := gin.New() + router.POST("/v1/responses/compact", h.Compact) + + req := httptest.NewRequest(http.MethodPost, "/v1/responses/compact", strings.NewReader(`{"model":"test-model","input":"hello"}`)) + req.Header.Set("Content-Type", "application/json") + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + if resp.Code != http.StatusOK { + t.Fatalf("status = %d, want %d", resp.Code, http.StatusOK) + } + if executor.alt != "responses/compact" { + t.Fatalf("alt = %q, want %q", executor.alt, "responses/compact") + } + if executor.sourceFormat != "openai-response" { + t.Fatalf("source format = %q, want %q", executor.sourceFormat, "openai-response") + } + if strings.TrimSpace(resp.Body.String()) != `{"ok":true}` { + t.Fatalf("body = %s", resp.Body.String()) + } +} diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 952e44e0..e18789e1 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -19,6 +19,7 @@ import ( responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) // OpenAIResponsesAPIHandler contains the handlers for OpenAIResponses API endpoints. 
@@ -106,6 +107,49 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) { } +func (h *OpenAIResponsesAPIHandler) Compact(c *gin.Context) { + rawJSON, err := c.GetRawData() + if err != nil { + c.JSON(http.StatusBadRequest, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: fmt.Sprintf("Invalid request: %v", err), + Type: "invalid_request_error", + }, + }) + return + } + + streamResult := gjson.GetBytes(rawJSON, "stream") + if streamResult.Type == gjson.True { + c.JSON(http.StatusBadRequest, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: "Streaming not supported for compact responses", + Type: "invalid_request_error", + }, + }) + return + } + if streamResult.Exists() { + if updated, err := sjson.DeleteBytes(rawJSON, "stream"); err == nil { + rawJSON = updated + } + } + + c.Header("Content-Type", "application/json") + modelName := gjson.GetBytes(rawJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx) + resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "responses/compact") + stopKeepAlive() + if errMsg != nil { + h.WriteErrorResponse(c, errMsg) + cliCancel(errMsg.Error) + return + } + _, _ = c.Writer.Write(resp) + cliCancel() +} + // handleNonStreamingResponse handles non-streaming chat completion responses // for Gemini models. It selects a client from the pool, sends the request, and // aggregates the response before sending it back to the client in OpenAIResponses format.