diff --git a/README.md b/README.md index fc86a893..2cb78360 100644 --- a/README.md +++ b/README.md @@ -556,12 +556,17 @@ The server will relay the `loadCodeAssist`, `onboardUser`, and `countTokens` req ## Claude Code with multiple account load balancing -Start CLI Proxy API server, and then set the `ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_MODEL`, `ANTHROPIC_SMALL_FAST_MODEL` environment variables. +Start CLI Proxy API server, and then set the `ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, `ANTHROPIC_DEFAULT_HAIKU_MODEL` (or `ANTHROPIC_MODEL`, `ANTHROPIC_SMALL_FAST_MODEL` for version 1.x.x) environment variables. Using Gemini models: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=gemini-2.5-pro +export ANTHROPIC_DEFAULT_SONNET_MODEL=gemini-2.5-flash +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gemini-2.5-flash-lite +# version 1.x.x export ANTHROPIC_MODEL=gemini-2.5-pro export ANTHROPIC_SMALL_FAST_MODEL=gemini-2.5-flash ``` @@ -570,6 +575,11 @@ Using OpenAI GPT 5 models: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=gpt-5-high +export ANTHROPIC_DEFAULT_SONNET_MODEL=gpt-5-medium +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gpt-5-minimal +# version 1.x.x export ANTHROPIC_MODEL=gpt-5 export ANTHROPIC_SMALL_FAST_MODEL=gpt-5-minimal ``` @@ -578,6 +588,11 @@ Using OpenAI GPT 5 Codex models: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=gpt-5-codex-high +export ANTHROPIC_DEFAULT_SONNET_MODEL=gpt-5-codex-medium +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gpt-5-codex-low +# version 1.x.x export ANTHROPIC_MODEL=gpt-5-codex export ANTHROPIC_SMALL_FAST_MODEL=gpt-5-codex-low ``` @@ -586,6 +601,11 @@ Using Claude models: 
```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-1-20250805 +export ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5-20250929 +export ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku-20241022 +# version 1.x.x export ANTHROPIC_MODEL=claude-sonnet-4-20250514 export ANTHROPIC_SMALL_FAST_MODEL=claude-3-5-haiku-20241022 ``` @@ -594,6 +614,11 @@ Using Qwen models: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder-flash +# version 1.x.x export ANTHROPIC_MODEL=qwen3-coder-plus export ANTHROPIC_SMALL_FAST_MODEL=qwen3-coder-flash ``` @@ -602,6 +627,11 @@ Using iFlow models: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# version 2.x.x +export ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-max +export ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-235b-a22b-instruct +# version 1.x.x export ANTHROPIC_MODEL=qwen3-max export ANTHROPIC_SMALL_FAST_MODEL=qwen3-235b-a22b-instruct ``` diff --git a/README_CN.md b/README_CN.md index f0b0ac40..8f9b465e 100644 --- a/README_CN.md +++ b/README_CN.md @@ -564,12 +564,17 @@ export CODE_ASSIST_ENDPOINT="http://127.0.0.1:8317" ## Claude Code 的使用方法 -启动 CLI Proxy API 服务器, 设置如下系统环境变量 `ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_MODEL`, `ANTHROPIC_SMALL_FAST_MODEL` +启动 CLI Proxy API 服务器, 设置如下系统环境变量 `ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, `ANTHROPIC_DEFAULT_HAIKU_MODEL` (或 `ANTHROPIC_MODEL`, `ANTHROPIC_SMALL_FAST_MODEL` 对应 1.x.x 版本) 使用 Gemini 模型: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export 
ANTHROPIC_DEFAULT_OPUS_MODEL=gemini-2.5-pro +export ANTHROPIC_DEFAULT_SONNET_MODEL=gemini-2.5-flash +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gemini-2.5-flash-lite +# 1.x.x 版本 export ANTHROPIC_MODEL=gemini-2.5-pro export ANTHROPIC_SMALL_FAST_MODEL=gemini-2.5-flash ``` @@ -578,6 +583,11 @@ export ANTHROPIC_SMALL_FAST_MODEL=gemini-2.5-flash ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export ANTHROPIC_DEFAULT_OPUS_MODEL=gpt-5-high +export ANTHROPIC_DEFAULT_SONNET_MODEL=gpt-5-medium +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gpt-5-minimal +# 1.x.x 版本 export ANTHROPIC_MODEL=gpt-5 export ANTHROPIC_SMALL_FAST_MODEL=gpt-5-minimal ``` @@ -586,15 +596,24 @@ export ANTHROPIC_SMALL_FAST_MODEL=gpt-5-minimal ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export ANTHROPIC_DEFAULT_OPUS_MODEL=gpt-5-codex-high +export ANTHROPIC_DEFAULT_SONNET_MODEL=gpt-5-codex-medium +export ANTHROPIC_DEFAULT_HAIKU_MODEL=gpt-5-codex-low +# 1.x.x 版本 export ANTHROPIC_MODEL=gpt-5-codex export ANTHROPIC_SMALL_FAST_MODEL=gpt-5-codex-low ``` - 使用 Claude 模型: ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-1-20250805 +export ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5-20250929 +export ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku-20241022 +# 1.x.x 版本 export ANTHROPIC_MODEL=claude-sonnet-4-20250514 export ANTHROPIC_SMALL_FAST_MODEL=claude-3-5-haiku-20241022 ``` @@ -603,6 +622,11 @@ export ANTHROPIC_SMALL_FAST_MODEL=claude-3-5-haiku-20241022 ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder-flash +# 1.x.x 版本 export ANTHROPIC_MODEL=qwen3-coder-plus export 
ANTHROPIC_SMALL_FAST_MODEL=qwen3-coder-flash ``` @@ -611,6 +635,11 @@ export ANTHROPIC_SMALL_FAST_MODEL=qwen3-coder-flash ```bash export ANTHROPIC_BASE_URL=http://127.0.0.1:8317 export ANTHROPIC_AUTH_TOKEN=sk-dummy +# 2.x.x 版本 +export ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-max +export ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder-plus +export ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-235b-a22b-instruct +# 1.x.x 版本 export ANTHROPIC_MODEL=qwen3-max export ANTHROPIC_SMALL_FAST_MODEL=qwen3-235b-a22b-instruct ``` diff --git a/go.mod b/go.mod index ebc3c220..df03ac4e 100644 --- a/go.mod +++ b/go.mod @@ -7,14 +7,15 @@ require ( github.com/gin-gonic/gin v1.10.1 github.com/go-git/go-git/v6 v6.0.0-20251009132922-75a182125145 github.com/google/uuid v1.6.0 - github.com/joho/godotenv v1.5.1 github.com/jackc/pgx/v5 v5.7.6 + github.com/joho/godotenv v1.5.1 github.com/klauspost/compress v1.17.4 github.com/minio/minio-go/v7 v7.0.66 github.com/sirupsen/logrus v1.9.3 github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 github.com/tidwall/gjson v1.18.0 github.com/tidwall/sjson v1.2.5 + github.com/tiktoken-go/tokenizer v0.7.0 golang.org/x/crypto v0.43.0 golang.org/x/net v0.46.0 golang.org/x/oauth2 v0.30.0 @@ -32,6 +33,7 @@ require ( github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect + github.com/dlclark/regexp2 v1.11.5 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect diff --git a/go.sum b/go.sum index b4d83b1a..cba1c68c 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGL github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=
@@ -147,6 +149,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
 github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
 github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
 github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/tiktoken-go/tokenizer v0.7.0 h1:VMu6MPT0bXFDHr7UPh9uii7CNItVt3X9K90omxL54vw=
+github.com/tiktoken-go/tokenizer v0.7.0/go.mod h1:6UCYI/DtOallbmL7sSy30p6YQv60qNyU/4aVigPOx6w=
 github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 9e4b1471..30562c51 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -20,6 +20,7 @@ import (
 	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
+	"github.com/tiktoken-go/tokenizer"
 
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
@@ -277,7 +278,180 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }
 
+// CountTokens estimates the number of input tokens of req without sending it
+// anywhere: the payload is translated from opts.SourceFormat to the "codex"
+// format, the gpt-5 / gpt-5-codex aliases are rewritten to a model name plus a
+// reasoning.effort value, and the text collected by countCodexInputTokens is
+// run through a tokenizer. The count is wrapped in a usage JSON document and
+// converted back to the caller's format with sdktranslator.TranslateTokenCount.
+// An error is returned when the tokenizer cannot be created or counting fails.
 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{Payload: []byte{}}, fmt.Errorf("not implemented")
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("codex")
+	// Translate a copy so the caller's payload is never modified.
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+
+	// Models outside the two alias lists below are passed to the tokenizer lookup unchanged.
+	modelForCounting := req.Model
+
+	// Errors returned by the sjson calls below are discarded; on failure body
+	// holds whatever sjson returned.
+	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, req.Model) {
+		modelForCounting = "gpt-5"
+		body, _ = sjson.SetBytes(body, "model", "gpt-5")
+		switch req.Model {
+		case "gpt-5-minimal":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "minimal")
+		case "gpt-5-low":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
+		case "gpt-5-medium":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		case "gpt-5-high":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
+		default:
+			// Only the bare "gpt-5" alias reaches this branch.
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
+		}
+	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, req.Model) {
+		// The codex variants are counted with the gpt-5 tokenizer as well.
+		modelForCounting = "gpt-5"
+		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex")
+		switch req.Model {
+		case "gpt-5-codex-low":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
+		case "gpt-5-codex-medium":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
+		case "gpt-5-codex-high":
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
+		default:
+			// Only the bare "gpt-5-codex" alias reaches this branch.
+			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
+		}
+	}
+
+	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.SetBytes(body, "stream", false)
+
+	enc, err := tokenizerForCodexModel(modelForCounting)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
+	}
+
+	count, err := countCodexInputTokens(enc, body)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: token counting failed: %w", err)
+	}
+
+	// Only input tokens are known here, so output_tokens is 0 and total_tokens equals the input count.
+	usageJSON := fmt.Sprintf(`{"response":{"usage":{"input_tokens":%d,"output_tokens":0,"total_tokens":%d}}}`, count, count)
+	translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, []byte(usageJSON))
+	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
+}
+
+// tokenizerForCodexModel picks a tokenizer codec by prefix-matching the
+// trimmed, lower-cased model name. An empty or unrecognised name falls back to
+// the Cl100kBase encoding. Cases are ordered so that longer prefixes
+// ("gpt-4.1", "gpt-4o") are tested before the shorter "gpt-4".
+func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
+	sanitized := strings.ToLower(strings.TrimSpace(model))
+	switch {
+	case sanitized == "":
+		return tokenizer.Get(tokenizer.Cl100kBase)
+	case strings.HasPrefix(sanitized, "gpt-5"):
+		return tokenizer.ForModel(tokenizer.GPT5)
+	case strings.HasPrefix(sanitized, "gpt-4.1"):
+		return tokenizer.ForModel(tokenizer.GPT41)
+	case strings.HasPrefix(sanitized, "gpt-4o"):
+		return tokenizer.ForModel(tokenizer.GPT4o)
+	case strings.HasPrefix(sanitized, "gpt-4"):
+		return tokenizer.ForModel(tokenizer.GPT4)
+	// NOTE(review): every name starting with "gpt-3.5" also starts with "gpt-3",
+	// so the first condition is redundant.
+	case strings.HasPrefix(sanitized, "gpt-3.5"), strings.HasPrefix(sanitized, "gpt-3"):
+		return tokenizer.ForModel(tokenizer.GPT35Turbo)
+	default:
+		return tokenizer.Get(tokenizer.Cl100kBase)
+	}
+}
+
+// countCodexInputTokens collects the text-bearing fields of a codex-format
+// request body (instructions, input items, tool definitions and the
+// text.format name/schema), joins them with newlines and returns enc.Count of
+// the result. It returns an error when enc is nil or when enc.Count fails, and
+// 0 when body is empty or contains none of those fields. Every collected value
+// is whitespace-trimmed and skipped when empty.
+func countCodexInputTokens(enc tokenizer.Codec, body []byte) (int64, error) {
+	if enc == nil {
+		return 0, fmt.Errorf("encoder is nil")
+	}
+	if len(body) == 0 {
+		return 0, nil
+	}
+
+	root := gjson.ParseBytes(body)
+	var segments []string
+
+	if inst := strings.TrimSpace(root.Get("instructions").String()); inst != "" {
+		segments = append(segments, inst)
+	}
+
+	inputItems := root.Get("input")
+	if inputItems.IsArray() {
+		arr := inputItems.Array()
+		for i := range arr {
+			item := arr[i]
+			switch item.Get("type").String() {
+			case "message":
+				// Only the "text" field of each content part is counted.
+				content := item.Get("content")
+				if content.IsArray() {
+					parts := content.Array()
+					for j := range parts {
+						part := parts[j]
+						if text := strings.TrimSpace(part.Get("text").String()); text != "" {
+							segments = append(segments, text)
+						}
+					}
+				}
+			case "function_call":
+				if name := strings.TrimSpace(item.Get("name").String()); name != "" {
+					segments = append(segments, name)
+				}
+				if args := strings.TrimSpace(item.Get("arguments").String()); args != "" {
+					segments = append(segments, args)
+				}
+			case "function_call_output":
+				if out := strings.TrimSpace(item.Get("output").String()); out != "" {
+					segments = append(segments, out)
+				}
+			default:
+				// Any other item type contributes only a top-level "text" field, if present.
+				if text := strings.TrimSpace(item.Get("text").String()); text != "" {
+					segments = append(segments, text)
+				}
+			}
+		}
+	}
+
+	tools := root.Get("tools")
+	if tools.IsArray() {
+		tarr := tools.Array()
+		for i := range tarr {
+			tool := tarr[i]
+			if name := strings.TrimSpace(tool.Get("name").String()); name != "" {
+				segments = append(segments, name)
+			}
+			if desc := strings.TrimSpace(tool.Get("description").String()); desc != "" {
+				segments = append(segments, desc)
+			}
+			if params := tool.Get("parameters"); params.Exists() {
+				// Use the raw JSON text, unless the value is itself a JSON string,
+				// in which case the decoded string is counted.
+				val := params.Raw
+				if params.Type == gjson.String {
+					val = params.String()
+				}
+				if trimmed := strings.TrimSpace(val); trimmed != "" {
+					segments = append(segments, trimmed)
+				}
+			}
+		}
+	}
+
+	textFormat := root.Get("text.format")
+	if textFormat.Exists() {
+		if name := strings.TrimSpace(textFormat.Get("name").String()); name != "" {
+			segments = append(segments, name)
+		}
+		if schema := textFormat.Get("schema"); schema.Exists() {
+			// Same raw-vs-decoded handling as for tool parameters above.
+			val := schema.Raw
+			if schema.Type == gjson.String {
+				val = schema.String()
+			}
+			if trimmed := strings.TrimSpace(val); trimmed != "" {
+				segments = append(segments, trimmed)
+			}
+		}
+	}
+
+	text := strings.Join(segments, "\n")
+	if text == "" {
+		return 0, nil
+	}
+
+	count, err := enc.Count(text)
+	if err != nil {
+		return 0, err
+	}
+	return int64(count), nil
 }
 
 func (e *CodexExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index 905594ef..30344992 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -221,9 +221,24 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	return stream, nil
 }
 
-// CountTokens is not implemented for iFlow.
-func (e *IFlowExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{Payload: nil}, fmt.Errorf("not implemented")
+// CountTokens estimates the input token count of req within this function: the
+// payload is translated from opts.SourceFormat to the "openai" format, counted
+// with the tokenizer selected for req.Model, and the resulting usage JSON is
+// translated back to the source format. An error is returned when the
+// tokenizer cannot be created or counting fails.
+func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("openai")
+	// Translate a copy so the caller's payload is never modified.
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+
+	enc, err := tokenizerForModel(req.Model)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err)
+	}
+
+	count, err := countOpenAIChatTokens(enc, body)
+	if err != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: token counting failed: %w", err)
+	}
+
+	usageJSON := buildOpenAIUsageJSON(count)
+	translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
+	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }
 
 // Refresh refreshes OAuth tokens and updates the stored API key.
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 127a2791..d3fbf528 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -219,7 +219,29 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 }
 
 func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{Payload: []byte{}}, fmt.Errorf("not implemented")
+	// Estimate the prompt size inside this function: translate a copy of the
+	// request to the "openai" format, count it, then hand the usage document
+	// back to the translator for the caller's format.
+	sourceFormat := opts.SourceFormat
+	openAIFormat := sdktranslator.FromString("openai")
+	payload := sdktranslator.TranslateRequest(sourceFormat, openAIFormat, req.Model, bytes.Clone(req.Payload), false)
+
+	// A configured upstream model replaces the requested one both in the
+	// payload and for tokenizer selection.
+	countingModel := req.Model
+	upstreamModel := e.resolveUpstreamModel(req.Model, auth)
+	if upstreamModel != "" {
+		payload = e.overrideModel(payload, upstreamModel)
+		countingModel = upstreamModel
+	}
+
+	codec, errCodec := tokenizerForModel(countingModel)
+	if errCodec != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("openai compat executor: tokenizer init failed: %w", errCodec)
+	}
+
+	total, errCount := countOpenAIChatTokens(codec, payload)
+	if errCount != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("openai compat executor: token counting failed: %w", errCount)
+	}
+
+	usage := sdktranslator.TranslateTokenCount(ctx, openAIFormat, sourceFormat, total, buildOpenAIUsageJSON(total))
+	return cliproxyexecutor.Response{Payload: []byte(usage)}, nil
 }
 
 // Refresh is a no-op for API-key based compatibility providers.
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index 22e5c5da..a59f16e5 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -207,7 +207,28 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 }
 
 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{Payload: []byte{}}, fmt.Errorf("not implemented")
+	// Estimate the prompt size inside this function: translate a copy of the
+	// request to the "openai" format, count it, then hand the usage document
+	// back to the translator for the caller's format.
+	sourceFormat := opts.SourceFormat
+	openAIFormat := sdktranslator.FromString("openai")
+	payload := sdktranslator.TranslateRequest(sourceFormat, openAIFormat, req.Model, bytes.Clone(req.Payload), false)
+
+	// Prefer the model named in the translated payload; fall back to the
+	// requested model when that field is missing or blank.
+	countingModel := req.Model
+	if declared := gjson.GetBytes(payload, "model").String(); strings.TrimSpace(declared) != "" {
+		countingModel = declared
+	}
+
+	codec, errCodec := tokenizerForModel(countingModel)
+	if errCodec != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: tokenizer init failed: %w", errCodec)
+	}
+
+	total, errCount := countOpenAIChatTokens(codec, payload)
+	if errCount != nil {
+		return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: token counting failed: %w", errCount)
+	}
+
+	usage := sdktranslator.TranslateTokenCount(ctx, openAIFormat, sourceFormat, total, buildOpenAIUsageJSON(total))
+	return cliproxyexecutor.Response{Payload: []byte(usage)}, nil
 }
 
 func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
diff --git a/internal/runtime/executor/token_helpers.go b/internal/runtime/executor/token_helpers.go
new file mode 100644
index 00000000..c196bd00
--- /dev/null
+++ b/internal/runtime/executor/token_helpers.go
@@ -0,0 +1,234 @@
+package executor
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/tidwall/gjson"
+	"github.com/tiktoken-go/tokenizer"
+)
+
+// tokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id.
+// The id is trimmed and lower-cased, then matched by prefix against the known
+// model families. A blank id uses the Cl100kBase encoding; an id that matches
+// no family uses O200kBase.
+func tokenizerForModel(model string) (tokenizer.Codec, error) {
+	name := strings.ToLower(strings.TrimSpace(model))
+	if name == "" {
+		return tokenizer.Get(tokenizer.Cl100kBase)
+	}
+
+	// Checked top to bottom, so the more specific prefixes ("gpt-4.1",
+	// "gpt-4o") must stay ahead of the shorter "gpt-4".
+	families := []struct {
+		prefix string
+		model  tokenizer.Model
+	}{
+		{"gpt-5", tokenizer.GPT5},
+		{"gpt-4.1", tokenizer.GPT41},
+		{"gpt-4o", tokenizer.GPT4o},
+		{"gpt-4", tokenizer.GPT4},
+		{"gpt-3.5", tokenizer.GPT35Turbo},
+		{"gpt-3", tokenizer.GPT35Turbo},
+		{"o1", tokenizer.O1},
+		{"o3", tokenizer.O3},
+		{"o4", tokenizer.O4Mini},
+	}
+	for _, family := range families {
+		if strings.HasPrefix(name, family.prefix) {
+			return tokenizer.ForModel(family.model)
+		}
+	}
+	return tokenizer.Get(tokenizer.O200kBase)
+}
+
+// countOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads.
+// It collects the text-bearing fields of the payload (messages, tools,
+// functions, tool_choice, response_format, input and prompt), joins them with
+// newlines and returns enc.Count of the result. It returns an error when enc is
+// nil or enc.Count fails, and 0 when the payload is empty or yields no text.
+func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
+	if enc == nil {
+		return 0, fmt.Errorf("encoder is nil")
+	}
+	if len(payload) == 0 {
+		return 0, nil
+	}
+
+	root := gjson.ParseBytes(payload)
+	segments := make([]string, 0, 32)
+
+	collectOpenAIMessages(root.Get("messages"), &segments)
+	collectOpenAITools(root.Get("tools"), &segments)
+	collectOpenAIFunctions(root.Get("functions"), &segments)
+	collectOpenAIToolChoice(root.Get("tool_choice"), &segments)
+	collectOpenAIResponseFormat(root.Get("response_format"), &segments)
+	addIfNotEmpty(&segments, root.Get("input").String())
+	addIfNotEmpty(&segments, root.Get("prompt").String())
+
+	joined := strings.TrimSpace(strings.Join(segments, "\n"))
+	if joined == "" {
+		return 0, nil
+	}
+
+	count, err := enc.Count(joined)
+	if err != nil {
+		return 0, err
+	}
+	return int64(count), nil
+}
+
+// buildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators.
+// completion_tokens is always 0 and total_tokens equals prompt_tokens.
+func buildOpenAIUsageJSON(count int64) []byte {
+	return []byte(fmt.Sprintf(`{"usage":{"prompt_tokens":%d,"completion_tokens":0,"total_tokens":%d}}`, count, count))
+}
+
+// collectOpenAIMessages appends, for every element of the messages array, its
+// role, name, content, tool_calls and function_call text to segments.
+// A missing or non-array value is ignored.
+func collectOpenAIMessages(messages gjson.Result, segments *[]string) {
+	if !messages.Exists() || !messages.IsArray() {
+		return
+	}
+	messages.ForEach(func(_, message gjson.Result) bool {
+		addIfNotEmpty(segments, message.Get("role").String())
+		addIfNotEmpty(segments, message.Get("name").String())
+		collectOpenAIContent(message.Get("content"), segments)
+		collectOpenAIToolCalls(message.Get("tool_calls"), segments)
+		collectOpenAIFunctionCall(message.Get("function_call"), segments)
+		return true
+	})
+}
+
+// collectOpenAIContent appends the text of a message "content" value to
+// segments. A string is added as-is, an array is walked part by part according
+// to each part's "type", and any other JSON value is added as raw JSON.
+func collectOpenAIContent(content gjson.Result, segments *[]string) {
+	if !content.Exists() {
+		return
+	}
+	if content.Type == gjson.String {
+		addIfNotEmpty(segments, content.String())
+		return
+	}
+	if content.IsArray() {
+		content.ForEach(func(_, part gjson.Result) bool {
+			partType := part.Get("type").String()
+			switch partType {
+			case "text", "input_text", "output_text":
+				addIfNotEmpty(segments, part.Get("text").String())
+			case "image_url":
+				// An inline "data:" URI carries the encoded image bytes. Running
+				// it through a text tokenizer would add a count proportional to
+				// the image size, so inline payloads are skipped and only
+				// ordinary URLs are counted as text.
+				imageURL := strings.TrimSpace(part.Get("image_url.url").String())
+				if len(imageURL) < 5 || !strings.EqualFold(imageURL[:5], "data:") {
+					addIfNotEmpty(segments, imageURL)
+				}
+			case "input_audio", "output_audio", "audio":
+				// Only the part's id is counted, not the audio data.
+				addIfNotEmpty(segments, part.Get("id").String())
+			case "tool_result":
+				addIfNotEmpty(segments, part.Get("name").String())
+				collectOpenAIContent(part.Get("content"), segments)
+			default:
+				// Unknown part type: recurse into arrays, count objects as raw
+				// JSON, and count scalars by their string value.
+				if part.IsArray() {
+					collectOpenAIContent(part, segments)
+					return true
+				}
+				if part.Type == gjson.JSON {
+					addIfNotEmpty(segments, part.Raw)
+					return true
+				}
+				addIfNotEmpty(segments, part.String())
+			}
+			return true
+		})
+		return
+	}
+	if content.Type == gjson.JSON {
+		addIfNotEmpty(segments, content.Raw)
+	}
+}
+
+// collectOpenAIToolCalls appends id, type and the function fields (name,
+// description, arguments, raw parameters) of every element of a tool_calls
+// array to segments. A missing or non-array value is ignored.
+func collectOpenAIToolCalls(calls gjson.Result, segments *[]string) {
+	if !calls.Exists() || !calls.IsArray() {
+		return
+	}
+	calls.ForEach(func(_, call gjson.Result) bool {
+		addIfNotEmpty(segments, call.Get("id").String())
+		addIfNotEmpty(segments, call.Get("type").String())
+		function := call.Get("function")
+		if function.Exists() {
+			addIfNotEmpty(segments, function.Get("name").String())
+			addIfNotEmpty(segments, function.Get("description").String())
+			addIfNotEmpty(segments, function.Get("arguments").String())
+			if params := function.Get("parameters"); params.Exists() {
+				addIfNotEmpty(segments, params.Raw)
+			}
+		}
+		return true
+	})
+}
+
+// collectOpenAIFunctionCall appends the name and arguments of a message-level
+// function_call value to segments.
+func collectOpenAIFunctionCall(call gjson.Result, segments *[]string) {
+	if !call.Exists() {
+		return
+	}
+	addIfNotEmpty(segments, call.Get("name").String())
+	addIfNotEmpty(segments, call.Get("arguments").String())
+}
+
+// collectOpenAITools appends every tool definition to segments. The value may
+// be an array of tools or a single tool object.
+func collectOpenAITools(tools gjson.Result, segments *[]string) {
+	if !tools.Exists() {
+		return
+	}
+	if tools.IsArray() {
+		tools.ForEach(func(_, tool gjson.Result) bool {
+			appendToolPayload(tool, segments)
+			return true
+		})
+		return
+	}
+	appendToolPayload(tools, segments)
+}
+
+// collectOpenAIFunctions appends name, description and raw parameters of every
+// element of a "functions" array to segments. A missing or non-array value is
+// ignored.
+func collectOpenAIFunctions(functions gjson.Result, segments *[]string) {
+	if !functions.Exists() || !functions.IsArray() {
+		return
+	}
+	functions.ForEach(func(_, function gjson.Result) bool {
+		addIfNotEmpty(segments, function.Get("name").String())
+		addIfNotEmpty(segments, function.Get("description").String())
+		if params := function.Get("parameters"); params.Exists() {
+			addIfNotEmpty(segments, params.Raw)
+		}
+		return true
+	})
+}
+
+// collectOpenAIToolChoice appends the tool_choice value to segments: the
+// string itself when it is a JSON string, otherwise its raw JSON.
+func collectOpenAIToolChoice(choice gjson.Result, segments *[]string) {
+	if !choice.Exists() {
+		return
+	}
+	if choice.Type == gjson.String {
+		addIfNotEmpty(segments, choice.String())
+		return
+	}
+	addIfNotEmpty(segments, choice.Raw)
+}
+
+// collectOpenAIResponseFormat appends the type and name of a response_format
+// value plus the raw JSON of its "json_schema" and "schema" members, whichever
+// are present.
+func collectOpenAIResponseFormat(format gjson.Result, segments *[]string) {
+	if !format.Exists() {
+		return
+	}
+	addIfNotEmpty(segments, format.Get("type").String())
+	addIfNotEmpty(segments, format.Get("name").String())
+	if schema := format.Get("json_schema"); schema.Exists() {
+		addIfNotEmpty(segments, schema.Raw)
+	}
+	if schema := format.Get("schema"); schema.Exists() {
+		addIfNotEmpty(segments, schema.Raw)
+	}
+}
+
+// appendToolPayload appends one tool definition to segments. Both layouts are
+// read: fields placed directly on the tool (type, name, description) and
+// fields nested under "function" (name, description, raw parameters).
+func appendToolPayload(tool gjson.Result, segments *[]string) {
+	if !tool.Exists() {
+		return
+	}
+	addIfNotEmpty(segments, tool.Get("type").String())
+	addIfNotEmpty(segments, tool.Get("name").String())
+	addIfNotEmpty(segments, tool.Get("description").String())
+	if function := tool.Get("function"); function.Exists() {
+		addIfNotEmpty(segments, function.Get("name").String())
+		addIfNotEmpty(segments, function.Get("description").String())
+		if params := function.Get("parameters"); params.Exists() {
+			addIfNotEmpty(segments, params.Raw)
+		}
+	}
+}
+
+// addIfNotEmpty appends the whitespace-trimmed value to *segments unless the
+// trimmed value is empty. A nil segments pointer is a no-op.
+func addIfNotEmpty(segments *[]string, value string) {
+	if segments == nil {
+		return
+	}
+	if trimmed := strings.TrimSpace(value); trimmed != "" {
+		*segments = append(*segments, trimmed)
+	}
+}
diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go
index 6f5b5d49..e9fe758d 100644
--- 
a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -354,3 +354,7 @@ func buildReverseMapFromClaudeOriginalShortToOriginal(original []byte) map[strin
 	}
 	return rev
 }
+
+// ClaudeTokenCount renders a token count as the JSON object
+// {"input_tokens":<count>}. It is registered as the TokenCount translator for
+// Claude-format requests handled by Codex. ctx is not used.
+func ClaudeTokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"input_tokens":%d}`, count)
+}
diff --git a/internal/translator/codex/claude/init.go b/internal/translator/codex/claude/init.go
index 82ff78ad..7126edc3 100644
--- a/internal/translator/codex/claude/init.go
+++ b/internal/translator/codex/claude/init.go
@@ -12,8 +12,9 @@ func init() {
 		Codex,
 		ConvertClaudeRequestToCodex,
 		interfaces.TranslateResponse{
-			Stream:    ConvertCodexResponseToClaude,
-			NonStream: ConvertCodexResponseToClaudeNonStream,
+			Stream:     ConvertCodexResponseToClaude,
+			NonStream:  ConvertCodexResponseToClaudeNonStream,
+			TokenCount: ClaudeTokenCount,
 		},
 	)
 }
diff --git a/internal/translator/codex/gemini-cli/codex_gemini-cli_response.go b/internal/translator/codex/gemini-cli/codex_gemini-cli_response.go
index 3de4bb8f..c60e66b9 100644
--- a/internal/translator/codex/gemini-cli/codex_gemini-cli_response.go
+++ b/internal/translator/codex/gemini-cli/codex_gemini-cli_response.go
@@ -6,6 +6,7 @@ package geminiCLI
 
 import (
 	"context"
+	"fmt"
 
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/gemini"
 	"github.com/tidwall/sjson"
@@ -54,3 +55,7 @@ func ConvertCodexResponseToGeminiCLINonStream(ctx context.Context, modelName str
 	strJSON, _ = sjson.SetRaw(json, "response", strJSON)
 	return strJSON
 }
+
+// GeminiCLITokenCount renders a token count as a JSON object with totalTokens
+// and a single TEXT entry in promptTokensDetails, both set to count. It is
+// registered as the TokenCount translator for Gemini CLI-format requests
+// handled by Codex. ctx is not used.
+func GeminiCLITokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
+}
diff --git a/internal/translator/codex/gemini-cli/init.go b/internal/translator/codex/gemini-cli/init.go
index ac470655..8bcd3de5 100644
--- a/internal/translator/codex/gemini-cli/init.go
+++ b/internal/translator/codex/gemini-cli/init.go
@@ -12,8 +12,9 @@ func init() {
 		Codex,
 		ConvertGeminiCLIRequestToCodex,
 		interfaces.TranslateResponse{
-			Stream:    ConvertCodexResponseToGeminiCLI,
-			NonStream: ConvertCodexResponseToGeminiCLINonStream,
+			Stream:     ConvertCodexResponseToGeminiCLI,
+			NonStream:  ConvertCodexResponseToGeminiCLINonStream,
+			TokenCount: GeminiCLITokenCount,
 		},
 	)
 }
diff --git a/internal/translator/codex/gemini/codex_gemini_response.go b/internal/translator/codex/gemini/codex_gemini_response.go
index 4148b407..23a816ce 100644
--- a/internal/translator/codex/gemini/codex_gemini_response.go
+++ b/internal/translator/codex/gemini/codex_gemini_response.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"fmt"
 	"time"
 
 	"github.com/tidwall/gjson"
@@ -330,3 +331,7 @@ func mustMarshalJSON(v interface{}) string {
 	}
 	return string(data)
 }
+
+// GeminiTokenCount renders a token count as a JSON object with totalTokens and
+// a single TEXT entry in promptTokensDetails, both set to count. It is
+// registered as the TokenCount translator for Gemini-format requests handled
+// by Codex. ctx is not used.
+func GeminiTokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
+}
diff --git a/internal/translator/codex/gemini/init.go b/internal/translator/codex/gemini/init.go
index 96f68a98..41d30559 100644
--- a/internal/translator/codex/gemini/init.go
+++ b/internal/translator/codex/gemini/init.go
@@ -12,8 +12,9 @@ func init() {
 		Codex,
 		ConvertGeminiRequestToCodex,
 		interfaces.TranslateResponse{
-			Stream:    ConvertCodexResponseToGemini,
-			NonStream: ConvertCodexResponseToGeminiNonStream,
+			Stream:     ConvertCodexResponseToGemini,
+			NonStream:  ConvertCodexResponseToGeminiNonStream,
+			TokenCount: GeminiTokenCount,
 		},
 	)
 }
diff --git a/internal/translator/openai/claude/init.go b/internal/translator/openai/claude/init.go
index e72227f1..0e0f82ea 100644
--- a/internal/translator/openai/claude/init.go
+++ b/internal/translator/openai/claude/init.go
@@ -12,8 +12,9 @@ func init() {
 		OpenAI,
 		ConvertClaudeRequestToOpenAI,
 		interfaces.TranslateResponse{
-			Stream:    ConvertOpenAIResponseToClaude,
-			NonStream: ConvertOpenAIResponseToClaudeNonStream,
+			Stream:     ConvertOpenAIResponseToClaude,
+			NonStream:  ConvertOpenAIResponseToClaudeNonStream,
+			TokenCount: ClaudeTokenCount,
 		},
 	)
 }
diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go
index 7b61e9df..eecbd7e1 100644
--- a/internal/translator/openai/claude/openai_claude_response.go
+++ b/internal/translator/openai/claude/openai_claude_response.go
@@ -9,6 +9,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"fmt"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -630,3 +631,7 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 	}
 	return string(responseJSON)
 }
+
+// ClaudeTokenCount renders a token count as the JSON object
+// {"input_tokens":<count>}. It is registered as the TokenCount translator for
+// Claude-format requests handled by OpenAI-format executors. ctx is not used.
+func ClaudeTokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"input_tokens":%d}`, count)
+}
diff --git a/internal/translator/openai/gemini-cli/init.go b/internal/translator/openai/gemini-cli/init.go
index 24262c36..12aec5ec 100644
--- a/internal/translator/openai/gemini-cli/init.go
+++ b/internal/translator/openai/gemini-cli/init.go
@@ -12,8 +12,9 @@ func init() {
 		OpenAI,
 		ConvertGeminiCLIRequestToOpenAI,
 		interfaces.TranslateResponse{
-			Stream:    ConvertOpenAIResponseToGeminiCLI,
-			NonStream: ConvertOpenAIResponseToGeminiCLINonStream,
+			Stream:     ConvertOpenAIResponseToGeminiCLI,
+			NonStream:  ConvertOpenAIResponseToGeminiCLINonStream,
+			TokenCount: GeminiCLITokenCount,
 		},
 	)
 }
diff --git a/internal/translator/openai/gemini-cli/openai_gemini_response.go b/internal/translator/openai/gemini-cli/openai_gemini_response.go
index 1531c0e6..b5977964 100644
--- a/internal/translator/openai/gemini-cli/openai_gemini_response.go
+++ b/internal/translator/openai/gemini-cli/openai_gemini_response.go
@@ -7,6 +7,7 @@ package geminiCLI
 
 import (
 	"context"
+	"fmt"
 
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/gemini"
 	"github.com/tidwall/sjson"
@@ -51,3 +52,7 @@ func ConvertOpenAIResponseToGeminiCLINonStream(ctx context.Context, modelName st
 	strJSON, _ = sjson.SetRaw(json, "response", strJSON)
 	return strJSON
 }
+
+// GeminiCLITokenCount renders a token count as a JSON object with totalTokens
+// and a single TEXT entry in promptTokensDetails, both set to count. It is
+// registered as the TokenCount translator for Gemini CLI-format requests
+// handled by OpenAI-format executors. ctx is not used.
+func GeminiCLITokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
+}
diff --git a/internal/translator/openai/gemini/init.go b/internal/translator/openai/gemini/init.go
index 04c0704a..4f056ace 100644
--- a/internal/translator/openai/gemini/init.go
+++ b/internal/translator/openai/gemini/init.go
@@ -12,8 +12,9 @@ func init() {
 		OpenAI,
 		ConvertGeminiRequestToOpenAI,
 		interfaces.TranslateResponse{
-			Stream:    ConvertOpenAIResponseToGemini,
-			NonStream: ConvertOpenAIResponseToGeminiNonStream,
+			Stream:     ConvertOpenAIResponseToGemini,
+			NonStream:  ConvertOpenAIResponseToGeminiNonStream,
+			TokenCount: GeminiTokenCount,
 		},
 	)
 }
diff --git a/internal/translator/openai/gemini/openai_gemini_response.go b/internal/translator/openai/gemini/openai_gemini_response.go
index 59301e7b..d2f45db1 100644
--- a/internal/translator/openai/gemini/openai_gemini_response.go
+++ b/internal/translator/openai/gemini/openai_gemini_response.go
@@ -9,6 +9,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"fmt"
 	"strconv"
 	"strings"
 
@@ -609,3 +610,7 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina
 
 	return out
 }
+
+// GeminiTokenCount renders a token count as a JSON object with totalTokens and
+// a single TEXT entry in promptTokensDetails, both set to count. It is
+// registered as the TokenCount translator for Gemini-format requests handled
+// by OpenAI-format executors. ctx is not used.
+func GeminiTokenCount(ctx context.Context, count int64) string {
+	return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
+}