Merge pull request #498 from router-for-me/plus

v6.9.17
This commit is contained in:
Luis Pater
2026-04-08 23:42:40 +08:00
committed by GitHub
17 changed files with 663 additions and 160 deletions

81
.github/workflows/agents-md-guard.yml vendored Normal file
View File

@@ -0,0 +1,81 @@
name: agents-md-guard
on:
pull_request_target:
types:
- opened
- synchronize
- reopened
permissions:
contents: read
issues: write
pull-requests: write
jobs:
close-when-agents-md-changed:
runs-on: ubuntu-latest
steps:
- name: Detect AGENTS.md changes and close PR
uses: actions/github-script@v7
with:
script: |
const prNumber = context.payload.pull_request.number;
const { owner, repo } = context.repo;
const files = await github.paginate(github.rest.pulls.listFiles, {
owner,
repo,
pull_number: prNumber,
per_page: 100,
});
const touchesAgentsMd = (path) =>
typeof path === "string" &&
(path === "AGENTS.md" || path.endsWith("/AGENTS.md"));
const touched = files.filter(
(f) => touchesAgentsMd(f.filename) || touchesAgentsMd(f.previous_filename),
);
if (touched.length === 0) {
core.info("No AGENTS.md changes detected.");
return;
}
const changedList = touched
.map((f) =>
f.previous_filename && f.previous_filename !== f.filename
? `- ${f.previous_filename} -> ${f.filename}`
: `- ${f.filename}`,
)
.join("\n");
const body = [
"This repository does not allow modifying `AGENTS.md` in pull requests.",
"",
"Detected changes:",
changedList,
"",
"Please revert these changes and open a new PR without touching `AGENTS.md`.",
].join("\n");
try {
await github.rest.issues.createComment({
owner,
repo,
issue_number: prNumber,
body,
});
} catch (error) {
core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
}
await github.rest.pulls.update({
owner,
repo,
pull_number: prNumber,
state: "closed",
});
core.setFailed("PR modifies AGENTS.md");

View File

@@ -0,0 +1,73 @@
name: auto-retarget-main-pr-to-dev
on:
pull_request_target:
types:
- opened
- reopened
- edited
branches:
- main
permissions:
contents: read
issues: write
pull-requests: write
jobs:
retarget:
if: github.actor != 'github-actions[bot]'
runs-on: ubuntu-latest
steps:
- name: Retarget PR base to dev
uses: actions/github-script@v7
with:
script: |
const pr = context.payload.pull_request;
const prNumber = pr.number;
const { owner, repo } = context.repo;
const baseRef = pr.base?.ref;
const headRef = pr.head?.ref;
const desiredBase = "dev";
if (baseRef !== "main") {
core.info(`PR #${prNumber} base is ${baseRef}; nothing to do.`);
return;
}
if (headRef === desiredBase) {
core.info(`PR #${prNumber} is ${desiredBase} -> main; skipping retarget.`);
return;
}
core.info(`Retargeting PR #${prNumber} base from ${baseRef} to ${desiredBase}.`);
try {
await github.rest.pulls.update({
owner,
repo,
pull_number: prNumber,
base: desiredBase,
});
} catch (error) {
core.setFailed(`Failed to retarget PR #${prNumber} to ${desiredBase}: ${error.message}`);
return;
}
const body = [
`This pull request targeted \`${baseRef}\`.`,
"",
`The base branch has been automatically changed to \`${desiredBase}\`.`,
].join("\n");
try {
await github.rest.issues.createComment({
owner,
repo,
issue_number: prNumber,
body,
});
} catch (error) {
core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
}

1
.gitignore vendored
View File

@@ -46,6 +46,7 @@ GEMINI.md
.agents/*
.opencode/*
.idea/*
.beads/*
.bmad/*
_bmad/*
_bmad-output/*

58
AGENTS.md Normal file
View File

@@ -0,0 +1,58 @@
# AGENTS.md
Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with OAuth and round-robin load balancing.
## Repository
- GitHub: https://github.com/router-for-me/CLIProxyAPI
## Commands
```bash
gofmt -w . # Format (required after Go changes)
go build -o cli-proxy-api ./cmd/server # Build
go run ./cmd/server # Run dev server
go test ./... # Run all tests
go test -v -run TestName ./path/to/pkg # Run single test
go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes)
```
- Common flags: `--config <path>`, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port <port>`
## Config
- Default config: `config.yaml` (template: `config.example.yaml`)
- `.env` is auto-loaded from the working directory
- Auth material defaults under `auths/`
- Storage backends: file-based default; optional Postgres/git/object store (`PGSTORE_*`, `GITSTORE_*`, `OBJECTSTORE_*`)
## Architecture
- `cmd/server/` — Server entrypoint
- `internal/api/` — Gin HTTP API (routes, middleware, modules)
- `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy)
- `internal/thinking/` — Main thinking/reasoning pipeline. `ApplyThinking()` (apply.go) parses suffixes (`suffix.go`, suffix overrides body), normalizes config to canonical `ThinkingConfig` (`types.go`), normalizes and validates centrally (`validate.go`/`convert.go`), then applies provider-specific output via `ProviderApplier`. Do not break this "canonical representation → per-provider translation" architecture.
- `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket)
- `internal/translator/` — Provider protocol translators (and shared `common`)
- `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates
- `internal/store/` — Storage implementations and secret resolution
- `internal/managementasset/` — Config snapshots and management assets
- `internal/cache/` — Request signature caching
- `internal/watcher/` — Config hot-reload and watchers
- `internal/wsrelay/` — WebSocket relay sessions
- `internal/usage/` — Usage and token accounting
- `internal/tui/` — Bubbletea terminal UI (`--tui`, `--standalone`)
- `sdk/cliproxy/` — Embeddable SDK entry (service/builder/watchers/pipeline)
- `test/` — Cross-module integration tests
## Code Conventions
- Keep changes small and simple (KISS)
- Comments in English only
- If editing code that already contains non-English comments, translate them to English (dont add new non-English comments)
- For user-visible strings, keep the existing language used in that file/area
- New Markdown docs should be in English unless the file is explicitly language-specific (e.g. `README_CN.md`)
- As a rule, do not make standalone changes to `internal/translator/`. You may modify it only as part of broader changes elsewhere.
- If a task requires changing only `internal/translator/`, run `gh repo view --json viewerPermission -q .viewerPermission` to confirm you have `WRITE`, `MAINTAIN`, or `ADMIN`. If you do, you may proceed; otherwise, file a GitHub issue including the goal, rationale, and the intended implementation code, then stop further work.
- `internal/runtime/executor/` should contain executors and their unit tests only. Place any helper/supporting files under `internal/runtime/executor/helps/`.
- Follow `gofmt`; keep imports goimports-style; wrap errors with context where helpful
- Do not use `log.Fatal`/`log.Fatalf` (terminates the process); prefer returning errors and logging via logrus
- Shadowed variables: use method suffix (`errStart := server.Start()`)
- Wrap defer errors: `defer func() { if err := f.Close(); err != nil { log.Errorf(...) } }()`
- Use logrus structured logging; avoid leaking secrets/tokens in logs
- Avoid panics in HTTP handlers; prefer logged errors and meaningful HTTP status codes
- Timeouts are allowed only during credential acquisition; after an upstream connection is established, do not set timeouts for any subsequent network behavior. Intentional exceptions that must remain allowed are the Codex websocket liveness deadlines in `internal/runtime/executor/codex_websockets_executor.go`, the wsrelay session deadlines in `internal/wsrelay/session.go`, the management APICall timeout in `internal/api/handlers/management/api_tools.go`, and the `cmd/fetch_antigravity_models` utility timeouts

View File

@@ -129,11 +129,11 @@ func TestModifyResponse_GzipScenarios(t *testing.T) {
wantCE: "",
},
{
name: "skips_non_2xx_status",
name: "decompresses_non_2xx_status_when_gzip_detected",
header: http.Header{},
body: good,
status: 404,
wantBody: good,
wantBody: goodJSON,
wantCE: "",
},
}

View File

@@ -261,6 +261,28 @@ func classifyAntigravity429(body []byte) antigravity429Category {
return antigravity429Unknown
}
func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool {
if len(body) == 0 {
return false
}
details := gjson.GetBytes(body, "error.details")
if !details.Exists() || !details.IsArray() {
return false
}
for _, detail := range details.Array() {
if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" {
continue
}
if strings.TrimSpace(detail.Get("metadata.quotaResetDelay").String()) != "" {
return true
}
if strings.TrimSpace(detail.Get("metadata.model").String()) != "" {
return true
}
}
return false
}
func antigravityCreditsRetryEnabled(cfg *config.Config) bool {
return cfg != nil && cfg.QuotaExceeded.AntigravityCredits
}
@@ -362,6 +384,12 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e
lowerBody := strings.ToLower(string(body))
for _, keyword := range antigravityCreditsExhaustedKeywords {
if strings.Contains(lowerBody, keyword) {
if keyword == "resource has been exhausted" &&
statusCode == http.StatusTooManyRequests &&
classifyAntigravity429(body) == antigravity429Unknown &&
!antigravityHasQuotaResetDelayOrModelInfo(body) {
return false
}
return true
}
}
@@ -575,6 +603,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -742,6 +778,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1158,6 +1202,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return nil, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1774,6 +1826,24 @@ func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
return strings.Contains(msg, "no capacity available")
}
func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []byte) bool {
if statusCode != http.StatusTooManyRequests {
return false
}
if len(body) == 0 {
return false
}
if classifyAntigravity429(body) != antigravity429Unknown {
return false
}
status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String())
if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") {
return false
}
msg := strings.ToLower(string(body))
return strings.Contains(msg, "resource has been exhausted")
}
func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
if attempt < 0 {
attempt = 0
@@ -1785,6 +1855,17 @@ func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
return delay
}
func antigravityTransient429RetryDelay(attempt int) time.Duration {
if attempt < 0 {
attempt = 0
}
delay := time.Duration(attempt+1) * 100 * time.Millisecond
if delay > 500*time.Millisecond {
delay = 500 * time.Millisecond
}
return delay
}
func antigravityWait(ctx context.Context, wait time.Duration) error {
if wait <= 0 {
return nil

View File

@@ -82,20 +82,86 @@ func TestInjectEnabledCreditTypes(t *testing.T) {
}
func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) {
for _, body := range [][]byte{
[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
[]byte(`{"error":{"message":"Resource has been exhausted"}}`),
} {
if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
t.Run("credit errors are marked", func(t *testing.T) {
for _, body := range [][]byte{
[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
} {
if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
}
}
})
t.Run("transient 429 resource exhausted is not marked", func(t *testing.T) {
body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`)
if shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = true, want false", string(body))
}
})
t.Run("resource exhausted with quota metadata is still marked", func(t *testing.T) {
body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted","status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","metadata":{"quotaResetDelay":"1h","model":"claude-sonnet-4-6"}}]}}`)
if !shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
}
}
})
if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) {
t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false")
}
}
func TestAntigravityExecute_RetriesTransient429ResourceExhausted(t *testing.T) {
resetAntigravityCreditsRetryState()
t.Cleanup(resetAntigravityCreditsRetryState)
var requestCount int
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount++
switch requestCount {
case 1:
w.WriteHeader(http.StatusTooManyRequests)
_, _ = w.Write([]byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`))
case 2:
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
default:
t.Fatalf("unexpected request count %d", requestCount)
}
}))
defer server.Close()
exec := NewAntigravityExecutor(&config.Config{RequestRetry: 1})
auth := &cliproxyauth.Auth{
ID: "auth-transient-429",
Attributes: map[string]string{
"base_url": server.URL,
},
Metadata: map[string]any{
"access_token": "token",
"project_id": "project-1",
"expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339),
},
}
resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
Model: "gemini-2.5-flash",
Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FormatAntigravity,
})
if err != nil {
t.Fatalf("Execute() error = %v", err)
}
if len(resp.Payload) == 0 {
t.Fatal("Execute() returned empty payload")
}
if requestCount != 2 {
t.Fatalf("request count = %d, want 2", requestCount)
}
}
func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) {
resetAntigravityCreditsRetryState()
t.Cleanup(resetAntigravityCreditsRetryState)

View File

@@ -69,9 +69,6 @@ func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
detail.TotalTokens = total
}
}
if detail.InputTokens == 0 && detail.OutputTokens == 0 && detail.ReasoningTokens == 0 && detail.CachedTokens == 0 && detail.TotalTokens == 0 && !failed {
return
}
r.once.Do(func() {
usage.PublishRecord(ctx, r.buildRecord(detail, failed))
})

View File

@@ -56,9 +56,12 @@ func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
if len(parts) != 2 {
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
}
if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
if parts[0].Get("type").String() != "text" || parts[0].Get("cache_control.type").String() != "ephemeral" {
t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
}
if text := parts[0].Get("text").String(); text != "" && text != "You are Qwen Code." {
t.Fatalf("messages[0].content[0].text = %q, want empty string or default prompt", text)
}
if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
}

View File

@@ -174,8 +174,7 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
// Ensure the request satisfies Claude constraints:
// 1) Determine effective max_tokens (request overrides model default)
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
// 3) If the adjusted budget falls below the model minimum, try raising max_tokens
// (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
// 4) If max_tokens came from model default, write it back into the request
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
@@ -194,28 +193,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
minBudget = modelInfo.Thinking.Min
}
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
// Try raising max_tokens to fit the original budget.
needed := budgetTokens + 1
maxAllowed := 0
if modelInfo != nil {
maxAllowed = modelInfo.MaxCompletionTokens
}
if maxAllowed > 0 && needed > maxAllowed {
// Cannot use original budget; cap max_tokens at model limit.
needed = maxAllowed
}
cappedBudget := needed - 1
if cappedBudget < minBudget {
// Impossible to satisfy both budget >= minBudget and budget < max_tokens
// within the model's completion limit. Disable thinking entirely.
body, _ = sjson.DeleteBytes(body, "thinking")
return body
}
body, _ = sjson.SetBytes(body, "max_tokens", needed)
if cappedBudget != budgetTokens {
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
}
// If enforcing the max_tokens constraint would push the budget below the model minimum,
// leave the request unchanged.
return body
}

View File

@@ -1,99 +0,0 @@
package claude
import (
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/tidwall/gjson"
)
func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) {
a := &Applier{}
modelInfo := &registry.ModelInfo{
MaxCompletionTokens: 64000,
Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
}
body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`)
out := a.normalizeClaudeBudget(body, 5000, modelInfo)
maxTok := gjson.GetBytes(out, "max_tokens").Int()
if maxTok != 5001 {
t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out))
}
}
func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) {
a := &Applier{}
modelInfo := &registry.ModelInfo{
MaxCompletionTokens: 64000,
Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
}
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`)
out := a.normalizeClaudeBudget(body, 200000, modelInfo)
maxTok := gjson.GetBytes(out, "max_tokens").Int()
if maxTok != 64000 {
t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out))
}
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
if budget != 63999 {
t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out))
}
}
func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) {
a := &Applier{}
modelInfo := &registry.ModelInfo{
MaxCompletionTokens: 1000,
Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
}
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`)
out := a.normalizeClaudeBudget(body, 2000, modelInfo)
if gjson.GetBytes(out, "thinking").Exists() {
t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out))
}
}
func TestNormalizeClaudeBudget_NoClamping(t *testing.T) {
a := &Applier{}
modelInfo := &registry.ModelInfo{
MaxCompletionTokens: 64000,
Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
}
body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`)
out := a.normalizeClaudeBudget(body, 16000, modelInfo)
maxTok := gjson.GetBytes(out, "max_tokens").Int()
if maxTok != 32000 {
t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out))
}
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
if budget != 16000 {
t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out))
}
}
func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) {
a := &Applier{}
modelInfo := &registry.ModelInfo{
MaxCompletionTokens: 8192,
Thinking: &registry.ThinkingSupport{Min: 1024, Max: 128000},
}
body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`)
out := a.normalizeClaudeBudget(body, 10000, modelInfo)
maxTok := gjson.GetBytes(out, "max_tokens").Int()
if maxTok != 8192 {
t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out))
}
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
if budget != 8191 {
t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out))
}
}

View File

@@ -26,6 +26,8 @@ type ConvertCodexResponseToClaudeParams struct {
HasToolCall bool
BlockIndex int
HasReceivedArgumentsDelta bool
HasTextDelta bool
TextBlockOpen bool
ThinkingBlockOpen bool
ThinkingStopPending bool
ThinkingSignature string
@@ -104,9 +106,11 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
} else if typeStr == "response.content_part.added" {
template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
params.TextBlockOpen = true
output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
} else if typeStr == "response.output_text.delta" {
params.HasTextDelta = true
template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
template, _ = sjson.SetBytes(template, "delta.text", rootResult.Get("delta").String())
@@ -115,6 +119,7 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
} else if typeStr == "response.content_part.done" {
template = []byte(`{"type":"content_block_stop","index":0}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
params.TextBlockOpen = false
params.BlockIndex++
output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
@@ -172,7 +177,49 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
} else if typeStr == "response.output_item.done" {
itemResult := rootResult.Get("item")
itemType := itemResult.Get("type").String()
if itemType == "function_call" {
if itemType == "message" {
if params.HasTextDelta {
return [][]byte{output}
}
contentResult := itemResult.Get("content")
if !contentResult.Exists() || !contentResult.IsArray() {
return [][]byte{output}
}
var textBuilder strings.Builder
contentResult.ForEach(func(_, part gjson.Result) bool {
if part.Get("type").String() != "output_text" {
return true
}
if txt := part.Get("text").String(); txt != "" {
textBuilder.WriteString(txt)
}
return true
})
text := textBuilder.String()
if text == "" {
return [][]byte{output}
}
output = append(output, finalizeCodexThinkingBlock(params)...)
if !params.TextBlockOpen {
template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
params.TextBlockOpen = true
output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
}
template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
template, _ = sjson.SetBytes(template, "delta.text", text)
output = translatorcommon.AppendSSEEventBytes(output, "content_block_delta", template, 2)
template = []byte(`{"type":"content_block_stop","index":0}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
params.TextBlockOpen = false
params.BlockIndex++
params.HasTextDelta = true
output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
} else if itemType == "function_call" {
template = []byte(`{"type":"content_block_stop","index":0}`)
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
params.BlockIndex++

View File

@@ -280,3 +280,40 @@ func TestConvertCodexResponseToClaudeNonStream_ThinkingIncludesSignature(t *test
t.Fatalf("unexpected thinking text: %q", got)
}
}
func TestConvertCodexResponseToClaude_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
ctx := context.Background()
originalRequest := []byte(`{"tools":[]}`)
var param any
chunks := [][]byte{
[]byte("data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_1\",\"model\":\"gpt-5\"}}"),
[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
}
var outputs [][]byte
for _, chunk := range chunks {
outputs = append(outputs, ConvertCodexResponseToClaude(ctx, "", originalRequest, nil, chunk, &param)...)
}
foundText := false
for _, out := range outputs {
for _, line := range strings.Split(string(out), "\n") {
if !strings.HasPrefix(line, "data: ") {
continue
}
data := gjson.Parse(strings.TrimPrefix(line, "data: "))
if data.Get("type").String() == "content_block_delta" && data.Get("delta.type").String() == "text_delta" && data.Get("delta.text").String() == "ok" {
foundText = true
break
}
}
if foundText {
break
}
}
if !foundText {
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
}
}

View File

@@ -20,10 +20,11 @@ var (
// ConvertCodexResponseToGeminiParams holds parameters for response conversion.
type ConvertCodexResponseToGeminiParams struct {
Model string
CreatedAt int64
ResponseID string
LastStorageOutput []byte
Model string
CreatedAt int64
ResponseID string
LastStorageOutput []byte
HasOutputTextDelta bool
}
// ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format.
@@ -42,10 +43,11 @@ type ConvertCodexResponseToGeminiParams struct {
func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
if *param == nil {
*param = &ConvertCodexResponseToGeminiParams{
Model: modelName,
CreatedAt: 0,
ResponseID: "",
LastStorageOutput: nil,
Model: modelName,
CreatedAt: 0,
ResponseID: "",
LastStorageOutput: nil,
HasOutputTextDelta: false,
}
}
@@ -58,18 +60,18 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
typeResult := rootResult.Get("type")
typeStr := typeResult.String()
params := (*param).(*ConvertCodexResponseToGeminiParams)
// Base Gemini response template
template := []byte(`{"candidates":[{"content":{"role":"model","parts":[]}}],"usageMetadata":{"trafficType":"PROVISIONED_THROUGHPUT"},"modelVersion":"gemini-2.5-pro","createTime":"2025-08-15T02:52:03.884209Z","responseId":"06CeaPH7NaCU48APvNXDyA4"}`)
if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 && typeStr == "response.output_item.done" {
template = append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...)
} else {
template, _ = sjson.SetBytes(template, "modelVersion", (*param).(*ConvertCodexResponseToGeminiParams).Model)
{
template, _ = sjson.SetBytes(template, "modelVersion", params.Model)
createdAtResult := rootResult.Get("response.created_at")
if createdAtResult.Exists() {
(*param).(*ConvertCodexResponseToGeminiParams).CreatedAt = createdAtResult.Int()
template, _ = sjson.SetBytes(template, "createTime", time.Unix((*param).(*ConvertCodexResponseToGeminiParams).CreatedAt, 0).Format(time.RFC3339Nano))
params.CreatedAt = createdAtResult.Int()
template, _ = sjson.SetBytes(template, "createTime", time.Unix(params.CreatedAt, 0).Format(time.RFC3339Nano))
}
template, _ = sjson.SetBytes(template, "responseId", (*param).(*ConvertCodexResponseToGeminiParams).ResponseID)
template, _ = sjson.SetBytes(template, "responseId", params.ResponseID)
}
// Handle function call completion
@@ -101,7 +103,7 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", functionCall)
template, _ = sjson.SetBytes(template, "candidates.0.finishReason", "STOP")
(*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput = append([]byte(nil), template...)
params.LastStorageOutput = append([]byte(nil), template...)
// Use this return to storage message
return [][]byte{}
@@ -111,15 +113,45 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
if typeStr == "response.created" { // Handle response creation - set model and response ID
template, _ = sjson.SetBytes(template, "modelVersion", rootResult.Get("response.model").String())
template, _ = sjson.SetBytes(template, "responseId", rootResult.Get("response.id").String())
(*param).(*ConvertCodexResponseToGeminiParams).ResponseID = rootResult.Get("response.id").String()
params.ResponseID = rootResult.Get("response.id").String()
} else if typeStr == "response.reasoning_summary_text.delta" { // Handle reasoning/thinking content delta
part := []byte(`{"thought":true,"text":""}`)
part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
} else if typeStr == "response.output_text.delta" { // Handle regular text content delta
params.HasOutputTextDelta = true
part := []byte(`{"text":""}`)
part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
} else if typeStr == "response.output_item.done" { // Fallback: emit final message text when no delta chunks were received
itemResult := rootResult.Get("item")
if itemResult.Get("type").String() != "message" || params.HasOutputTextDelta {
return [][]byte{}
}
contentResult := itemResult.Get("content")
if !contentResult.Exists() || !contentResult.IsArray() {
return [][]byte{}
}
wroteText := false
contentResult.ForEach(func(_, partResult gjson.Result) bool {
if partResult.Get("type").String() != "output_text" {
return true
}
text := partResult.Get("text").String()
if text == "" {
return true
}
part := []byte(`{"text":""}`)
part, _ = sjson.SetBytes(part, "text", text)
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
wroteText = true
return true
})
if wroteText {
params.HasOutputTextDelta = true
return [][]byte{template}
}
return [][]byte{}
} else if typeStr == "response.completed" { // Handle response completion with usage metadata
template, _ = sjson.SetBytes(template, "usageMetadata.promptTokenCount", rootResult.Get("response.usage.input_tokens").Int())
template, _ = sjson.SetBytes(template, "usageMetadata.candidatesTokenCount", rootResult.Get("response.usage.output_tokens").Int())
@@ -129,11 +161,10 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
return [][]byte{}
}
if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 {
return [][]byte{
append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...),
template,
}
if len(params.LastStorageOutput) > 0 {
stored := append([]byte(nil), params.LastStorageOutput...)
params.LastStorageOutput = nil
return [][]byte{stored, template}
}
return [][]byte{template}
}

View File

@@ -0,0 +1,35 @@
package gemini
import (
"context"
"testing"
"github.com/tidwall/gjson"
)
func TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
ctx := context.Background()
originalRequest := []byte(`{"tools":[]}`)
var param any
chunks := [][]byte{
[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
}
var outputs [][]byte
for _, chunk := range chunks {
outputs = append(outputs, ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, chunk, &param)...)
}
found := false
for _, out := range outputs {
if gjson.GetBytes(out, "candidates.0.content.parts.0.text").String() == "ok" {
found = true
break
}
}
if !found {
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
}
}

View File

@@ -53,8 +53,24 @@ func TestServiceApplyCoreAuthAddOrUpdate_DeleteReAddDoesNotInheritStaleRuntimeSt
if disabled.NextRefreshAfter.IsZero() {
t.Fatalf("expected disabled auth to still carry prior NextRefreshAfter for regression setup")
}
// Reconcile prunes unsupported model state during registration, so seed the
// disabled snapshot explicitly before exercising delete -> re-add behavior.
disabled.ModelStates = map[string]*coreauth.ModelState{
modelID: {
Quota: coreauth.QuotaState{BackoffLevel: 7},
},
}
if _, err := service.coreManager.Update(context.Background(), disabled); err != nil {
t.Fatalf("seed disabled auth stale ModelStates: %v", err)
}
disabled, ok = service.coreManager.GetByID(authID)
if !ok || disabled == nil {
t.Fatalf("expected disabled auth after stale state seeding")
}
if len(disabled.ModelStates) == 0 {
t.Fatalf("expected disabled auth to still carry prior ModelStates for regression setup")
t.Fatalf("expected disabled auth to carry seeded ModelStates for regression setup")
}
service.applyCoreAuthAddOrUpdate(context.Background(), &coreauth.Auth{

View File

@@ -0,0 +1,97 @@
package test
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
runtimeexecutor "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
)
func TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics(t *testing.T) {
model := fmt.Sprintf("gemini-2.5-flash-zero-usage-%d", time.Now().UnixNano())
source := fmt.Sprintf("zero-usage-%d@example.com", time.Now().UnixNano())
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
wantPath := "/v1beta/models/" + model + ":generateContent"
if r.URL.Path != wantPath {
t.Fatalf("path = %q, want %q", r.URL.Path, wantPath)
}
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":0,"candidatesTokenCount":0,"totalTokenCount":0}}`))
}))
defer server.Close()
executor := runtimeexecutor.NewGeminiExecutor(&config.Config{})
auth := &cliproxyauth.Auth{
Provider: "gemini",
Attributes: map[string]string{
"api_key": "test-upstream-key",
"base_url": server.URL,
},
Metadata: map[string]any{
"email": source,
},
}
prevStatsEnabled := internalusage.StatisticsEnabled()
internalusage.SetStatisticsEnabled(true)
t.Cleanup(func() {
internalusage.SetStatisticsEnabled(prevStatsEnabled)
})
_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
Model: model,
Payload: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FormatGemini,
OriginalRequest: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
})
if err != nil {
t.Fatalf("Execute error: %v", err)
}
detail := waitForStatisticsDetail(t, "gemini", model, source)
if detail.Failed {
t.Fatalf("detail failed = true, want false")
}
if detail.Tokens.TotalTokens != 0 {
t.Fatalf("total tokens = %d, want 0", detail.Tokens.TotalTokens)
}
}
func waitForStatisticsDetail(t *testing.T, apiName, model, source string) internalusage.RequestDetail {
t.Helper()
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
snapshot := internalusage.GetRequestStatistics().Snapshot()
apiSnapshot, ok := snapshot.APIs[apiName]
if !ok {
time.Sleep(10 * time.Millisecond)
continue
}
modelSnapshot, ok := apiSnapshot.Models[model]
if !ok {
time.Sleep(10 * time.Millisecond)
continue
}
for _, detail := range modelSnapshot.Details {
if detail.Source == source {
return detail
}
}
time.Sleep(10 * time.Millisecond)
}
t.Fatalf("timed out waiting for statistics detail for api=%q model=%q source=%q", apiName, model, source)
return internalusage.RequestDetail{}
}