mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-13 01:34:18 +00:00
81
.github/workflows/agents-md-guard.yml
vendored
Normal file
81
.github/workflows/agents-md-guard.yml
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
name: agents-md-guard
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
- reopened
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
close-when-agents-md-changed:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Detect AGENTS.md changes and close PR
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const { owner, repo } = context.repo;
|
||||
|
||||
const files = await github.paginate(github.rest.pulls.listFiles, {
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
per_page: 100,
|
||||
});
|
||||
|
||||
const touchesAgentsMd = (path) =>
|
||||
typeof path === "string" &&
|
||||
(path === "AGENTS.md" || path.endsWith("/AGENTS.md"));
|
||||
|
||||
const touched = files.filter(
|
||||
(f) => touchesAgentsMd(f.filename) || touchesAgentsMd(f.previous_filename),
|
||||
);
|
||||
|
||||
if (touched.length === 0) {
|
||||
core.info("No AGENTS.md changes detected.");
|
||||
return;
|
||||
}
|
||||
|
||||
const changedList = touched
|
||||
.map((f) =>
|
||||
f.previous_filename && f.previous_filename !== f.filename
|
||||
? `- ${f.previous_filename} -> ${f.filename}`
|
||||
: `- ${f.filename}`,
|
||||
)
|
||||
.join("\n");
|
||||
|
||||
const body = [
|
||||
"This repository does not allow modifying `AGENTS.md` in pull requests.",
|
||||
"",
|
||||
"Detected changes:",
|
||||
changedList,
|
||||
"",
|
||||
"Please revert these changes and open a new PR without touching `AGENTS.md`.",
|
||||
].join("\n");
|
||||
|
||||
try {
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
body,
|
||||
});
|
||||
} catch (error) {
|
||||
core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
|
||||
}
|
||||
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: "closed",
|
||||
});
|
||||
|
||||
core.setFailed("PR modifies AGENTS.md");
|
||||
73
.github/workflows/auto-retarget-main-pr-to-dev.yml
vendored
Normal file
73
.github/workflows/auto-retarget-main-pr-to-dev.yml
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
name: auto-retarget-main-pr-to-dev
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
- reopened
|
||||
- edited
|
||||
branches:
|
||||
- main
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
retarget:
|
||||
if: github.actor != 'github-actions[bot]'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Retarget PR base to dev
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
const prNumber = pr.number;
|
||||
const { owner, repo } = context.repo;
|
||||
|
||||
const baseRef = pr.base?.ref;
|
||||
const headRef = pr.head?.ref;
|
||||
const desiredBase = "dev";
|
||||
|
||||
if (baseRef !== "main") {
|
||||
core.info(`PR #${prNumber} base is ${baseRef}; nothing to do.`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (headRef === desiredBase) {
|
||||
core.info(`PR #${prNumber} is ${desiredBase} -> main; skipping retarget.`);
|
||||
return;
|
||||
}
|
||||
|
||||
core.info(`Retargeting PR #${prNumber} base from ${baseRef} to ${desiredBase}.`);
|
||||
|
||||
try {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
base: desiredBase,
|
||||
});
|
||||
} catch (error) {
|
||||
core.setFailed(`Failed to retarget PR #${prNumber} to ${desiredBase}: ${error.message}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const body = [
|
||||
`This pull request targeted \`${baseRef}\`.`,
|
||||
"",
|
||||
`The base branch has been automatically changed to \`${desiredBase}\`.`,
|
||||
].join("\n");
|
||||
|
||||
try {
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
body,
|
||||
});
|
||||
} catch (error) {
|
||||
core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
|
||||
}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -46,6 +46,7 @@ GEMINI.md
|
||||
.agents/*
|
||||
.opencode/*
|
||||
.idea/*
|
||||
.beads/*
|
||||
.bmad/*
|
||||
_bmad/*
|
||||
_bmad-output/*
|
||||
|
||||
58
AGENTS.md
Normal file
58
AGENTS.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# AGENTS.md
|
||||
|
||||
Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with OAuth and round-robin load balancing.
|
||||
|
||||
## Repository
|
||||
- GitHub: https://github.com/router-for-me/CLIProxyAPI
|
||||
|
||||
## Commands
|
||||
```bash
|
||||
gofmt -w . # Format (required after Go changes)
|
||||
go build -o cli-proxy-api ./cmd/server # Build
|
||||
go run ./cmd/server # Run dev server
|
||||
go test ./... # Run all tests
|
||||
go test -v -run TestName ./path/to/pkg # Run single test
|
||||
go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes)
|
||||
```
|
||||
- Common flags: `--config <path>`, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port <port>`
|
||||
|
||||
## Config
|
||||
- Default config: `config.yaml` (template: `config.example.yaml`)
|
||||
- `.env` is auto-loaded from the working directory
|
||||
- Auth material defaults under `auths/`
|
||||
- Storage backends: file-based default; optional Postgres/git/object store (`PGSTORE_*`, `GITSTORE_*`, `OBJECTSTORE_*`)
|
||||
|
||||
## Architecture
|
||||
- `cmd/server/` — Server entrypoint
|
||||
- `internal/api/` — Gin HTTP API (routes, middleware, modules)
|
||||
- `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy)
|
||||
- `internal/thinking/` — Main thinking/reasoning pipeline. `ApplyThinking()` (apply.go) parses suffixes (`suffix.go`, suffix overrides body), normalizes config to canonical `ThinkingConfig` (`types.go`), normalizes and validates centrally (`validate.go`/`convert.go`), then applies provider-specific output via `ProviderApplier`. Do not break this "canonical representation → per-provider translation" architecture.
|
||||
- `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket)
|
||||
- `internal/translator/` — Provider protocol translators (and shared `common`)
|
||||
- `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates
|
||||
- `internal/store/` — Storage implementations and secret resolution
|
||||
- `internal/managementasset/` — Config snapshots and management assets
|
||||
- `internal/cache/` — Request signature caching
|
||||
- `internal/watcher/` — Config hot-reload and watchers
|
||||
- `internal/wsrelay/` — WebSocket relay sessions
|
||||
- `internal/usage/` — Usage and token accounting
|
||||
- `internal/tui/` — Bubbletea terminal UI (`--tui`, `--standalone`)
|
||||
- `sdk/cliproxy/` — Embeddable SDK entry (service/builder/watchers/pipeline)
|
||||
- `test/` — Cross-module integration tests
|
||||
|
||||
## Code Conventions
|
||||
- Keep changes small and simple (KISS)
|
||||
- Comments in English only
|
||||
- If editing code that already contains non-English comments, translate them to English (don’t add new non-English comments)
|
||||
- For user-visible strings, keep the existing language used in that file/area
|
||||
- New Markdown docs should be in English unless the file is explicitly language-specific (e.g. `README_CN.md`)
|
||||
- As a rule, do not make standalone changes to `internal/translator/`. You may modify it only as part of broader changes elsewhere.
|
||||
- If a task requires changing only `internal/translator/`, run `gh repo view --json viewerPermission -q .viewerPermission` to confirm you have `WRITE`, `MAINTAIN`, or `ADMIN`. If you do, you may proceed; otherwise, file a GitHub issue including the goal, rationale, and the intended implementation code, then stop further work.
|
||||
- `internal/runtime/executor/` should contain executors and their unit tests only. Place any helper/supporting files under `internal/runtime/executor/helps/`.
|
||||
- Follow `gofmt`; keep imports goimports-style; wrap errors with context where helpful
|
||||
- Do not use `log.Fatal`/`log.Fatalf` (terminates the process); prefer returning errors and logging via logrus
|
||||
- Shadowed variables: use method suffix (`errStart := server.Start()`)
|
||||
- Wrap defer errors: `defer func() { if err := f.Close(); err != nil { log.Errorf(...) } }()`
|
||||
- Use logrus structured logging; avoid leaking secrets/tokens in logs
|
||||
- Avoid panics in HTTP handlers; prefer logged errors and meaningful HTTP status codes
|
||||
- Timeouts are allowed only during credential acquisition; after an upstream connection is established, do not set timeouts for any subsequent network behavior. Intentional exceptions that must remain allowed are the Codex websocket liveness deadlines in `internal/runtime/executor/codex_websockets_executor.go`, the wsrelay session deadlines in `internal/wsrelay/session.go`, the management APICall timeout in `internal/api/handlers/management/api_tools.go`, and the `cmd/fetch_antigravity_models` utility timeouts
|
||||
@@ -129,11 +129,11 @@ func TestModifyResponse_GzipScenarios(t *testing.T) {
|
||||
wantCE: "",
|
||||
},
|
||||
{
|
||||
name: "skips_non_2xx_status",
|
||||
name: "decompresses_non_2xx_status_when_gzip_detected",
|
||||
header: http.Header{},
|
||||
body: good,
|
||||
status: 404,
|
||||
wantBody: good,
|
||||
wantBody: goodJSON,
|
||||
wantCE: "",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -261,6 +261,28 @@ func classifyAntigravity429(body []byte) antigravity429Category {
|
||||
return antigravity429Unknown
|
||||
}
|
||||
|
||||
func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool {
|
||||
if len(body) == 0 {
|
||||
return false
|
||||
}
|
||||
details := gjson.GetBytes(body, "error.details")
|
||||
if !details.Exists() || !details.IsArray() {
|
||||
return false
|
||||
}
|
||||
for _, detail := range details.Array() {
|
||||
if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(detail.Get("metadata.quotaResetDelay").String()) != "" {
|
||||
return true
|
||||
}
|
||||
if strings.TrimSpace(detail.Get("metadata.model").String()) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func antigravityCreditsRetryEnabled(cfg *config.Config) bool {
|
||||
return cfg != nil && cfg.QuotaExceeded.AntigravityCredits
|
||||
}
|
||||
@@ -362,6 +384,12 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e
|
||||
lowerBody := strings.ToLower(string(body))
|
||||
for _, keyword := range antigravityCreditsExhaustedKeywords {
|
||||
if strings.Contains(lowerBody, keyword) {
|
||||
if keyword == "resource has been exhausted" &&
|
||||
statusCode == http.StatusTooManyRequests &&
|
||||
classifyAntigravity429(body) == antigravity429Unknown &&
|
||||
!antigravityHasQuotaResetDelayOrModelInfo(body) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -575,6 +603,14 @@ attemptLoop:
|
||||
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
continue
|
||||
}
|
||||
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
|
||||
delay := antigravityTransient429RetryDelay(attempt)
|
||||
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
|
||||
if errWait := antigravityWait(ctx, delay); errWait != nil {
|
||||
return resp, errWait
|
||||
}
|
||||
continue attemptLoop
|
||||
}
|
||||
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
|
||||
if idx+1 < len(baseURLs) {
|
||||
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
@@ -742,6 +778,14 @@ attemptLoop:
|
||||
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
continue
|
||||
}
|
||||
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
|
||||
delay := antigravityTransient429RetryDelay(attempt)
|
||||
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
|
||||
if errWait := antigravityWait(ctx, delay); errWait != nil {
|
||||
return resp, errWait
|
||||
}
|
||||
continue attemptLoop
|
||||
}
|
||||
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
|
||||
if idx+1 < len(baseURLs) {
|
||||
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
@@ -1158,6 +1202,14 @@ attemptLoop:
|
||||
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
continue
|
||||
}
|
||||
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
|
||||
delay := antigravityTransient429RetryDelay(attempt)
|
||||
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
|
||||
if errWait := antigravityWait(ctx, delay); errWait != nil {
|
||||
return nil, errWait
|
||||
}
|
||||
continue attemptLoop
|
||||
}
|
||||
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
|
||||
if idx+1 < len(baseURLs) {
|
||||
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
|
||||
@@ -1774,6 +1826,24 @@ func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
|
||||
return strings.Contains(msg, "no capacity available")
|
||||
}
|
||||
|
||||
func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []byte) bool {
|
||||
if statusCode != http.StatusTooManyRequests {
|
||||
return false
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return false
|
||||
}
|
||||
if classifyAntigravity429(body) != antigravity429Unknown {
|
||||
return false
|
||||
}
|
||||
status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String())
|
||||
if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") {
|
||||
return false
|
||||
}
|
||||
msg := strings.ToLower(string(body))
|
||||
return strings.Contains(msg, "resource has been exhausted")
|
||||
}
|
||||
|
||||
func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
|
||||
if attempt < 0 {
|
||||
attempt = 0
|
||||
@@ -1785,6 +1855,17 @@ func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
|
||||
return delay
|
||||
}
|
||||
|
||||
func antigravityTransient429RetryDelay(attempt int) time.Duration {
|
||||
if attempt < 0 {
|
||||
attempt = 0
|
||||
}
|
||||
delay := time.Duration(attempt+1) * 100 * time.Millisecond
|
||||
if delay > 500*time.Millisecond {
|
||||
delay = 500 * time.Millisecond
|
||||
}
|
||||
return delay
|
||||
}
|
||||
|
||||
func antigravityWait(ctx context.Context, wait time.Duration) error {
|
||||
if wait <= 0 {
|
||||
return nil
|
||||
|
||||
@@ -82,20 +82,86 @@ func TestInjectEnabledCreditTypes(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) {
|
||||
for _, body := range [][]byte{
|
||||
[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
|
||||
[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
|
||||
[]byte(`{"error":{"message":"Resource has been exhausted"}}`),
|
||||
} {
|
||||
if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
|
||||
t.Run("credit errors are marked", func(t *testing.T) {
|
||||
for _, body := range [][]byte{
|
||||
[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
|
||||
[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
|
||||
} {
|
||||
if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
|
||||
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("transient 429 resource exhausted is not marked", func(t *testing.T) {
|
||||
body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`)
|
||||
if shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
|
||||
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = true, want false", string(body))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("resource exhausted with quota metadata is still marked", func(t *testing.T) {
|
||||
body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted","status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","metadata":{"quotaResetDelay":"1h","model":"claude-sonnet-4-6"}}]}}`)
|
||||
if !shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
|
||||
t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) {
|
||||
t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAntigravityExecute_RetriesTransient429ResourceExhausted(t *testing.T) {
|
||||
resetAntigravityCreditsRetryState()
|
||||
t.Cleanup(resetAntigravityCreditsRetryState)
|
||||
|
||||
var requestCount int
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requestCount++
|
||||
switch requestCount {
|
||||
case 1:
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`))
|
||||
case 2:
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
|
||||
default:
|
||||
t.Fatalf("unexpected request count %d", requestCount)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
exec := NewAntigravityExecutor(&config.Config{RequestRetry: 1})
|
||||
auth := &cliproxyauth.Auth{
|
||||
ID: "auth-transient-429",
|
||||
Attributes: map[string]string{
|
||||
"base_url": server.URL,
|
||||
},
|
||||
Metadata: map[string]any{
|
||||
"access_token": "token",
|
||||
"project_id": "project-1",
|
||||
"expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339),
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "gemini-2.5-flash",
|
||||
Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
|
||||
}, cliproxyexecutor.Options{
|
||||
SourceFormat: sdktranslator.FormatAntigravity,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Execute() error = %v", err)
|
||||
}
|
||||
if len(resp.Payload) == 0 {
|
||||
t.Fatal("Execute() returned empty payload")
|
||||
}
|
||||
if requestCount != 2 {
|
||||
t.Fatalf("request count = %d, want 2", requestCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) {
|
||||
resetAntigravityCreditsRetryState()
|
||||
t.Cleanup(resetAntigravityCreditsRetryState)
|
||||
|
||||
@@ -69,9 +69,6 @@ func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
|
||||
detail.TotalTokens = total
|
||||
}
|
||||
}
|
||||
if detail.InputTokens == 0 && detail.OutputTokens == 0 && detail.ReasoningTokens == 0 && detail.CachedTokens == 0 && detail.TotalTokens == 0 && !failed {
|
||||
return
|
||||
}
|
||||
r.once.Do(func() {
|
||||
usage.PublishRecord(ctx, r.buildRecord(detail, failed))
|
||||
})
|
||||
|
||||
@@ -56,9 +56,12 @@ func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
|
||||
}
|
||||
if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
|
||||
if parts[0].Get("type").String() != "text" || parts[0].Get("cache_control.type").String() != "ephemeral" {
|
||||
t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
|
||||
}
|
||||
if text := parts[0].Get("text").String(); text != "" && text != "You are Qwen Code." {
|
||||
t.Fatalf("messages[0].content[0].text = %q, want empty string or default prompt", text)
|
||||
}
|
||||
if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
|
||||
t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
|
||||
}
|
||||
|
||||
@@ -174,8 +174,7 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
||||
// Ensure the request satisfies Claude constraints:
|
||||
// 1) Determine effective max_tokens (request overrides model default)
|
||||
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
||||
// 3) If the adjusted budget falls below the model minimum, try raising max_tokens
|
||||
// (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
|
||||
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
|
||||
// 4) If max_tokens came from model default, write it back into the request
|
||||
|
||||
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||
@@ -194,28 +193,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
||||
minBudget = modelInfo.Thinking.Min
|
||||
}
|
||||
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||
// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
|
||||
// Try raising max_tokens to fit the original budget.
|
||||
needed := budgetTokens + 1
|
||||
maxAllowed := 0
|
||||
if modelInfo != nil {
|
||||
maxAllowed = modelInfo.MaxCompletionTokens
|
||||
}
|
||||
if maxAllowed > 0 && needed > maxAllowed {
|
||||
// Cannot use original budget; cap max_tokens at model limit.
|
||||
needed = maxAllowed
|
||||
}
|
||||
cappedBudget := needed - 1
|
||||
if cappedBudget < minBudget {
|
||||
// Impossible to satisfy both budget >= minBudget and budget < max_tokens
|
||||
// within the model's completion limit. Disable thinking entirely.
|
||||
body, _ = sjson.DeleteBytes(body, "thinking")
|
||||
return body
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "max_tokens", needed)
|
||||
if cappedBudget != budgetTokens {
|
||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
|
||||
}
|
||||
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
||||
// leave the request unchanged.
|
||||
return body
|
||||
}
|
||||
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 5000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 5001 {
|
||||
t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 200000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 64000 {
|
||||
t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 63999 {
|
||||
t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 1000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 2000, modelInfo)
|
||||
|
||||
if gjson.GetBytes(out, "thinking").Exists() {
|
||||
t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_NoClamping(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 16000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 32000 {
|
||||
t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 16000 {
|
||||
t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 10000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 8192 {
|
||||
t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 8191 {
|
||||
t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
@@ -26,6 +26,8 @@ type ConvertCodexResponseToClaudeParams struct {
|
||||
HasToolCall bool
|
||||
BlockIndex int
|
||||
HasReceivedArgumentsDelta bool
|
||||
HasTextDelta bool
|
||||
TextBlockOpen bool
|
||||
ThinkingBlockOpen bool
|
||||
ThinkingStopPending bool
|
||||
ThinkingSignature string
|
||||
@@ -104,9 +106,11 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
|
||||
} else if typeStr == "response.content_part.added" {
|
||||
template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
params.TextBlockOpen = true
|
||||
|
||||
output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
|
||||
} else if typeStr == "response.output_text.delta" {
|
||||
params.HasTextDelta = true
|
||||
template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
template, _ = sjson.SetBytes(template, "delta.text", rootResult.Get("delta").String())
|
||||
@@ -115,6 +119,7 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
|
||||
} else if typeStr == "response.content_part.done" {
|
||||
template = []byte(`{"type":"content_block_stop","index":0}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
params.TextBlockOpen = false
|
||||
params.BlockIndex++
|
||||
|
||||
output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
|
||||
@@ -172,7 +177,49 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
|
||||
} else if typeStr == "response.output_item.done" {
|
||||
itemResult := rootResult.Get("item")
|
||||
itemType := itemResult.Get("type").String()
|
||||
if itemType == "function_call" {
|
||||
if itemType == "message" {
|
||||
if params.HasTextDelta {
|
||||
return [][]byte{output}
|
||||
}
|
||||
contentResult := itemResult.Get("content")
|
||||
if !contentResult.Exists() || !contentResult.IsArray() {
|
||||
return [][]byte{output}
|
||||
}
|
||||
var textBuilder strings.Builder
|
||||
contentResult.ForEach(func(_, part gjson.Result) bool {
|
||||
if part.Get("type").String() != "output_text" {
|
||||
return true
|
||||
}
|
||||
if txt := part.Get("text").String(); txt != "" {
|
||||
textBuilder.WriteString(txt)
|
||||
}
|
||||
return true
|
||||
})
|
||||
text := textBuilder.String()
|
||||
if text == "" {
|
||||
return [][]byte{output}
|
||||
}
|
||||
|
||||
output = append(output, finalizeCodexThinkingBlock(params)...)
|
||||
if !params.TextBlockOpen {
|
||||
template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
params.TextBlockOpen = true
|
||||
output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
|
||||
}
|
||||
|
||||
template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
template, _ = sjson.SetBytes(template, "delta.text", text)
|
||||
output = translatorcommon.AppendSSEEventBytes(output, "content_block_delta", template, 2)
|
||||
|
||||
template = []byte(`{"type":"content_block_stop","index":0}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
params.TextBlockOpen = false
|
||||
params.BlockIndex++
|
||||
params.HasTextDelta = true
|
||||
output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
|
||||
} else if itemType == "function_call" {
|
||||
template = []byte(`{"type":"content_block_stop","index":0}`)
|
||||
template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
|
||||
params.BlockIndex++
|
||||
|
||||
@@ -280,3 +280,40 @@ func TestConvertCodexResponseToClaudeNonStream_ThinkingIncludesSignature(t *test
|
||||
t.Fatalf("unexpected thinking text: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertCodexResponseToClaude_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
originalRequest := []byte(`{"tools":[]}`)
|
||||
var param any
|
||||
|
||||
chunks := [][]byte{
|
||||
[]byte("data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_1\",\"model\":\"gpt-5\"}}"),
|
||||
[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
|
||||
[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
|
||||
}
|
||||
|
||||
var outputs [][]byte
|
||||
for _, chunk := range chunks {
|
||||
outputs = append(outputs, ConvertCodexResponseToClaude(ctx, "", originalRequest, nil, chunk, ¶m)...)
|
||||
}
|
||||
|
||||
foundText := false
|
||||
for _, out := range outputs {
|
||||
for _, line := range strings.Split(string(out), "\n") {
|
||||
if !strings.HasPrefix(line, "data: ") {
|
||||
continue
|
||||
}
|
||||
data := gjson.Parse(strings.TrimPrefix(line, "data: "))
|
||||
if data.Get("type").String() == "content_block_delta" && data.Get("delta.type").String() == "text_delta" && data.Get("delta.text").String() == "ok" {
|
||||
foundText = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if foundText {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundText {
|
||||
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,10 +20,11 @@ var (
|
||||
|
||||
// ConvertCodexResponseToGeminiParams holds parameters for response conversion.
|
||||
type ConvertCodexResponseToGeminiParams struct {
|
||||
Model string
|
||||
CreatedAt int64
|
||||
ResponseID string
|
||||
LastStorageOutput []byte
|
||||
Model string
|
||||
CreatedAt int64
|
||||
ResponseID string
|
||||
LastStorageOutput []byte
|
||||
HasOutputTextDelta bool
|
||||
}
|
||||
|
||||
// ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format.
|
||||
@@ -42,10 +43,11 @@ type ConvertCodexResponseToGeminiParams struct {
|
||||
func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
|
||||
if *param == nil {
|
||||
*param = &ConvertCodexResponseToGeminiParams{
|
||||
Model: modelName,
|
||||
CreatedAt: 0,
|
||||
ResponseID: "",
|
||||
LastStorageOutput: nil,
|
||||
Model: modelName,
|
||||
CreatedAt: 0,
|
||||
ResponseID: "",
|
||||
LastStorageOutput: nil,
|
||||
HasOutputTextDelta: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,18 +60,18 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
||||
typeResult := rootResult.Get("type")
|
||||
typeStr := typeResult.String()
|
||||
|
||||
params := (*param).(*ConvertCodexResponseToGeminiParams)
|
||||
|
||||
// Base Gemini response template
|
||||
template := []byte(`{"candidates":[{"content":{"role":"model","parts":[]}}],"usageMetadata":{"trafficType":"PROVISIONED_THROUGHPUT"},"modelVersion":"gemini-2.5-pro","createTime":"2025-08-15T02:52:03.884209Z","responseId":"06CeaPH7NaCU48APvNXDyA4"}`)
|
||||
if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 && typeStr == "response.output_item.done" {
|
||||
template = append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...)
|
||||
} else {
|
||||
template, _ = sjson.SetBytes(template, "modelVersion", (*param).(*ConvertCodexResponseToGeminiParams).Model)
|
||||
{
|
||||
template, _ = sjson.SetBytes(template, "modelVersion", params.Model)
|
||||
createdAtResult := rootResult.Get("response.created_at")
|
||||
if createdAtResult.Exists() {
|
||||
(*param).(*ConvertCodexResponseToGeminiParams).CreatedAt = createdAtResult.Int()
|
||||
template, _ = sjson.SetBytes(template, "createTime", time.Unix((*param).(*ConvertCodexResponseToGeminiParams).CreatedAt, 0).Format(time.RFC3339Nano))
|
||||
params.CreatedAt = createdAtResult.Int()
|
||||
template, _ = sjson.SetBytes(template, "createTime", time.Unix(params.CreatedAt, 0).Format(time.RFC3339Nano))
|
||||
}
|
||||
template, _ = sjson.SetBytes(template, "responseId", (*param).(*ConvertCodexResponseToGeminiParams).ResponseID)
|
||||
template, _ = sjson.SetBytes(template, "responseId", params.ResponseID)
|
||||
}
|
||||
|
||||
// Handle function call completion
|
||||
@@ -101,7 +103,7 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
||||
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", functionCall)
|
||||
template, _ = sjson.SetBytes(template, "candidates.0.finishReason", "STOP")
|
||||
|
||||
(*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput = append([]byte(nil), template...)
|
||||
params.LastStorageOutput = append([]byte(nil), template...)
|
||||
|
||||
// Use this return to storage message
|
||||
return [][]byte{}
|
||||
@@ -111,15 +113,45 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
||||
if typeStr == "response.created" { // Handle response creation - set model and response ID
|
||||
template, _ = sjson.SetBytes(template, "modelVersion", rootResult.Get("response.model").String())
|
||||
template, _ = sjson.SetBytes(template, "responseId", rootResult.Get("response.id").String())
|
||||
(*param).(*ConvertCodexResponseToGeminiParams).ResponseID = rootResult.Get("response.id").String()
|
||||
params.ResponseID = rootResult.Get("response.id").String()
|
||||
} else if typeStr == "response.reasoning_summary_text.delta" { // Handle reasoning/thinking content delta
|
||||
part := []byte(`{"thought":true,"text":""}`)
|
||||
part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
|
||||
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||
} else if typeStr == "response.output_text.delta" { // Handle regular text content delta
|
||||
params.HasOutputTextDelta = true
|
||||
part := []byte(`{"text":""}`)
|
||||
part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
|
||||
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||
} else if typeStr == "response.output_item.done" { // Fallback: emit final message text when no delta chunks were received
|
||||
itemResult := rootResult.Get("item")
|
||||
if itemResult.Get("type").String() != "message" || params.HasOutputTextDelta {
|
||||
return [][]byte{}
|
||||
}
|
||||
contentResult := itemResult.Get("content")
|
||||
if !contentResult.Exists() || !contentResult.IsArray() {
|
||||
return [][]byte{}
|
||||
}
|
||||
wroteText := false
|
||||
contentResult.ForEach(func(_, partResult gjson.Result) bool {
|
||||
if partResult.Get("type").String() != "output_text" {
|
||||
return true
|
||||
}
|
||||
text := partResult.Get("text").String()
|
||||
if text == "" {
|
||||
return true
|
||||
}
|
||||
part := []byte(`{"text":""}`)
|
||||
part, _ = sjson.SetBytes(part, "text", text)
|
||||
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||
wroteText = true
|
||||
return true
|
||||
})
|
||||
if wroteText {
|
||||
params.HasOutputTextDelta = true
|
||||
return [][]byte{template}
|
||||
}
|
||||
return [][]byte{}
|
||||
} else if typeStr == "response.completed" { // Handle response completion with usage metadata
|
||||
template, _ = sjson.SetBytes(template, "usageMetadata.promptTokenCount", rootResult.Get("response.usage.input_tokens").Int())
|
||||
template, _ = sjson.SetBytes(template, "usageMetadata.candidatesTokenCount", rootResult.Get("response.usage.output_tokens").Int())
|
||||
@@ -129,11 +161,10 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
||||
return [][]byte{}
|
||||
}
|
||||
|
||||
if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 {
|
||||
return [][]byte{
|
||||
append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...),
|
||||
template,
|
||||
}
|
||||
if len(params.LastStorageOutput) > 0 {
|
||||
stored := append([]byte(nil), params.LastStorageOutput...)
|
||||
params.LastStorageOutput = nil
|
||||
return [][]byte{stored, template}
|
||||
}
|
||||
return [][]byte{template}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
originalRequest := []byte(`{"tools":[]}`)
|
||||
var param any
|
||||
|
||||
chunks := [][]byte{
|
||||
[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
|
||||
[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
|
||||
}
|
||||
|
||||
var outputs [][]byte
|
||||
for _, chunk := range chunks {
|
||||
outputs = append(outputs, ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, chunk, ¶m)...)
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, out := range outputs {
|
||||
if gjson.GetBytes(out, "candidates.0.content.parts.0.text").String() == "ok" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
|
||||
}
|
||||
}
|
||||
@@ -53,8 +53,24 @@ func TestServiceApplyCoreAuthAddOrUpdate_DeleteReAddDoesNotInheritStaleRuntimeSt
|
||||
if disabled.NextRefreshAfter.IsZero() {
|
||||
t.Fatalf("expected disabled auth to still carry prior NextRefreshAfter for regression setup")
|
||||
}
|
||||
|
||||
// Reconcile prunes unsupported model state during registration, so seed the
|
||||
// disabled snapshot explicitly before exercising delete -> re-add behavior.
|
||||
disabled.ModelStates = map[string]*coreauth.ModelState{
|
||||
modelID: {
|
||||
Quota: coreauth.QuotaState{BackoffLevel: 7},
|
||||
},
|
||||
}
|
||||
if _, err := service.coreManager.Update(context.Background(), disabled); err != nil {
|
||||
t.Fatalf("seed disabled auth stale ModelStates: %v", err)
|
||||
}
|
||||
|
||||
disabled, ok = service.coreManager.GetByID(authID)
|
||||
if !ok || disabled == nil {
|
||||
t.Fatalf("expected disabled auth after stale state seeding")
|
||||
}
|
||||
if len(disabled.ModelStates) == 0 {
|
||||
t.Fatalf("expected disabled auth to still carry prior ModelStates for regression setup")
|
||||
t.Fatalf("expected disabled auth to carry seeded ModelStates for regression setup")
|
||||
}
|
||||
|
||||
service.applyCoreAuthAddOrUpdate(context.Background(), &coreauth.Auth{
|
||||
|
||||
97
test/usage_logging_test.go
Normal file
97
test/usage_logging_test.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
runtimeexecutor "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
|
||||
internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
)
|
||||
|
||||
func TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics(t *testing.T) {
|
||||
model := fmt.Sprintf("gemini-2.5-flash-zero-usage-%d", time.Now().UnixNano())
|
||||
source := fmt.Sprintf("zero-usage-%d@example.com", time.Now().UnixNano())
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
wantPath := "/v1beta/models/" + model + ":generateContent"
|
||||
if r.URL.Path != wantPath {
|
||||
t.Fatalf("path = %q, want %q", r.URL.Path, wantPath)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":0,"candidatesTokenCount":0,"totalTokenCount":0}}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := runtimeexecutor.NewGeminiExecutor(&config.Config{})
|
||||
auth := &cliproxyauth.Auth{
|
||||
Provider: "gemini",
|
||||
Attributes: map[string]string{
|
||||
"api_key": "test-upstream-key",
|
||||
"base_url": server.URL,
|
||||
},
|
||||
Metadata: map[string]any{
|
||||
"email": source,
|
||||
},
|
||||
}
|
||||
|
||||
prevStatsEnabled := internalusage.StatisticsEnabled()
|
||||
internalusage.SetStatisticsEnabled(true)
|
||||
t.Cleanup(func() {
|
||||
internalusage.SetStatisticsEnabled(prevStatsEnabled)
|
||||
})
|
||||
|
||||
_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: model,
|
||||
Payload: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
|
||||
}, cliproxyexecutor.Options{
|
||||
SourceFormat: sdktranslator.FormatGemini,
|
||||
OriginalRequest: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Execute error: %v", err)
|
||||
}
|
||||
|
||||
detail := waitForStatisticsDetail(t, "gemini", model, source)
|
||||
if detail.Failed {
|
||||
t.Fatalf("detail failed = true, want false")
|
||||
}
|
||||
if detail.Tokens.TotalTokens != 0 {
|
||||
t.Fatalf("total tokens = %d, want 0", detail.Tokens.TotalTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func waitForStatisticsDetail(t *testing.T, apiName, model, source string) internalusage.RequestDetail {
|
||||
t.Helper()
|
||||
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
snapshot := internalusage.GetRequestStatistics().Snapshot()
|
||||
apiSnapshot, ok := snapshot.APIs[apiName]
|
||||
if !ok {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
modelSnapshot, ok := apiSnapshot.Models[model]
|
||||
if !ok {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
for _, detail := range modelSnapshot.Details {
|
||||
if detail.Source == source {
|
||||
return detail
|
||||
}
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
t.Fatalf("timed out waiting for statistics detail for api=%q model=%q source=%q", apiName, model, source)
|
||||
return internalusage.RequestDetail{}
|
||||
}
|
||||
Reference in New Issue
Block a user