From 5bb69fa4ab9fd6524a08c3abc80ae424de0ed02a Mon Sep 17 00:00:00 2001 From: Allen Yi Date: Sat, 11 Apr 2026 15:22:27 +0800 Subject: [PATCH 1/5] docs: refine CLIproxyAPI Quota Inspector description in all README locales --- README.md | 4 ++++ README_CN.md | 4 ++++ README_JA.md | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/README.md b/README.md index c027be19..ca972bb8 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,10 @@ helping users to immersively use AI assistants across applications on controlled Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed. +### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) + +Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics. + > [!NOTE] > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list. diff --git a/README_CN.md b/README_CN.md index 3e71528d..ec188df6 100644 --- a/README_CN.md +++ b/README_CN.md @@ -177,6 +177,10 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口 跨平台桌面应用(macOS、Windows、Linux),以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点,具备使用分析、请求监控和热门编程工具自动配置功能,无需 API 密钥。 +### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) + +上手即用的面向 CLIProxyAPI 跨平台配额查询工具,支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。 + > [!NOTE] > 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。 diff --git a/README_JA.md b/README_JA.md index d3f06949..597cada3 100644 --- a/README_JA.md +++ b/README_JA.md @@ -178,6 +178,10 @@ Shadow AIは制限された環境向けに特別に設計されたAIアシスタ CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリ(macOS、Windows、Linux)。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要 +### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) + +CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。 + > [!NOTE] > CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。 From c585caa0ce2dd3313db2aada49878027674c923b Mon Sep 17 00:00:00 2001 From: Allen Yi Date: Sat, 11 Apr 2026 16:22:45 +0800 Subject: [PATCH 2/5] docs: fix CLIProxyAPI Quota Inspector naming and link casing --- README.md | 2 +- README_CN.md | 2 +- README_JA.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ca972bb8..ef176668 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ helping users to immersively use AI assistants across applications on controlled Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed. -### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) +### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics. diff --git a/README_CN.md b/README_CN.md index ec188df6..92340f45 100644 --- a/README_CN.md +++ b/README_CN.md @@ -177,7 +177,7 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口 跨平台桌面应用(macOS、Windows、Linux),以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点,具备使用分析、请求监控和热门编程工具自动配置功能,无需 API 密钥。 -### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) +### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) 上手即用的面向 CLIProxyAPI 跨平台配额查询工具,支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。 diff --git a/README_JA.md b/README_JA.md index 597cada3..d2594ad7 100644 --- a/README_JA.md +++ b/README_JA.md @@ -178,7 +178,7 @@ Shadow AIは制限された環境向けに特別に設計されたAIアシスタ CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリ(macOS、Windows、Linux)。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要 -### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector) +### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。 From 828df800881f73860ca657d21ab977a4f82a225a Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 11 Apr 2026 16:35:18 +0800 Subject: [PATCH 3/5] refactor(executor): remove immediate retry with token refresh on 429 for Qwen and update tests accordingly --- internal/runtime/executor/qwen_executor.go | 53 ------------------ .../runtime/executor/qwen_executor_test.go | 56 +++++++++---------- 2 files changed, 27 insertions(+), 82 deletions(-) diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ec02460e..146be5c1 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -153,17 +153,6 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, return errCode, retryAfter } -func qwenShouldAttemptImmediateRefreshRetry(auth *cliproxyauth.Auth) bool { - if auth == nil || auth.Metadata == nil { - return false - } - if provider := strings.TrimSpace(auth.Provider); provider != "" && !strings.EqualFold(provider, "qwen") { - return false - } - refreshToken, _ := auth.Metadata["refresh_token"].(string) - return strings.TrimSpace(refreshToken) != "" -} - // ensureQwenSystemMessage ensures the request has a single system message at the beginning. // It always injects the default system prompt and merges any user-provided system messages // into the injected system message content to satisfy Qwen's strict message ordering rules. @@ -340,7 +329,6 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req return resp, err } - qwenImmediateRetryAttempted := false for { if errRate := checkQwenRateLimit(authID); errRate != nil { helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) @@ -398,26 +386,6 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) { - helps.LogWithRequestID(ctx).WithFields(log.Fields{ - "auth_id": redactAuthID(authID), - "model": req.Model, - }).Info("qwen 429 encountered, refreshing token for immediate retry") - - qwenImmediateRetryAttempted = true - refreshFn := e.refreshForImmediateRetry - if refreshFn == nil { - refreshFn = e.Refresh - } - refreshedAuth, errRefresh := refreshFn(ctx, auth) - if errRefresh != nil { - helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry") - } else if refreshedAuth != nil { - auth = refreshedAuth - continue - } - } - err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} return resp, err } @@ -488,7 +456,6 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut return nil, err } - qwenImmediateRetryAttempted := false for { if errRate := checkQwenRateLimit(authID); errRate != nil { helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) @@ -546,26 +513,6 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) { - helps.LogWithRequestID(ctx).WithFields(log.Fields{ - "auth_id": redactAuthID(authID), - "model": req.Model, - }).Info("qwen 429 encountered, refreshing token for immediate retry (stream)") - - qwenImmediateRetryAttempted = true - refreshFn := e.refreshForImmediateRetry - if refreshFn == nil { - refreshFn = e.Refresh - } - refreshedAuth, errRefresh := refreshFn(ctx, auth) - if errRefresh != nil { - helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry (stream)") - } else if refreshedAuth != nil { - auth = refreshedAuth - continue - } - } - err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} return nil, err } diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index 97b4757e..f6363f66 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -216,7 +216,7 @@ func TestQwenCreds_NormalizesResourceURL(t *testing.T) { } } -func TestQwenExecutorExecute_429RefreshAndRetry(t *testing.T) { +func TestQwenExecutorExecute_429DoesNotRefreshOrRetry(t *testing.T) { qwenRateLimiter.Lock() qwenRateLimiter.requests = make(map[string][]time.Time) qwenRateLimiter.Unlock() @@ -272,27 +272,31 @@ func TestQwenExecutorExecute_429RefreshAndRetry(t *testing.T) { } ctx := context.Background() - resp, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{ + _, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{ Model: "qwen-max", Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`), }, cliproxyexecutor.Options{ SourceFormat: sdktranslator.FromString("openai"), }) - if err != nil { - t.Fatalf("Execute() error = %v", err) + if err == nil { + t.Fatalf("Execute() expected error, got nil") } - if len(resp.Payload) == 0 { - t.Fatalf("Execute() payload is empty") + status, ok := err.(statusErr) + if !ok { + t.Fatalf("Execute() error type = %T, want statusErr", err) } - if atomic.LoadInt32(&calls) != 2 { - t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls)) + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) } - if atomic.LoadInt32(&refresherCalls) != 1 { - t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls)) + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) + } + if atomic.LoadInt32(&refresherCalls) != 0 { + t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls)) } } -func TestQwenExecutorExecuteStream_429RefreshAndRetry(t *testing.T) { +func TestQwenExecutorExecuteStream_429DoesNotRefreshOrRetry(t *testing.T) { qwenRateLimiter.Lock() qwenRateLimiter.requests = make(map[string][]time.Time) qwenRateLimiter.Unlock() @@ -351,32 +355,26 @@ func TestQwenExecutorExecuteStream_429RefreshAndRetry(t *testing.T) { } ctx := context.Background() - stream, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{ + _, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{ Model: "qwen-max", Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`), }, cliproxyexecutor.Options{ SourceFormat: sdktranslator.FromString("openai"), }) - if err != nil { - t.Fatalf("ExecuteStream() error = %v", err) + if err == nil { + t.Fatalf("ExecuteStream() expected error, got nil") } - if atomic.LoadInt32(&calls) != 2 { - t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls)) + status, ok := err.(statusErr) + if !ok { + t.Fatalf("ExecuteStream() error type = %T, want statusErr", err) } - if atomic.LoadInt32(&refresherCalls) != 1 { - t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls)) + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) } - - var sawPayload bool - for chunk := range stream.Chunks { - if chunk.Err != nil { - t.Fatalf("stream chunk error = %v", chunk.Err) - } - if len(chunk.Payload) > 0 { - sawPayload = true - } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) } - if !sawPayload { - t.Fatalf("stream did not produce any payload chunks") + if atomic.LoadInt32(&refresherCalls) != 0 { + t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls)) } } From f135fdf7fcbf3a46947209b3b8eef600196fddb1 Mon Sep 17 00:00:00 2001 From: Allen Yi Date: Sat, 11 Apr 2026 16:39:32 +0800 Subject: [PATCH 4/5] docs: clarify codex quota window wording in README locales --- README.md | 2 +- README_CN.md | 2 +- README_JA.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ef176668..e824a485 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a n ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) -Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics. +Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account codex 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics. > [!NOTE] > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list. diff --git a/README_CN.md b/README_CN.md index 92340f45..a671db57 100644 --- a/README_CN.md +++ b/README_CN.md @@ -179,7 +179,7 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口 ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) -上手即用的面向 CLIProxyAPI 跨平台配额查询工具,支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。 +上手即用的面向 CLIProxyAPI 跨平台配额查询工具,支持按账号展示 codex 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。 > [!NOTE] > 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。 diff --git a/README_JA.md b/README_JA.md index d2594ad7..88b33624 100644 --- a/README_JA.md +++ b/README_JA.md @@ -180,7 +180,7 @@ CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォー ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector) -CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。 +CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの codex 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。 > [!NOTE] > CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。 From 0ab1f5412f079de0d5c1afada89d06063404cb04 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sat, 11 Apr 2026 21:04:55 +0800 Subject: [PATCH 5/5] fix(executor): handle 429 Retry-After header and default retry logic for quota exhaustion - Added proper parsing of `Retry-After` headers for 429 responses. - Set default retry duration when "disable cooling" is active on quota exhaustion. - Updated tests to verify `Retry-After` handling and default behavior. --- internal/runtime/executor/qwen_executor.go | 49 ++++ .../runtime/executor/qwen_executor_test.go | 234 ++++++++++++++++++ 2 files changed, 283 insertions(+) diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 146be5c1..07ad0b3b 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/http" + "strconv" "strings" "sync" "time" @@ -153,6 +154,40 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, return errCode, retryAfter } +func qwenDisableCooling(cfg *config.Config, auth *cliproxyauth.Auth) bool { + if auth != nil { + if override, ok := auth.DisableCoolingOverride(); ok { + return override + } + } + if cfg == nil { + return false + } + return cfg.DisableCooling +} + +func parseRetryAfterHeader(header http.Header, now time.Time) *time.Duration { + raw := strings.TrimSpace(header.Get("Retry-After")) + if raw == "" { + return nil + } + if seconds, err := strconv.Atoi(raw); err == nil { + if seconds <= 0 { + return nil + } + d := time.Duration(seconds) * time.Second + return &d + } + if at, err := http.ParseTime(raw); err == nil { + if !at.After(now) { + return nil + } + d := at.Sub(now) + return &d + } + return nil +} + // ensureQwenSystemMessage ensures the request has a single system message at the beginning. // It always injects the default system prompt and merges any user-provided system messages // into the injected system message content to satisfy Qwen's strict message ordering rules. @@ -384,6 +419,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req } errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) + if errCode == http.StatusTooManyRequests && retryAfter == nil { + retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now()) + } + if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) { + defaultRetryAfter := time.Second + retryAfter = &defaultRetryAfter + } helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} @@ -511,6 +553,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) + if errCode == http.StatusTooManyRequests && retryAfter == nil { + retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now()) + } + if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) { + defaultRetryAfter := time.Second + retryAfter = &defaultRetryAfter + } helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index f6363f66..f19cc8ca 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -378,3 +378,237 @@ func TestQwenExecutorExecuteStream_429DoesNotRefreshOrRetry(t *testing.T) { t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls)) } } + +func TestQwenExecutorExecute_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) { + qwenRateLimiter.Lock() + qwenRateLimiter.requests = make(map[string][]time.Time) + qwenRateLimiter.Unlock() + + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + if r.URL.Path != "/v1/chat/completions" { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Retry-After", "2") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`)) + })) + defer srv.Close() + + exec := NewQwenExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + ID: "auth-test", + Provider: "qwen", + Attributes: map[string]string{ + "base_url": srv.URL + "/v1", + }, + Metadata: map[string]any{ + "access_token": "test-token", + }, + } + ctx := context.Background() + + _, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{ + Model: "qwen-max", + Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err == nil { + t.Fatalf("Execute() expected error, got nil") + } + status, ok := err.(statusErr) + if !ok { + t.Fatalf("Execute() error type = %T, want statusErr", err) + } + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) + } + if status.RetryAfter() == nil { + t.Fatalf("Execute() RetryAfter is nil, want non-nil") + } + if got := *status.RetryAfter(); got != 2*time.Second { + t.Fatalf("Execute() RetryAfter = %v, want %v", got, 2*time.Second) + } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) + } +} + +func TestQwenExecutorExecuteStream_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) { + qwenRateLimiter.Lock() + qwenRateLimiter.requests = make(map[string][]time.Time) + qwenRateLimiter.Unlock() + + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + if r.URL.Path != "/v1/chat/completions" { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Retry-After", "2") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`)) + })) + defer srv.Close() + + exec := NewQwenExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + ID: "auth-test", + Provider: "qwen", + Attributes: map[string]string{ + "base_url": srv.URL + "/v1", + }, + Metadata: map[string]any{ + "access_token": "test-token", + }, + } + ctx := context.Background() + + _, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{ + Model: "qwen-max", + Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err == nil { + t.Fatalf("ExecuteStream() expected error, got nil") + } + status, ok := err.(statusErr) + if !ok { + t.Fatalf("ExecuteStream() error type = %T, want statusErr", err) + } + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) + } + if status.RetryAfter() == nil { + t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil") + } + if got := *status.RetryAfter(); got != 2*time.Second { + t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, 2*time.Second) + } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) + } +} + +func TestQwenExecutorExecute_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) { + qwenRateLimiter.Lock() + qwenRateLimiter.requests = make(map[string][]time.Time) + qwenRateLimiter.Unlock() + + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + if r.URL.Path != "/v1/chat/completions" { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`)) + })) + defer srv.Close() + + exec := NewQwenExecutor(&config.Config{DisableCooling: true}) + auth := &cliproxyauth.Auth{ + ID: "auth-test", + Provider: "qwen", + Attributes: map[string]string{ + "base_url": srv.URL + "/v1", + }, + Metadata: map[string]any{ + "access_token": "test-token", + }, + } + ctx := context.Background() + + _, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{ + Model: "qwen-max", + Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err == nil { + t.Fatalf("Execute() expected error, got nil") + } + status, ok := err.(statusErr) + if !ok { + t.Fatalf("Execute() error type = %T, want statusErr", err) + } + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) + } + if status.RetryAfter() == nil { + t.Fatalf("Execute() RetryAfter is nil, want non-nil") + } + if got := *status.RetryAfter(); got != time.Second { + t.Fatalf("Execute() RetryAfter = %v, want %v", got, time.Second) + } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) + } +} + +func TestQwenExecutorExecuteStream_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) { + qwenRateLimiter.Lock() + qwenRateLimiter.requests = make(map[string][]time.Time) + qwenRateLimiter.Unlock() + + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + if r.URL.Path != "/v1/chat/completions" { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`)) + })) + defer srv.Close() + + exec := NewQwenExecutor(&config.Config{DisableCooling: true}) + auth := &cliproxyauth.Auth{ + ID: "auth-test", + Provider: "qwen", + Attributes: map[string]string{ + "base_url": srv.URL + "/v1", + }, + Metadata: map[string]any{ + "access_token": "test-token", + }, + } + ctx := context.Background() + + _, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{ + Model: "qwen-max", + Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err == nil { + t.Fatalf("ExecuteStream() expected error, got nil") + } + status, ok := err.(statusErr) + if !ok { + t.Fatalf("ExecuteStream() error type = %T, want statusErr", err) + } + if status.StatusCode() != http.StatusTooManyRequests { + t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests) + } + if status.RetryAfter() == nil { + t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil") + } + if got := *status.RetryAfter(); got != time.Second { + t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, time.Second) + } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls)) + } +}