From 5bb69fa4ab9fd6524a08c3abc80ae424de0ed02a Mon Sep 17 00:00:00 2001
From: Allen Yi <yiwenyou_allen@outlook.com>
Date: Sat, 11 Apr 2026 15:22:27 +0800
Subject: [PATCH 1/5] docs: refine CLIproxyAPI Quota Inspector description in
 all README locales

---
 README.md    | 4 ++++
 README_CN.md | 4 ++++
 README_JA.md | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/README.md b/README.md
index c027be19..ca972bb8 100644
--- a/README.md
+++ b/README.md
@@ -181,6 +181,10 @@ helping users to immersively use AI assistants across applications on controlled
 
 Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed.
 
+### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+
+Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics.
+
 > [!NOTE]  
 > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
 
diff --git a/README_CN.md b/README_CN.md
index 3e71528d..ec188df6 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -177,6 +177,10 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口
 
 跨平台桌面应用（macOS、Windows、Linux），以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点，具备使用分析、请求监控和热门编程工具自动配置功能，无需 API 密钥。
 
+### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+
+上手即用的面向 CLIProxyAPI 跨平台配额查询工具，支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。
+
 > [!NOTE]  
 > 如果你开发了基于 CLIProxyAPI 的项目，请提交一个 PR（拉取请求）将其添加到此列表中。
 
diff --git a/README_JA.md b/README_JA.md
index d3f06949..597cada3 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -178,6 +178,10 @@ Shadow AIは制限された環境向けに特別に設計されたAIアシスタ
 
 CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリ（macOS、Windows、Linux）。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要
 
+### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+
+CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。
+
 > [!NOTE]
 > CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
 

From c585caa0ce2dd3313db2aada49878027674c923b Mon Sep 17 00:00:00 2001
From: Allen Yi <yiwenyou_allen@outlook.com>
Date: Sat, 11 Apr 2026 16:22:45 +0800
Subject: [PATCH 2/5] docs: fix CLIProxyAPI Quota Inspector naming and link
 casing

---
 README.md    | 2 +-
 README_CN.md | 2 +-
 README_JA.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ca972bb8..ef176668 100644
--- a/README.md
+++ b/README.md
@@ -181,7 +181,7 @@ helping users to immersively use AI assistants across applications on controlled
 
 Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed.
 
-### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
 Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics.
 
diff --git a/README_CN.md b/README_CN.md
index ec188df6..92340f45 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -177,7 +177,7 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口
 
 跨平台桌面应用（macOS、Windows、Linux），以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点，具备使用分析、请求监控和热门编程工具自动配置功能，无需 API 密钥。
 
-### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
 上手即用的面向 CLIProxyAPI 跨平台配额查询工具，支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。
 
diff --git a/README_JA.md b/README_JA.md
index 597cada3..d2594ad7 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -178,7 +178,7 @@ Shadow AIは制限された環境向けに特別に設計されたAIアシスタ
 
 CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリ（macOS、Windows、Linux）。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要
 
-### [CLIproxyAPI Quota Inspector](https://github.com/AllenReder/CLIproxyAPI-Quota-Inspector)
+### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
 CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。
 

From 828df800881f73860ca657d21ab977a4f82a225a Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Sat, 11 Apr 2026 16:35:18 +0800
Subject: [PATCH 3/5] refactor(executor): remove immediate retry with token
 refresh on 429 for Qwen and update tests accordingly

---
 internal/runtime/executor/qwen_executor.go    | 53 ------------------
 .../runtime/executor/qwen_executor_test.go    | 56 +++++++++----------
 2 files changed, 27 insertions(+), 82 deletions(-)

diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index ec02460e..146be5c1 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -153,17 +153,6 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
 	return errCode, retryAfter
 }
 
-func qwenShouldAttemptImmediateRefreshRetry(auth *cliproxyauth.Auth) bool {
-	if auth == nil || auth.Metadata == nil {
-		return false
-	}
-	if provider := strings.TrimSpace(auth.Provider); provider != "" && !strings.EqualFold(provider, "qwen") {
-		return false
-	}
-	refreshToken, _ := auth.Metadata["refresh_token"].(string)
-	return strings.TrimSpace(refreshToken) != ""
-}
-
 // ensureQwenSystemMessage ensures the request has a single system message at the beginning.
 // It always injects the default system prompt and merges any user-provided system messages
 // into the injected system message content to satisfy Qwen's strict message ordering rules.
@@ -340,7 +329,6 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 		return resp, err
 	}
 
-	qwenImmediateRetryAttempted := false
 	for {
 		if errRate := checkQwenRateLimit(authID); errRate != nil {
 			helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
@@ -398,26 +386,6 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
 			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 
-			if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) {
-				helps.LogWithRequestID(ctx).WithFields(log.Fields{
-					"auth_id": redactAuthID(authID),
-					"model":   req.Model,
-				}).Info("qwen 429 encountered, refreshing token for immediate retry")
-
-				qwenImmediateRetryAttempted = true
-				refreshFn := e.refreshForImmediateRetry
-				if refreshFn == nil {
-					refreshFn = e.Refresh
-				}
-				refreshedAuth, errRefresh := refreshFn(ctx, auth)
-				if errRefresh != nil {
-					helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry")
-				} else if refreshedAuth != nil {
-					auth = refreshedAuth
-					continue
-				}
-			}
-
 			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
 			return resp, err
 		}
@@ -488,7 +456,6 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		return nil, err
 	}
 
-	qwenImmediateRetryAttempted := false
 	for {
 		if errRate := checkQwenRateLimit(authID); errRate != nil {
 			helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
@@ -546,26 +513,6 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
 			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 
-			if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) {
-				helps.LogWithRequestID(ctx).WithFields(log.Fields{
-					"auth_id": redactAuthID(authID),
-					"model":   req.Model,
-				}).Info("qwen 429 encountered, refreshing token for immediate retry (stream)")
-
-				qwenImmediateRetryAttempted = true
-				refreshFn := e.refreshForImmediateRetry
-				if refreshFn == nil {
-					refreshFn = e.Refresh
-				}
-				refreshedAuth, errRefresh := refreshFn(ctx, auth)
-				if errRefresh != nil {
-					helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry (stream)")
-				} else if refreshedAuth != nil {
-					auth = refreshedAuth
-					continue
-				}
-			}
-
 			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
 			return nil, err
 		}
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
index 97b4757e..f6363f66 100644
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -216,7 +216,7 @@ func TestQwenCreds_NormalizesResourceURL(t *testing.T) {
 	}
 }
 
-func TestQwenExecutorExecute_429RefreshAndRetry(t *testing.T) {
+func TestQwenExecutorExecute_429DoesNotRefreshOrRetry(t *testing.T) {
 	qwenRateLimiter.Lock()
 	qwenRateLimiter.requests = make(map[string][]time.Time)
 	qwenRateLimiter.Unlock()
@@ -272,27 +272,31 @@ func TestQwenExecutorExecute_429RefreshAndRetry(t *testing.T) {
 	}
 	ctx := context.Background()
 
-	resp, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
+	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
 		Model:   "qwen-max",
 		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
 	}, cliproxyexecutor.Options{
 		SourceFormat: sdktranslator.FromString("openai"),
 	})
-	if err != nil {
-		t.Fatalf("Execute() error = %v", err)
+	if err == nil {
+		t.Fatalf("Execute() expected error, got nil")
 	}
-	if len(resp.Payload) == 0 {
-		t.Fatalf("Execute() payload is empty")
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("Execute() error type = %T, want statusErr", err)
 	}
-	if atomic.LoadInt32(&calls) != 2 {
-		t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls))
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
 	}
-	if atomic.LoadInt32(&refresherCalls) != 1 {
-		t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls))
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
+	}
+	if atomic.LoadInt32(&refresherCalls) != 0 {
+		t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls))
 	}
 }
 
-func TestQwenExecutorExecuteStream_429RefreshAndRetry(t *testing.T) {
+func TestQwenExecutorExecuteStream_429DoesNotRefreshOrRetry(t *testing.T) {
 	qwenRateLimiter.Lock()
 	qwenRateLimiter.requests = make(map[string][]time.Time)
 	qwenRateLimiter.Unlock()
@@ -351,32 +355,26 @@ func TestQwenExecutorExecuteStream_429RefreshAndRetry(t *testing.T) {
 	}
 	ctx := context.Background()
 
-	stream, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
+	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
 		Model:   "qwen-max",
 		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
 	}, cliproxyexecutor.Options{
 		SourceFormat: sdktranslator.FromString("openai"),
 	})
-	if err != nil {
-		t.Fatalf("ExecuteStream() error = %v", err)
+	if err == nil {
+		t.Fatalf("ExecuteStream() expected error, got nil")
 	}
-	if atomic.LoadInt32(&calls) != 2 {
-		t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls))
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
 	}
-	if atomic.LoadInt32(&refresherCalls) != 1 {
-		t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls))
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
 	}
-
-	var sawPayload bool
-	for chunk := range stream.Chunks {
-		if chunk.Err != nil {
-			t.Fatalf("stream chunk error = %v", chunk.Err)
-		}
-		if len(chunk.Payload) > 0 {
-			sawPayload = true
-		}
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
 	}
-	if !sawPayload {
-		t.Fatalf("stream did not produce any payload chunks")
+	if atomic.LoadInt32(&refresherCalls) != 0 {
+		t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls))
 	}
 }

From f135fdf7fcbf3a46947209b3b8eef600196fddb1 Mon Sep 17 00:00:00 2001
From: Allen Yi <yiwenyou_allen@outlook.com>
Date: Sat, 11 Apr 2026 16:39:32 +0800
Subject: [PATCH 4/5] docs: clarify codex quota window wording in README
 locales

---
 README.md    | 2 +-
 README_CN.md | 2 +-
 README_JA.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ef176668..e824a485 100644
--- a/README.md
+++ b/README.md
@@ -183,7 +183,7 @@ Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a n
 
 ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
-Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account code 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics.
+Ready-to-use cross-platform quota inspector for CLIProxyAPI, supporting per-account codex 5h/7d quota windows, plan-based sorting, status coloring, and multi-account summary analytics.
 
 > [!NOTE]  
 > If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
diff --git a/README_CN.md b/README_CN.md
index 92340f45..a671db57 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -179,7 +179,7 @@ Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口
 
 ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
-上手即用的面向 CLIProxyAPI 跨平台配额查询工具，支持按账号展示 code 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。
+上手即用的面向 CLIProxyAPI 跨平台配额查询工具，支持按账号展示 codex 5h/7d 配额窗口、按计划排序、状态着色及多账号汇总分析。
 
 > [!NOTE]  
 > 如果你开发了基于 CLIProxyAPI 的项目，请提交一个 PR（拉取请求）将其添加到此列表中。
diff --git a/README_JA.md b/README_JA.md
index d2594ad7..88b33624 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -180,7 +180,7 @@ CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォー
 
 ### [CLIProxyAPI Quota Inspector](https://github.com/AllenReder/CLIProxyAPI-Quota-Inspector)
 
-CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの code 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。
+CLIProxyAPI向けのすぐに使えるクロスプラットフォームのクォータ確認ツール。アカウントごとの codex 5h/7d クォータ表示、プラン別ソート、ステータス色分け、複数アカウントの集計分析に対応。
 
 > [!NOTE]
 > CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。

From 0ab1f5412f079de0d5c1afada89d06063404cb04 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Sat, 11 Apr 2026 21:04:55 +0800
Subject: [PATCH 5/5] fix(executor): handle 429 Retry-After header and default
 retry logic for quota exhaustion

- Added proper parsing of `Retry-After` headers for 429 responses.
- Set default retry duration when "disable cooling" is active on quota exhaustion.
- Updated tests to verify `Retry-After` handling and default behavior.
---
 internal/runtime/executor/qwen_executor.go    |  49 ++++
 .../runtime/executor/qwen_executor_test.go    | 234 ++++++++++++++++++
 2 files changed, 283 insertions(+)

diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index 146be5c1..07ad0b3b 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -153,6 +154,40 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
 	return errCode, retryAfter
 }
 
+func qwenDisableCooling(cfg *config.Config, auth *cliproxyauth.Auth) bool {
+	if auth != nil {
+		if override, ok := auth.DisableCoolingOverride(); ok {
+			return override
+		}
+	}
+	if cfg == nil {
+		return false
+	}
+	return cfg.DisableCooling
+}
+
+func parseRetryAfterHeader(header http.Header, now time.Time) *time.Duration {
+	raw := strings.TrimSpace(header.Get("Retry-After"))
+	if raw == "" {
+		return nil
+	}
+	if seconds, err := strconv.Atoi(raw); err == nil {
+		if seconds <= 0 {
+			return nil
+		}
+		d := time.Duration(seconds) * time.Second
+		return &d
+	}
+	if at, err := http.ParseTime(raw); err == nil {
+		if !at.After(now) {
+			return nil
+		}
+		d := at.Sub(now)
+		return &d
+	}
+	return nil
+}
+
 // ensureQwenSystemMessage ensures the request has a single system message at the beginning.
 // It always injects the default system prompt and merges any user-provided system messages
 // into the injected system message content to satisfy Qwen's strict message ordering rules.
@@ -384,6 +419,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 			}
 
 			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
+			if errCode == http.StatusTooManyRequests && retryAfter == nil {
+				retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
+			}
+			if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
+				defaultRetryAfter := time.Second
+				retryAfter = &defaultRetryAfter
+			}
 			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 
 			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
@@ -511,6 +553,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			}
 
 			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
+			if errCode == http.StatusTooManyRequests && retryAfter == nil {
+				retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
+			}
+			if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
+				defaultRetryAfter := time.Second
+				retryAfter = &defaultRetryAfter
+			}
 			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
 
 			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
index f6363f66..f19cc8ca 100644
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -378,3 +378,237 @@ func TestQwenExecutorExecuteStream_429DoesNotRefreshOrRetry(t *testing.T) {
 		t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls))
 	}
 }
+
+func TestQwenExecutorExecute_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) {
+	qwenRateLimiter.Lock()
+	qwenRateLimiter.requests = make(map[string][]time.Time)
+	qwenRateLimiter.Unlock()
+
+	var calls int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&calls, 1)
+		if r.URL.Path != "/v1/chat/completions" {
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.Header().Set("Retry-After", "2")
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`))
+	}))
+	defer srv.Close()
+
+	exec := NewQwenExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{
+		ID:       "auth-test",
+		Provider: "qwen",
+		Attributes: map[string]string{
+			"base_url": srv.URL + "/v1",
+		},
+		Metadata: map[string]any{
+			"access_token": "test-token",
+		},
+	}
+	ctx := context.Background()
+
+	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
+		Model:   "qwen-max",
+		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err == nil {
+		t.Fatalf("Execute() expected error, got nil")
+	}
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("Execute() error type = %T, want statusErr", err)
+	}
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
+	}
+	if status.RetryAfter() == nil {
+		t.Fatalf("Execute() RetryAfter is nil, want non-nil")
+	}
+	if got := *status.RetryAfter(); got != 2*time.Second {
+		t.Fatalf("Execute() RetryAfter = %v, want %v", got, 2*time.Second)
+	}
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
+	}
+}
+
+func TestQwenExecutorExecuteStream_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) {
+	qwenRateLimiter.Lock()
+	qwenRateLimiter.requests = make(map[string][]time.Time)
+	qwenRateLimiter.Unlock()
+
+	var calls int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&calls, 1)
+		if r.URL.Path != "/v1/chat/completions" {
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.Header().Set("Retry-After", "2")
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`))
+	}))
+	defer srv.Close()
+
+	exec := NewQwenExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{
+		ID:       "auth-test",
+		Provider: "qwen",
+		Attributes: map[string]string{
+			"base_url": srv.URL + "/v1",
+		},
+		Metadata: map[string]any{
+			"access_token": "test-token",
+		},
+	}
+	ctx := context.Background()
+
+	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
+		Model:   "qwen-max",
+		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err == nil {
+		t.Fatalf("ExecuteStream() expected error, got nil")
+	}
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
+	}
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
+	}
+	if status.RetryAfter() == nil {
+		t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil")
+	}
+	if got := *status.RetryAfter(); got != 2*time.Second {
+		t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, 2*time.Second)
+	}
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
+	}
+}
+
+func TestQwenExecutorExecute_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) {
+	qwenRateLimiter.Lock()
+	qwenRateLimiter.requests = make(map[string][]time.Time)
+	qwenRateLimiter.Unlock()
+
+	var calls int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&calls, 1)
+		if r.URL.Path != "/v1/chat/completions" {
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
+	}))
+	defer srv.Close()
+
+	exec := NewQwenExecutor(&config.Config{DisableCooling: true})
+	auth := &cliproxyauth.Auth{
+		ID:       "auth-test",
+		Provider: "qwen",
+		Attributes: map[string]string{
+			"base_url": srv.URL + "/v1",
+		},
+		Metadata: map[string]any{
+			"access_token": "test-token",
+		},
+	}
+	ctx := context.Background()
+
+	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
+		Model:   "qwen-max",
+		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err == nil {
+		t.Fatalf("Execute() expected error, got nil")
+	}
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("Execute() error type = %T, want statusErr", err)
+	}
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
+	}
+	if status.RetryAfter() == nil {
+		t.Fatalf("Execute() RetryAfter is nil, want non-nil")
+	}
+	if got := *status.RetryAfter(); got != time.Second {
+		t.Fatalf("Execute() RetryAfter = %v, want %v", got, time.Second)
+	}
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
+	}
+}
+
+func TestQwenExecutorExecuteStream_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) {
+	qwenRateLimiter.Lock()
+	qwenRateLimiter.requests = make(map[string][]time.Time)
+	qwenRateLimiter.Unlock()
+
+	var calls int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&calls, 1)
+		if r.URL.Path != "/v1/chat/completions" {
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusTooManyRequests)
+		_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
+	}))
+	defer srv.Close()
+
+	exec := NewQwenExecutor(&config.Config{DisableCooling: true})
+	auth := &cliproxyauth.Auth{
+		ID:       "auth-test",
+		Provider: "qwen",
+		Attributes: map[string]string{
+			"base_url": srv.URL + "/v1",
+		},
+		Metadata: map[string]any{
+			"access_token": "test-token",
+		},
+	}
+	ctx := context.Background()
+
+	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
+		Model:   "qwen-max",
+		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai"),
+	})
+	if err == nil {
+		t.Fatalf("ExecuteStream() expected error, got nil")
+	}
+	status, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
+	}
+	if status.StatusCode() != http.StatusTooManyRequests {
+		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
+	}
+	if status.RetryAfter() == nil {
+		t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil")
+	}
+	if got := *status.RetryAfter(); got != time.Second {
+		t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, time.Second)
+	}
+	if atomic.LoadInt32(&calls) != 1 {
+		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
+	}
+}