diff --git a/internal/api/handlers/management/config_lists.go b/internal/api/handlers/management/config_lists.go
index 083d4e31..fbaad956 100644
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -214,19 +214,46 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) {
 
 func (h *Handler) DeleteGeminiKey(c *gin.Context) {
 	if val := strings.TrimSpace(c.Query("api-key")); val != "" {
-		out := make([]config.GeminiKey, 0, len(h.cfg.GeminiKey))
-		for _, v := range h.cfg.GeminiKey {
-			if v.APIKey != val {
+		if baseRaw, okBase := c.GetQuery("base-url"); okBase {
+			base := strings.TrimSpace(baseRaw)
+			out := make([]config.GeminiKey, 0, len(h.cfg.GeminiKey))
+			for _, v := range h.cfg.GeminiKey {
+				if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base {
+					continue
+				}
 				out = append(out, v)
 			}
+			if len(out) != len(h.cfg.GeminiKey) {
+				h.cfg.GeminiKey = out
+				h.cfg.SanitizeGeminiKeys()
+				h.persist(c)
+			} else {
+				c.JSON(404, gin.H{"error": "item not found"})
+			}
+			return
 		}
-		if len(out) != len(h.cfg.GeminiKey) {
-			h.cfg.GeminiKey = out
-			h.cfg.SanitizeGeminiKeys()
-			h.persist(c)
-		} else {
+
+		matchIndex := -1
+		matchCount := 0
+		for i := range h.cfg.GeminiKey {
+			if strings.TrimSpace(h.cfg.GeminiKey[i].APIKey) == val {
+				matchCount++
+				if matchIndex == -1 {
+					matchIndex = i
+				}
+			}
+		}
+		if matchCount == 0 {
 			c.JSON(404, gin.H{"error": "item not found"})
+			return
 		}
+		if matchCount > 1 {
+			c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"})
+			return
+		}
+		h.cfg.GeminiKey = append(h.cfg.GeminiKey[:matchIndex], h.cfg.GeminiKey[matchIndex+1:]...)
+		h.cfg.SanitizeGeminiKeys()
+		h.persist(c)
 		return
 	}
 	if idxStr := c.Query("index"); idxStr != "" {
@@ -335,14 +362,39 @@ func (h *Handler) PatchClaudeKey(c *gin.Context) {
 }
 
 func (h *Handler) DeleteClaudeKey(c *gin.Context) {
-	if val := c.Query("api-key"); val != "" {
-		out := make([]config.ClaudeKey, 0, len(h.cfg.ClaudeKey))
-		for _, v := range h.cfg.ClaudeKey {
-			if v.APIKey != val {
+	if val := strings.TrimSpace(c.Query("api-key")); val != "" {
+		if baseRaw, okBase := c.GetQuery("base-url"); okBase {
+			base := strings.TrimSpace(baseRaw)
+			out := make([]config.ClaudeKey, 0, len(h.cfg.ClaudeKey))
+			for _, v := range h.cfg.ClaudeKey {
+				if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base {
+					continue
+				}
 				out = append(out, v)
 			}
+			h.cfg.ClaudeKey = out
+			h.cfg.SanitizeClaudeKeys()
+			h.persist(c)
+			return
+		}
+
+		matchIndex := -1
+		matchCount := 0
+		for i := range h.cfg.ClaudeKey {
+			if strings.TrimSpace(h.cfg.ClaudeKey[i].APIKey) == val {
+				matchCount++
+				if matchIndex == -1 {
+					matchIndex = i
+				}
+			}
+		}
+		if matchCount > 1 {
+			c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"})
+			return
+		}
+		if matchIndex != -1 {
+			h.cfg.ClaudeKey = append(h.cfg.ClaudeKey[:matchIndex], h.cfg.ClaudeKey[matchIndex+1:]...)
 		}
-		h.cfg.ClaudeKey = out
 		h.cfg.SanitizeClaudeKeys()
 		h.persist(c)
 		return
@@ -601,13 +653,38 @@ func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
 
 func (h *Handler) DeleteVertexCompatKey(c *gin.Context) {
 	if val := strings.TrimSpace(c.Query("api-key")); val != "" {
-		out := make([]config.VertexCompatKey, 0, len(h.cfg.VertexCompatAPIKey))
-		for _, v := range h.cfg.VertexCompatAPIKey {
-			if v.APIKey != val {
+		if baseRaw, okBase := c.GetQuery("base-url"); okBase {
+			base := strings.TrimSpace(baseRaw)
+			out := make([]config.VertexCompatKey, 0, len(h.cfg.VertexCompatAPIKey))
+			for _, v := range h.cfg.VertexCompatAPIKey {
+				if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base {
+					continue
+				}
 				out = append(out, v)
 			}
+			h.cfg.VertexCompatAPIKey = out
+			h.cfg.SanitizeVertexCompatKeys()
+			h.persist(c)
+			return
+		}
+
+		matchIndex := -1
+		matchCount := 0
+		for i := range h.cfg.VertexCompatAPIKey {
+			if strings.TrimSpace(h.cfg.VertexCompatAPIKey[i].APIKey) == val {
+				matchCount++
+				if matchIndex == -1 {
+					matchIndex = i
+				}
+			}
+		}
+		if matchCount > 1 {
+			c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"})
+			return
+		}
+		if matchIndex != -1 {
+			h.cfg.VertexCompatAPIKey = append(h.cfg.VertexCompatAPIKey[:matchIndex], h.cfg.VertexCompatAPIKey[matchIndex+1:]...)
 		}
-		h.cfg.VertexCompatAPIKey = out
 		h.cfg.SanitizeVertexCompatKeys()
 		h.persist(c)
 		return
@@ -915,14 +992,39 @@ func (h *Handler) PatchCodexKey(c *gin.Context) {
 }
 
 func (h *Handler) DeleteCodexKey(c *gin.Context) {
-	if val := c.Query("api-key"); val != "" {
-		out := make([]config.CodexKey, 0, len(h.cfg.CodexKey))
-		for _, v := range h.cfg.CodexKey {
-			if v.APIKey != val {
+	if val := strings.TrimSpace(c.Query("api-key")); val != "" {
+		if baseRaw, okBase := c.GetQuery("base-url"); okBase {
+			base := strings.TrimSpace(baseRaw)
+			out := make([]config.CodexKey, 0, len(h.cfg.CodexKey))
+			for _, v := range h.cfg.CodexKey {
+				if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base {
+					continue
+				}
 				out = append(out, v)
 			}
+			h.cfg.CodexKey = out
+			h.cfg.SanitizeCodexKeys()
+			h.persist(c)
+			return
+		}
+
+		matchIndex := -1
+		matchCount := 0
+		for i := range h.cfg.CodexKey {
+			if strings.TrimSpace(h.cfg.CodexKey[i].APIKey) == val {
+				matchCount++
+				if matchIndex == -1 {
+					matchIndex = i
+				}
+			}
+		}
+		if matchCount > 1 {
+			c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"})
+			return
+		}
+		if matchIndex != -1 {
+			h.cfg.CodexKey = append(h.cfg.CodexKey[:matchIndex], h.cfg.CodexKey[matchIndex+1:]...)
 		}
-		h.cfg.CodexKey = out
 		h.cfg.SanitizeCodexKeys()
 		h.persist(c)
 		return
diff --git a/internal/api/handlers/management/config_lists_delete_keys_test.go b/internal/api/handlers/management/config_lists_delete_keys_test.go
new file mode 100644
index 00000000..aaa43910
--- /dev/null
+++ b/internal/api/handlers/management/config_lists_delete_keys_test.go
@@ -0,0 +1,172 @@
+package management
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+)
+
+func writeTestConfigFile(t *testing.T) string {
+	t.Helper()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "config.yaml")
+	if errWrite := os.WriteFile(path, []byte("{}\n"), 0o600); errWrite != nil {
+		t.Fatalf("failed to write test config: %v", errWrite)
+	}
+	return path
+}
+
+func TestDeleteGeminiKey_RequiresBaseURLWhenAPIKeyDuplicated(t *testing.T) {
+	t.Parallel()
+	gin.SetMode(gin.TestMode)
+
+	h := &Handler{
+		cfg: &config.Config{
+			GeminiKey: []config.GeminiKey{
+				{APIKey: "shared-key", BaseURL: "https://a.example.com"},
+				{APIKey: "shared-key", BaseURL: "https://b.example.com"},
+			},
+		},
+		configFilePath: writeTestConfigFile(t),
+	}
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/gemini-api-key?api-key=shared-key", nil)
+
+	h.DeleteGeminiKey(c)
+
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusBadRequest, rec.Body.String())
+	}
+	if got := len(h.cfg.GeminiKey); got != 2 {
+		t.Fatalf("gemini keys len = %d, want 2", got)
+	}
+}
+
+func TestDeleteGeminiKey_DeletesOnlyMatchingBaseURL(t *testing.T) {
+	t.Parallel()
+	gin.SetMode(gin.TestMode)
+
+	h := &Handler{
+		cfg: &config.Config{
+			GeminiKey: []config.GeminiKey{
+				{APIKey: "shared-key", BaseURL: "https://a.example.com"},
+				{APIKey: "shared-key", BaseURL: "https://b.example.com"},
+			},
+		},
+		configFilePath: writeTestConfigFile(t),
+	}
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/gemini-api-key?api-key=shared-key&base-url=https://a.example.com", nil)
+
+	h.DeleteGeminiKey(c)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String())
+	}
+	if got := len(h.cfg.GeminiKey); got != 1 {
+		t.Fatalf("gemini keys len = %d, want 1", got)
+	}
+	if got := h.cfg.GeminiKey[0].BaseURL; got != "https://b.example.com" {
+		t.Fatalf("remaining base-url = %q, want %q", got, "https://b.example.com")
+	}
+}
+
+func TestDeleteClaudeKey_DeletesEmptyBaseURLWhenExplicitlyProvided(t *testing.T) {
+	t.Parallel()
+	gin.SetMode(gin.TestMode)
+
+	h := &Handler{
+		cfg: &config.Config{
+			ClaudeKey: []config.ClaudeKey{
+				{APIKey: "shared-key", BaseURL: ""},
+				{APIKey: "shared-key", BaseURL: "https://claude.example.com"},
+			},
+		},
+		configFilePath: writeTestConfigFile(t),
+	}
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/claude-api-key?api-key=shared-key&base-url=", nil)
+
+	h.DeleteClaudeKey(c)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String())
+	}
+	if got := len(h.cfg.ClaudeKey); got != 1 {
+		t.Fatalf("claude keys len = %d, want 1", got)
+	}
+	if got := h.cfg.ClaudeKey[0].BaseURL; got != "https://claude.example.com" {
+		t.Fatalf("remaining base-url = %q, want %q", got, "https://claude.example.com")
+	}
+}
+
+func TestDeleteVertexCompatKey_DeletesOnlyMatchingBaseURL(t *testing.T) {
+	t.Parallel()
+	gin.SetMode(gin.TestMode)
+
+	h := &Handler{
+		cfg: &config.Config{
+			VertexCompatAPIKey: []config.VertexCompatKey{
+				{APIKey: "shared-key", BaseURL: "https://a.example.com"},
+				{APIKey: "shared-key", BaseURL: "https://b.example.com"},
+			},
+		},
+		configFilePath: writeTestConfigFile(t),
+	}
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/vertex-api-key?api-key=shared-key&base-url=https://b.example.com", nil)
+
+	h.DeleteVertexCompatKey(c)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String())
+	}
+	if got := len(h.cfg.VertexCompatAPIKey); got != 1 {
+		t.Fatalf("vertex keys len = %d, want 1", got)
+	}
+	if got := h.cfg.VertexCompatAPIKey[0].BaseURL; got != "https://a.example.com" {
+		t.Fatalf("remaining base-url = %q, want %q", got, "https://a.example.com")
+	}
+}
+
+func TestDeleteCodexKey_RequiresBaseURLWhenAPIKeyDuplicated(t *testing.T) {
+	t.Parallel()
+	gin.SetMode(gin.TestMode)
+
+	h := &Handler{
+		cfg: &config.Config{
+			CodexKey: []config.CodexKey{
+				{APIKey: "shared-key", BaseURL: "https://a.example.com"},
+				{APIKey: "shared-key", BaseURL: "https://b.example.com"},
+			},
+		},
+		configFilePath: writeTestConfigFile(t),
+	}
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/codex-api-key?api-key=shared-key", nil)
+
+	h.DeleteCodexKey(c)
+
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusBadRequest, rec.Body.String())
+	}
+	if got := len(h.cfg.CodexKey); got != 2 {
+		t.Fatalf("codex keys len = %d, want 2", got)
+	}
+}
diff --git a/internal/config/config.go b/internal/config/config.go
index 15847f57..f25b0aa2 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -865,6 +865,7 @@ func (cfg *Config) SanitizeClaudeKeys() {
 }
 
 // SanitizeGeminiKeys deduplicates and normalizes Gemini credentials.
+// It uses API key + base URL as the uniqueness key.
 func (cfg *Config) SanitizeGeminiKeys() {
 	if cfg == nil {
 		return
@@ -883,10 +884,11 @@ func (cfg *Config) SanitizeGeminiKeys() {
 		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
 		entry.Headers = NormalizeHeaders(entry.Headers)
 		entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels)
-		if _, exists := seen[entry.APIKey]; exists {
+		uniqueKey := entry.APIKey + "|" + entry.BaseURL
+		if _, exists := seen[uniqueKey]; exists {
 			continue
 		}
-		seen[entry.APIKey] = struct{}{}
+		seen[uniqueKey] = struct{}{}
 		out = append(out, entry)
 	}
 	cfg.GeminiKey = out
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index d8eec537..5c8ff039 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -25,23 +25,13 @@ import (
 )
 
 const (
-	qwenUserAgent       = "QwenCode/0.13.2 (darwin; arm64)"
+	qwenUserAgent       = "QwenCode/0.14.2 (darwin; arm64)"
 	qwenRateLimitPerMin = 60          // 60 requests per minute per credential
 	qwenRateLimitWindow = time.Minute // sliding window duration
 )
 
 var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
 
-// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
-var qwenBeijingLoc = func() *time.Location {
-	loc, err := time.LoadLocation("Asia/Shanghai")
-	if err != nil || loc == nil {
-		log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err)
-		return time.FixedZone("CST", 8*3600)
-	}
-	return loc
-}()
-
 // qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
 var qwenQuotaCodes = map[string]struct{}{
 	"insufficient_quota": {},
@@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
 	// Qwen returns 403 for quota errors, 429 for rate limits
 	if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
 		errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
-		cooldown := timeUntilNextDay()
-		retryAfter = &cooldown
-		helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown)
+		// Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
+		// the global request-retry scheduler may skip retries due to max-retry-interval.
+		helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode)
 	}
 	return errCode, retryAfter
 }
 
-// timeUntilNextDay returns duration until midnight Beijing time (UTC+8).
-// Qwen's daily quota resets at 00:00 Beijing time.
-func timeUntilNextDay() time.Duration {
-	now := time.Now()
-	nowLocal := now.In(qwenBeijingLoc)
-	tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc)
-	return tomorrow.Sub(now)
-}
-
 // ensureQwenSystemMessage ensures the request has a single system message at the beginning.
 // It always injects the default system prompt and merges any user-provided system messages
 // into the injected system message content to satisfy Qwen's strict message ordering rules.
@@ -626,19 +607,23 @@ func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 }
 
 func applyQwenHeaders(r *http.Request, token string, stream bool) {
-	r.Header.Set("Content-Type", "application/json")
-	r.Header.Set("Authorization", "Bearer "+token)
-	r.Header.Set("User-Agent", qwenUserAgent)
-	r.Header["X-DashScope-UserAgent"] = []string{qwenUserAgent}
 	r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0")
+	r.Header.Set("User-Agent", qwenUserAgent)
 	r.Header.Set("X-Stainless-Lang", "js")
-	r.Header.Set("X-Stainless-Arch", "arm64")
-	r.Header.Set("X-Stainless-Package-Version", "5.11.0")
-	r.Header["X-DashScope-CacheControl"] = []string{"enable"}
-	r.Header.Set("X-Stainless-Retry-Count", "0")
+	r.Header.Set("Accept-Language", "*")
+	r.Header.Set("X-Dashscope-Cachecontrol", "enable")
 	r.Header.Set("X-Stainless-Os", "MacOS")
-	r.Header["X-DashScope-AuthType"] = []string{"qwen-oauth"}
+	r.Header.Set("X-Dashscope-Authtype", "qwen-oauth")
+	r.Header.Set("X-Stainless-Arch", "arm64")
 	r.Header.Set("X-Stainless-Runtime", "node")
+	r.Header.Set("X-Stainless-Retry-Count", "0")
+	r.Header.Set("Accept-Encoding", "gzip, deflate")
+	r.Header.Set("Authorization", "Bearer "+token)
+	r.Header.Set("X-Stainless-Package-Version", "5.11.0")
+	r.Header.Set("Sec-Fetch-Mode", "cors")
+	r.Header.Set("Content-Type", "application/json")
+	r.Header.Set("Connection", "keep-alive")
+	r.Header.Set("X-Dashscope-Useragent", qwenUserAgent)
 
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
@@ -647,6 +632,26 @@ func applyQwenHeaders(r *http.Request, token string, stream bool) {
 	r.Header.Set("Accept", "application/json")
 }
 
+func normaliseQwenBaseURL(resourceURL string) string {
+	raw := strings.TrimSpace(resourceURL)
+	if raw == "" {
+		return ""
+	}
+
+	normalized := raw
+	lower := strings.ToLower(normalized)
+	if !strings.HasPrefix(lower, "http://") && !strings.HasPrefix(lower, "https://") {
+		normalized = "https://" + normalized
+	}
+
+	normalized = strings.TrimRight(normalized, "/")
+	if !strings.HasSuffix(strings.ToLower(normalized), "/v1") {
+		normalized += "/v1"
+	}
+
+	return normalized
+}
+
 func qwenCreds(a *cliproxyauth.Auth) (token, baseURL string) {
 	if a == nil {
 		return "", ""
@@ -664,7 +669,7 @@ func qwenCreds(a *cliproxyauth.Auth) (token, baseURL string) {
 			token = v
 		}
 		if v, ok := a.Metadata["resource_url"].(string); ok {
-			baseURL = fmt.Sprintf("https://%s/v1", v)
+			baseURL = normaliseQwenBaseURL(v)
 		}
 	}
 	return
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
index b960eced..cf9ed21f 100644
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -1,9 +1,12 @@
 package executor
 
 import (
+	"context"
+	"net/http"
 	"testing"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	"github.com/tidwall/gjson"
 )
 
@@ -152,3 +155,57 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
 		t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
 	}
 }
+
+func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) {
+	body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
+	code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body)
+	if code != http.StatusTooManyRequests {
+		t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
+	}
+	if retryAfter != nil {
+		t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
+	}
+}
+
+func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) {
+	body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
+	code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body)
+	if code != http.StatusTooManyRequests {
+		t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
+	}
+	if retryAfter != nil {
+		t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
+	}
+}
+
+func TestQwenCreds_NormalizesResourceURL(t *testing.T) {
+	tests := []struct {
+		name        string
+		resourceURL string
+		wantBaseURL string
+	}{
+		{"host only", "portal.qwen.ai", "https://portal.qwen.ai/v1"},
+		{"scheme no v1", "https://portal.qwen.ai", "https://portal.qwen.ai/v1"},
+		{"scheme with v1", "https://portal.qwen.ai/v1", "https://portal.qwen.ai/v1"},
+		{"scheme with v1 slash", "https://portal.qwen.ai/v1/", "https://portal.qwen.ai/v1"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			auth := &cliproxyauth.Auth{
+				Metadata: map[string]any{
+					"access_token": "test-token",
+					"resource_url": tt.resourceURL,
+				},
+			}
+
+			token, baseURL := qwenCreds(auth)
+			if token != "test-token" {
+				t.Fatalf("qwenCreds token = %q, want %q", token, "test-token")
+			}
+			if baseURL != tt.wantBaseURL {
+				t.Fatalf("qwenCreds baseURL = %q, want %q", baseURL, tt.wantBaseURL)
+			}
+		})
+	}
+}
diff --git a/sdk/auth/qwen.go b/sdk/auth/qwen.go
index 310d4987..d891021a 100644
--- a/sdk/auth/qwen.go
+++ b/sdk/auth/qwen.go
@@ -27,7 +27,7 @@ func (a *QwenAuthenticator) Provider() string {
 }
 
 func (a *QwenAuthenticator) RefreshLead() *time.Duration {
-	return new(3 * time.Hour)
+	return new(20 * time.Minute)
 }
 
 func (a *QwenAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
diff --git a/sdk/auth/qwen_refresh_lead_test.go b/sdk/auth/qwen_refresh_lead_test.go
new file mode 100644
index 00000000..56f41fc0
--- /dev/null
+++ b/sdk/auth/qwen_refresh_lead_test.go
@@ -0,0 +1,19 @@
+package auth
+
+import (
+	"testing"
+	"time"
+)
+
+func TestQwenAuthenticator_RefreshLeadIsSane(t *testing.T) {
+	lead := NewQwenAuthenticator().RefreshLead()
+	if lead == nil {
+		t.Fatal("RefreshLead() = nil, want non-nil")
+	}
+	if *lead <= 0 {
+		t.Fatalf("RefreshLead() = %s, want > 0", *lead)
+	}
+	if *lead > 30*time.Minute {
+		t.Fatalf("RefreshLead() = %s, want <= %s", *lead, 30*time.Minute)
+	}
+}
diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go
index 357bf693..25cc7221 100644
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt
 		if attempt >= effectiveRetry {
 			continue
 		}
-		blocked, reason, next := isAuthBlockedForModel(auth, model, now)
+		checkModel := model
+		if strings.TrimSpace(model) != "" {
+			checkModel = m.selectionModelForAuth(auth, model)
+		}
+		blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now)
 		if !blocked || next.IsZero() || reason == blockReasonDisabled {
 			continue
 		}
@@ -1846,6 +1850,50 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt
 	return minWait, found
 }
 
+func (m *Manager) retryAllowed(attempt int, providers []string) bool {
+	if m == nil || attempt < 0 || len(providers) == 0 {
+		return false
+	}
+	defaultRetry := int(m.requestRetry.Load())
+	if defaultRetry < 0 {
+		defaultRetry = 0
+	}
+	providerSet := make(map[string]struct{}, len(providers))
+	for i := range providers {
+		key := strings.TrimSpace(strings.ToLower(providers[i]))
+		if key == "" {
+			continue
+		}
+		providerSet[key] = struct{}{}
+	}
+	if len(providerSet) == 0 {
+		return false
+	}
+
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	for _, auth := range m.auths {
+		if auth == nil {
+			continue
+		}
+		providerKey := strings.TrimSpace(strings.ToLower(auth.Provider))
+		if _, ok := providerSet[providerKey]; !ok {
+			continue
+		}
+		effectiveRetry := defaultRetry
+		if override, ok := auth.RequestRetryOverride(); ok {
+			effectiveRetry = override
+		}
+		if effectiveRetry < 0 {
+			effectiveRetry = 0
+		}
+		if attempt < effectiveRetry {
+			return true
+		}
+	}
+	return false
+}
+
 func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
 	if err == nil {
 		return 0, false
@@ -1853,17 +1901,31 @@ func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []stri
 	if maxWait <= 0 {
 		return 0, false
 	}
-	if status := statusCodeFromError(err); status == http.StatusOK {
+	status := statusCodeFromError(err)
+	if status == http.StatusOK {
 		return 0, false
 	}
 	if isRequestInvalidError(err) {
 		return 0, false
 	}
 	wait, found := m.closestCooldownWait(providers, model, attempt)
-	if !found || wait > maxWait {
+	if found {
+		if wait > maxWait {
+			return 0, false
+		}
+		return wait, true
+	}
+	if status != http.StatusTooManyRequests {
 		return 0, false
 	}
-	return wait, true
+	if !m.retryAllowed(attempt, providers) {
+		return 0, false
+	}
+	retryAfter := retryAfterFromError(err)
+	if retryAfter == nil || *retryAfter <= 0 || *retryAfter > maxWait {
+		return 0, false
+	}
+	return *retryAfter, true
 }
 
 func waitForCooldown(ctx context.Context, wait time.Duration) error {
diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go
index 0c72c833..1b74aab1 100644
--- a/sdk/cliproxy/auth/conductor_overrides_test.go
+++ b/sdk/cliproxy/auth/conductor_overrides_test.go
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 )
@@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
 	}
 }
 
+func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) {
+	m := NewManager(nil, nil, nil)
+	m.SetRetryConfig(3, 30*time.Second, 0)
+	m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{
+		"qwen": {
+			{Name: "qwen3.6-plus", Alias: "coder-model"},
+		},
+	})
+
+	routeModel := "coder-model"
+	upstreamModel := "qwen3.6-plus"
+	next := time.Now().Add(5 * time.Second)
+
+	auth := &Auth{
+		ID:       "auth-1",
+		Provider: "qwen",
+		ModelStates: map[string]*ModelState{
+			upstreamModel: {
+				Unavailable:    true,
+				Status:         StatusError,
+				NextRetryAfter: next,
+				Quota: QuotaState{
+					Exceeded:      true,
+					Reason:        "quota",
+					NextRecoverAt: next,
+				},
+			},
+		},
+	}
+	if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
+		t.Fatalf("register auth: %v", errRegister)
+	}
+
+	_, _, maxWait := m.retrySettings()
+	wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait)
+	if !shouldRetry {
+		t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait)
+	}
+	if wait <= 0 {
+		t.Fatalf("expected wait > 0, got %v", wait)
+	}
+}
+
 type credentialRetryLimitExecutor struct {
 	id string
 
@@ -646,6 +690,57 @@ func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter429RetryAfter(t *tes
 	}
 }
 
+func TestManager_Execute_DisableCooling_RetriesAfter429RetryAfter(t *testing.T) {
+	prev := quotaCooldownDisabled.Load()
+	quotaCooldownDisabled.Store(false)
+	t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
+
+	m := NewManager(nil, nil, nil)
+	m.SetRetryConfig(3, 100*time.Millisecond, 0)
+
+	executor := &authFallbackExecutor{
+		id: "claude",
+		executeErrors: map[string]error{
+			"auth-429-retryafter-exec": &retryAfterStatusError{
+				status:     http.StatusTooManyRequests,
+				message:    "quota exhausted",
+				retryAfter: 5 * time.Millisecond,
+			},
+		},
+	}
+	m.RegisterExecutor(executor)
+
+	auth := &Auth{
+		ID:       "auth-429-retryafter-exec",
+		Provider: "claude",
+		Metadata: map[string]any{
+			"disable_cooling": true,
+		},
+	}
+	if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
+		t.Fatalf("register auth: %v", errRegister)
+	}
+
+	model := "test-model-429-retryafter-exec"
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
+	t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
+
+	req := cliproxyexecutor.Request{Model: model}
+	_, errExecute := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
+	if errExecute == nil {
+		t.Fatal("expected execute error")
+	}
+	if statusCodeFromError(errExecute) != http.StatusTooManyRequests {
+		t.Fatalf("execute status = %d, want %d", statusCodeFromError(errExecute), http.StatusTooManyRequests)
+	}
+
+	calls := executor.ExecuteCalls()
+	if len(calls) != 4 {
+		t.Fatalf("execute calls = %d, want 4 (initial + 3 retries)", len(calls))
+	}
+}
+
 func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) {
 	m := NewManager(nil, nil, nil)
 
diff --git a/sdk/cliproxy/auth/scheduler.go b/sdk/cliproxy/auth/scheduler.go
index 1482bae6..b5a39282 100644
--- a/sdk/cliproxy/auth/scheduler.go
+++ b/sdk/cliproxy/auth/scheduler.go
@@ -97,6 +97,72 @@ type childBucket struct {
 // cooldownQueue is the blocked auth collection ordered by next retry time during rebuilds.
 type cooldownQueue []*scheduledAuth
 
+type readyViewCursorState struct {
+	cursor       int
+	parentCursor int
+	childCursors map[string]int
+}
+
+type readyBucketCursorState struct {
+	all readyViewCursorState
+	ws  readyViewCursorState
+}
+
+func snapshotReadyViewCursors(view readyView) readyViewCursorState {
+	state := readyViewCursorState{
+		cursor:       view.cursor,
+		parentCursor: view.parentCursor,
+	}
+	if len(view.children) == 0 {
+		return state
+	}
+	state.childCursors = make(map[string]int, len(view.children))
+	for parent, child := range view.children {
+		if child == nil {
+			continue
+		}
+		state.childCursors[parent] = child.cursor
+	}
+	return state
+}
+
+func restoreReadyViewCursors(view *readyView, state readyViewCursorState) {
+	if view == nil {
+		return
+	}
+	if len(view.flat) > 0 {
+		view.cursor = normalizeCursor(state.cursor, len(view.flat))
+	}
+	if len(view.parentOrder) == 0 || len(view.children) == 0 {
+		return
+	}
+	view.parentCursor = normalizeCursor(state.parentCursor, len(view.parentOrder))
+	if len(state.childCursors) == 0 {
+		return
+	}
+	for parent, child := range view.children {
+		if child == nil || len(child.items) == 0 {
+			continue
+		}
+		cursor, ok := state.childCursors[parent]
+		if !ok {
+			continue
+		}
+		child.cursor = normalizeCursor(cursor, len(child.items))
+	}
+}
+
+func normalizeCursor(cursor, size int) int {
+	if size <= 0 || cursor <= 0 {
+		return 0
+	}
+	cursor = cursor % size
+	if cursor < 0 {
+		cursor += size
+	}
+	return cursor
+}
+
 // newAuthScheduler constructs an empty scheduler configured for the supplied selector strategy.
 func newAuthScheduler(selector Selector) *authScheduler {
 	return &authScheduler{
@@ -824,6 +890,17 @@ func (m *modelScheduler) availabilitySummaryLocked(predicate func(*scheduledAuth
 
 // rebuildIndexesLocked reconstructs ready and blocked views from the current entry map.
 func (m *modelScheduler) rebuildIndexesLocked() {
+	cursorStates := make(map[int]readyBucketCursorState, len(m.readyByPriority))
+	for priority, bucket := range m.readyByPriority {
+		if bucket == nil {
+			continue
+		}
+		cursorStates[priority] = readyBucketCursorState{
+			all: snapshotReadyViewCursors(bucket.all),
+			ws:  snapshotReadyViewCursors(bucket.ws),
+		}
+	}
+
 	m.readyByPriority = make(map[int]*readyBucket)
 	m.priorityOrder = m.priorityOrder[:0]
 	m.blocked = m.blocked[:0]
@@ -844,7 +921,12 @@ func (m *modelScheduler) rebuildIndexesLocked() {
 		sort.Slice(entries, func(i, j int) bool {
 			return entries[i].auth.ID < entries[j].auth.ID
 		})
-		m.readyByPriority[priority] = buildReadyBucket(entries)
+		bucket := buildReadyBucket(entries)
+		if cursorState, ok := cursorStates[priority]; ok && bucket != nil {
+			restoreReadyViewCursors(&bucket.all, cursorState.all)
+			restoreReadyViewCursors(&bucket.ws, cursorState.ws)
+		}
+		m.readyByPriority[priority] = bucket
 		m.priorityOrder = append(m.priorityOrder, priority)
 	}
 	sort.Slice(m.priorityOrder, func(i, j int) bool {