diff --git a/internal/auth/codex/openai_auth.go b/internal/auth/codex/openai_auth.go index c273acae..64bc00a6 100644 --- a/internal/auth/codex/openai_auth.go +++ b/internal/auth/codex/openai_auth.go @@ -276,6 +276,10 @@ func (o *CodexAuth) RefreshTokensWithRetry(ctx context.Context, refreshToken str if err == nil { return tokenData, nil } + if isNonRetryableRefreshErr(err) { + log.Warnf("Token refresh attempt %d failed with non-retryable error: %v", attempt+1, err) + return nil, err + } lastErr = err log.Warnf("Token refresh attempt %d failed: %v", attempt+1, err) @@ -284,6 +288,14 @@ func (o *CodexAuth) RefreshTokensWithRetry(ctx context.Context, refreshToken str return nil, fmt.Errorf("token refresh failed after %d attempts: %w", maxRetries, lastErr) } +func isNonRetryableRefreshErr(err error) bool { + if err == nil { + return false + } + raw := strings.ToLower(err.Error()) + return strings.Contains(raw, "refresh_token_reused") +} + // UpdateTokenStorage updates an existing CodexTokenStorage with new token data. // This is typically called after a successful token refresh to persist the new credentials. func (o *CodexAuth) UpdateTokenStorage(storage *CodexTokenStorage, tokenData *CodexTokenData) { diff --git a/internal/auth/codex/openai_auth_test.go b/internal/auth/codex/openai_auth_test.go new file mode 100644 index 00000000..3327eb4a --- /dev/null +++ b/internal/auth/codex/openai_auth_test.go @@ -0,0 +1,44 @@ +package codex + +import ( + "context" + "io" + "net/http" + "strings" + "sync/atomic" + "testing" +) + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} + +func TestRefreshTokensWithRetry_NonRetryableOnlyAttemptsOnce(t *testing.T) { + var calls int32 + auth := &CodexAuth{ + httpClient: &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + atomic.AddInt32(&calls, 1) + return &http.Response{ + StatusCode: http.StatusBadRequest, + Body: io.NopCloser(strings.NewReader(`{"error":"invalid_grant","code":"refresh_token_reused"}`)), + Header: make(http.Header), + Request: req, + }, nil + }), + }, + } + + _, err := auth.RefreshTokensWithRetry(context.Background(), "dummy_refresh_token", 3) + if err == nil { + t.Fatalf("expected error for non-retryable refresh failure") + } + if !strings.Contains(strings.ToLower(err.Error()), "refresh_token_reused") { + t.Fatalf("expected refresh_token_reused in error, got: %v", err) + } + if got := atomic.LoadInt32(&calls); got != 1 { + t.Fatalf("expected 1 refresh attempt, got %d", got) + } +} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index bcc4a057..e7957d29 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "strings" + "sync" "time" qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" @@ -22,9 +23,151 @@ import ( ) const ( - qwenUserAgent = "QwenCode/0.10.3 (darwin; arm64)" + qwenUserAgent = "QwenCode/0.10.3 (darwin; arm64)" + qwenRateLimitPerMin = 60 // 60 requests per minute per credential + qwenRateLimitWindow = time.Minute // sliding window duration ) +// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. +var qwenBeijingLoc = func() *time.Location { + loc, err := time.LoadLocation("Asia/Shanghai") + if err != nil || loc == nil { + log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err) + return time.FixedZone("CST", 8*3600) + } + return loc +}() + +// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion. +var qwenQuotaCodes = map[string]struct{}{ + "insufficient_quota": {}, + "quota_exceeded": {}, +} + +// qwenRateLimiter tracks request timestamps per credential for rate limiting. +// Qwen has a limit of 60 requests per minute per account. +var qwenRateLimiter = struct { + sync.Mutex + requests map[string][]time.Time // authID -> request timestamps +}{ + requests: make(map[string][]time.Time), +} + +// redactAuthID returns a redacted version of the auth ID for safe logging. +// Keeps a small prefix/suffix to allow correlation across events. +func redactAuthID(id string) string { + if id == "" { + return "" + } + if len(id) <= 8 { + return id + } + return id[:4] + "..." + id[len(id)-4:] +} + +// checkQwenRateLimit checks if the credential has exceeded the rate limit. +// Returns nil if allowed, or a statusErr with retryAfter if rate limited. +func checkQwenRateLimit(authID string) error { + if authID == "" { + // Empty authID should not bypass rate limiting in production + // Use debug level to avoid log spam for certain auth flows + log.Debug("qwen rate limit check: empty authID, skipping rate limit") + return nil + } + + now := time.Now() + windowStart := now.Add(-qwenRateLimitWindow) + + qwenRateLimiter.Lock() + defer qwenRateLimiter.Unlock() + + // Get and filter timestamps within the window + timestamps := qwenRateLimiter.requests[authID] + var validTimestamps []time.Time + for _, ts := range timestamps { + if ts.After(windowStart) { + validTimestamps = append(validTimestamps, ts) + } + } + + // Always prune expired entries to prevent memory leak + // Delete empty entries, otherwise update with pruned slice + if len(validTimestamps) == 0 { + delete(qwenRateLimiter.requests, authID) + } + + // Check if rate limit exceeded + if len(validTimestamps) >= qwenRateLimitPerMin { + // Calculate when the oldest request will expire + oldestInWindow := validTimestamps[0] + retryAfter := oldestInWindow.Add(qwenRateLimitWindow).Sub(now) + if retryAfter < time.Second { + retryAfter = time.Second + } + retryAfterSec := int(retryAfter.Seconds()) + return statusErr{ + code: http.StatusTooManyRequests, + msg: fmt.Sprintf(`{"error":{"code":"rate_limit_exceeded","message":"Qwen rate limit: %d requests/minute exceeded, retry after %ds","type":"rate_limit_exceeded"}}`, qwenRateLimitPerMin, retryAfterSec), + retryAfter: &retryAfter, + } + } + + // Record this request and update the map with pruned timestamps + validTimestamps = append(validTimestamps, now) + qwenRateLimiter.requests[authID] = validTimestamps + + return nil +} + +// isQwenQuotaError checks if the error response indicates a quota exceeded error. +// Qwen returns HTTP 403 with error.code="insufficient_quota" when daily quota is exhausted. +func isQwenQuotaError(body []byte) bool { + code := strings.ToLower(gjson.GetBytes(body, "error.code").String()) + errType := strings.ToLower(gjson.GetBytes(body, "error.type").String()) + + // Primary check: exact match on error.code or error.type (most reliable) + if _, ok := qwenQuotaCodes[code]; ok { + return true + } + if _, ok := qwenQuotaCodes[errType]; ok { + return true + } + + // Fallback: check message only if code/type don't match (less reliable) + msg := strings.ToLower(gjson.GetBytes(body, "error.message").String()) + if strings.Contains(msg, "insufficient_quota") || strings.Contains(msg, "quota exceeded") || + strings.Contains(msg, "free allocated quota exceeded") { + return true + } + + return false +} + +// wrapQwenError wraps an HTTP error response, detecting quota errors and mapping them to 429. +// Returns the appropriate status code and retryAfter duration for statusErr. +// Only checks for quota errors when httpCode is 403 or 429 to avoid false positives. +func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, retryAfter *time.Duration) { + errCode = httpCode + // Only check quota errors for expected status codes to avoid false positives + // Qwen returns 403 for quota errors, 429 for rate limits + if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) { + errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic + cooldown := timeUntilNextDay() + retryAfter = &cooldown + logWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) + } + return errCode, retryAfter +} + +// timeUntilNextDay returns duration until midnight Beijing time (UTC+8). +// Qwen's daily quota resets at 00:00 Beijing time. +func timeUntilNextDay() time.Duration { + now := time.Now() + nowLocal := now.In(qwenBeijingLoc) + tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc) + return tomorrow.Sub(now) +} + // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions. // If access token is unavailable, it falls back to legacy via ClientAdapter. type QwenExecutor struct { @@ -67,6 +210,17 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if opts.Alt == "responses/compact" { return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } + + // Check rate limit before proceeding + var authID string + if auth != nil { + authID = auth.ID + } + if err := checkQwenRateLimit(authID); err != nil { + logWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) + return resp, err + } + baseModel := thinking.ParseSuffix(req.Model).ModelName token, baseURL := qwenCreds(auth) @@ -102,9 +256,8 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req return resp, err } applyQwenHeaders(httpReq, token, false) - var authID, authLabel, authType, authValue string + var authLabel, authType, authValue string if auth != nil { - authID = auth.ID authLabel = auth.Label authType, authValue = auth.AccountInfo() } @@ -135,8 +288,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - err = statusErr{code: httpResp.StatusCode, msg: string(b)} + + errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) + logWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} return resp, err } data, err := io.ReadAll(httpResp.Body) @@ -158,6 +313,17 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if opts.Alt == "responses/compact" { return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } + + // Check rate limit before proceeding + var authID string + if auth != nil { + authID = auth.ID + } + if err := checkQwenRateLimit(authID); err != nil { + logWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) + return nil, err + } + baseModel := thinking.ParseSuffix(req.Model).ModelName token, baseURL := qwenCreds(auth) @@ -200,9 +366,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut return nil, err } applyQwenHeaders(httpReq, token, true) - var authID, authLabel, authType, authValue string + var authLabel, authType, authValue string if auth != nil { - authID = auth.ID authLabel = auth.Label authType, authValue = auth.AccountInfo() } @@ -228,11 +393,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + + errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) + logWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("qwen executor: close response body error: %v", errClose) } - err = statusErr{code: httpResp.StatusCode, msg: string(b)} + err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} return nil, err } out := make(chan cliproxyexecutor.StreamChunk) diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request.go b/internal/translator/codex/openai/responses/codex_openai-responses_request.go index f0407149..1161c515 100644 --- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go +++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go @@ -26,6 +26,8 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature") rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p") rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier") + rawJSON, _ = sjson.DeleteBytes(rawJSON, "truncation") + rawJSON = applyResponsesCompactionCompatibility(rawJSON) // Delete the user field as it is not supported by the Codex upstream. rawJSON, _ = sjson.DeleteBytes(rawJSON, "user") @@ -36,6 +38,23 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, return rawJSON } +// applyResponsesCompactionCompatibility handles OpenAI Responses context_management.compaction +// for Codex upstream compatibility. +// +// Codex /responses currently rejects context_management with: +// {"detail":"Unsupported parameter: context_management"}. +// +// Compatibility strategy: +// 1) Remove context_management before forwarding to Codex upstream. +func applyResponsesCompactionCompatibility(rawJSON []byte) []byte { + if !gjson.GetBytes(rawJSON, "context_management").Exists() { + return rawJSON + } + + rawJSON, _ = sjson.DeleteBytes(rawJSON, "context_management") + return rawJSON +} + // convertSystemRoleToDeveloper traverses the input array and converts any message items // with role "system" to role "developer". This is necessary because Codex API does not // accept "system" role in the input array. diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go b/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go index 4f562486..65732c3f 100644 --- a/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go +++ b/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go @@ -280,3 +280,41 @@ func TestUserFieldDeletion(t *testing.T) { t.Errorf("user field should be deleted, but it was found with value: %s", userField.Raw) } } + +func TestContextManagementCompactionCompatibility(t *testing.T) { + inputJSON := []byte(`{ + "model": "gpt-5.2", + "context_management": [ + { + "type": "compaction", + "compact_threshold": 12000 + } + ], + "input": [{"role":"user","content":"hello"}] + }`) + + output := ConvertOpenAIResponsesRequestToCodex("gpt-5.2", inputJSON, false) + outputStr := string(output) + + if gjson.Get(outputStr, "context_management").Exists() { + t.Fatalf("context_management should be removed for Codex compatibility") + } + if gjson.Get(outputStr, "truncation").Exists() { + t.Fatalf("truncation should be removed for Codex compatibility") + } +} + +func TestTruncationRemovedForCodexCompatibility(t *testing.T) { + inputJSON := []byte(`{ + "model": "gpt-5.2", + "truncation": "disabled", + "input": [{"role":"user","content":"hello"}] + }`) + + output := ConvertOpenAIResponsesRequestToCodex("gpt-5.2", inputJSON, false) + outputStr := string(output) + + if gjson.Get(outputStr, "truncation").Exists() { + t.Fatalf("truncation should be removed for Codex compatibility") + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 6cd3c0ff..b37162ba 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1828,9 +1828,7 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error { // every few seconds and triggers refresh operations when required. // Only one loop is kept alive; starting a new one cancels the previous run. func (m *Manager) StartAutoRefresh(parent context.Context, interval time.Duration) { - if interval <= 0 || interval > refreshCheckInterval { - interval = refreshCheckInterval - } else { + if interval <= 0 { interval = refreshCheckInterval } if m.refreshCancel != nil {