From 30e94b6792d697390a66d77546cd185161db97d9 Mon Sep 17 00:00:00 2001 From: ZTXBOSS666 <150424052+ZTXBOSS666@users.noreply.github.com> Date: Thu, 9 Apr 2026 21:48:32 +0800 Subject: [PATCH] fix(antigravity): refine 429 handling and credits fallback Includes: restore SDK docs under docs/; update antigravity executor credits tests; gofmt. --- config.example.yaml | 1 - .../runtime/executor/antigravity_executor.go | 533 +++++++++++++++--- .../antigravity_executor_credits_test.go | 11 +- 3 files changed, 449 insertions(+), 96 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index ce2d0a5a..d94cb056 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -109,7 +109,6 @@ enable-gemini-cli-endpoint: false # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts. nonstream-keepalive-interval: 0 - # Streaming behavior (SSE keep-alives + safe bootstrap retries). # streaming: # keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives. 
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index ed4ce1dc..850ad7dc 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -38,34 +38,58 @@ import ( ) const ( - antigravityBaseURLDaily = "https://daily-cloudcode-pa.googleapis.com" - antigravitySandboxBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com" - antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com" - antigravityCountTokensPath = "/v1internal:countTokens" - antigravityStreamPath = "/v1internal:streamGenerateContent" - antigravityGeneratePath = "/v1internal:generateContent" - antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com" - antigravityClientSecret = "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" - defaultAntigravityAgent = "antigravity/1.21.9 darwin/arm64" // fallback only; overridden at runtime by misc.AntigravityUserAgent() - antigravityAuthType = "antigravity" - refreshSkew = 3000 * time.Second - antigravityCreditsRetryTTL = 5 * time.Hour + antigravityBaseURLDaily = "https://daily-cloudcode-pa.googleapis.com" + antigravitySandboxBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com" + antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com" + antigravityCountTokensPath = "/v1internal:countTokens" + antigravityStreamPath = "/v1internal:streamGenerateContent" + antigravityGeneratePath = "/v1internal:generateContent" + antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com" + antigravityClientSecret = "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" + defaultAntigravityAgent = "antigravity/1.21.9 darwin/arm64" // fallback only; overridden at runtime by misc.AntigravityUserAgent() + antigravityAuthType = "antigravity" + refreshSkew = 3000 * time.Second + antigravityCreditsRetryTTL = 5 * time.Hour + antigravityCreditsAutoDisableDuration = 5 * time.Hour + 
antigravityShortQuotaCooldownThreshold = 5 * time.Minute + antigravityInstantRetryThreshold = 3 * time.Second // systemInstruction = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**" ) type antigravity429Category string +type antigravityCreditsFailureState struct { + Count int + DisabledUntil time.Time + PermanentlyDisabled bool + ExplicitBalanceExhausted bool +} + +type antigravity429DecisionKind string + const ( - antigravity429Unknown antigravity429Category = "unknown" - antigravity429RateLimited antigravity429Category = "rate_limited" - antigravity429QuotaExhausted antigravity429Category = "quota_exhausted" + antigravity429Unknown antigravity429Category = "unknown" + antigravity429RateLimited antigravity429Category = "rate_limited" + antigravity429QuotaExhausted antigravity429Category = "quota_exhausted" + antigravity429SoftRateLimit antigravity429Category = "soft_rate_limit" + antigravity429DecisionSoftRetry antigravity429DecisionKind = "soft_retry" + antigravity429DecisionInstantRetrySameAuth antigravity429DecisionKind = "instant_retry_same_auth" + antigravity429DecisionShortCooldownSwitchAuth antigravity429DecisionKind = "short_cooldown_switch_auth" + antigravity429DecisionFullQuotaExhausted antigravity429DecisionKind = "full_quota_exhausted" ) +type antigravity429Decision struct { + kind antigravity429DecisionKind + retryAfter *time.Duration + reason string +} + var ( randSource = rand.New(rand.NewSource(time.Now().UnixNano())) randSourceMutex sync.Mutex - antigravityCreditsExhaustedByAuth sync.Map + antigravityCreditsFailureByAuth sync.Map antigravityPreferCreditsByModel sync.Map + antigravityShortCooldownByAuth sync.Map 
antigravityQuotaExhaustedKeywords = []string{ "quota_exhausted", "quota exhausted", @@ -229,36 +253,77 @@ func injectEnabledCreditTypes(payload []byte) []byte { } func classifyAntigravity429(body []byte) antigravity429Category { - if len(body) == 0 { + switch decideAntigravity429(body).kind { + case antigravity429DecisionInstantRetrySameAuth, antigravity429DecisionShortCooldownSwitchAuth: + return antigravity429RateLimited + case antigravity429DecisionFullQuotaExhausted: + return antigravity429QuotaExhausted + case antigravity429DecisionSoftRetry: + return antigravity429SoftRateLimit + default: return antigravity429Unknown } +} + +func decideAntigravity429(body []byte) antigravity429Decision { + decision := antigravity429Decision{kind: antigravity429DecisionSoftRetry} + if len(body) == 0 { + return decision + } + + if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil { + decision.retryAfter = retryAfter + } + lowerBody := strings.ToLower(string(body)) for _, keyword := range antigravityQuotaExhaustedKeywords { if strings.Contains(lowerBody, keyword) { - return antigravity429QuotaExhausted + decision.kind = antigravity429DecisionFullQuotaExhausted + decision.reason = "quota_exhausted" + return decision } } + status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String()) if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") { - return antigravity429Unknown + return decision } + details := gjson.GetBytes(body, "error.details") if !details.Exists() || !details.IsArray() { - return antigravity429Unknown + decision.kind = antigravity429DecisionSoftRetry + return decision } + for _, detail := range details.Array() { if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" { continue } reason := strings.TrimSpace(detail.Get("reason").String()) - if strings.EqualFold(reason, "QUOTA_EXHAUSTED") { - return antigravity429QuotaExhausted - } - if strings.EqualFold(reason, "RATE_LIMIT_EXCEEDED") { - return 
antigravity429RateLimited + decision.reason = reason + switch { + case strings.EqualFold(reason, "QUOTA_EXHAUSTED"): + decision.kind = antigravity429DecisionFullQuotaExhausted + return decision + case strings.EqualFold(reason, "RATE_LIMIT_EXCEEDED"): + if decision.retryAfter == nil { + decision.kind = antigravity429DecisionSoftRetry + return decision + } + switch { + case *decision.retryAfter < antigravityInstantRetryThreshold: + decision.kind = antigravity429DecisionInstantRetrySameAuth + case *decision.retryAfter < antigravityShortQuotaCooldownThreshold: + decision.kind = antigravity429DecisionShortCooldownSwitchAuth + default: + decision.kind = antigravity429DecisionFullQuotaExhausted + } + return decision } } - return antigravity429Unknown + + decision.kind = antigravity429DecisionSoftRetry + return decision } func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool { @@ -287,38 +352,91 @@ func antigravityCreditsRetryEnabled(cfg *config.Config) bool { return cfg != nil && cfg.QuotaExceeded.AntigravityCredits } -func antigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) bool { +func antigravityCreditsFailureStateForAuth(auth *cliproxyauth.Auth) (string, antigravityCreditsFailureState, bool) { if auth == nil || strings.TrimSpace(auth.ID) == "" { - return false + return "", antigravityCreditsFailureState{}, false } - value, ok := antigravityCreditsExhaustedByAuth.Load(auth.ID) + authID := strings.TrimSpace(auth.ID) + value, ok := antigravityCreditsFailureByAuth.Load(authID) + if !ok { + return authID, antigravityCreditsFailureState{}, true + } + state, ok := value.(antigravityCreditsFailureState) + if !ok { + antigravityCreditsFailureByAuth.Delete(authID) + return authID, antigravityCreditsFailureState{}, true + } + return authID, state, true +} + +func antigravityCreditsDisabled(auth *cliproxyauth.Auth, now time.Time) bool { + authID, state, ok := antigravityCreditsFailureStateForAuth(auth) if !ok { return false } - until, ok := 
value.(time.Time) - if !ok || until.IsZero() { - antigravityCreditsExhaustedByAuth.Delete(auth.ID) + if state.PermanentlyDisabled { + return true + } + if state.DisabledUntil.IsZero() { return false } - if !until.After(now) { - antigravityCreditsExhaustedByAuth.Delete(auth.ID) - return false + if state.DisabledUntil.After(now) { + return true } - return true + antigravityCreditsFailureByAuth.Delete(authID) + return false } -func markAntigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) { +func recordAntigravityCreditsFailure(auth *cliproxyauth.Auth, now time.Time) { + authID, state, ok := antigravityCreditsFailureStateForAuth(auth) + if !ok { + return + } + if state.PermanentlyDisabled { + antigravityCreditsFailureByAuth.Store(authID, state) + return + } + state.Count++ + state.DisabledUntil = now.Add(antigravityCreditsAutoDisableDuration) + antigravityCreditsFailureByAuth.Store(authID, state) +} + +func clearAntigravityCreditsFailureState(auth *cliproxyauth.Auth) { if auth == nil || strings.TrimSpace(auth.ID) == "" { return } - antigravityCreditsExhaustedByAuth.Store(auth.ID, now.Add(antigravityCreditsRetryTTL)) + antigravityCreditsFailureByAuth.Delete(strings.TrimSpace(auth.ID)) } - -func clearAntigravityCreditsExhausted(auth *cliproxyauth.Auth) { +func markAntigravityCreditsPermanentlyDisabled(auth *cliproxyauth.Auth) { if auth == nil || strings.TrimSpace(auth.ID) == "" { return } - antigravityCreditsExhaustedByAuth.Delete(auth.ID) + authID := strings.TrimSpace(auth.ID) + state := antigravityCreditsFailureState{ + PermanentlyDisabled: true, + ExplicitBalanceExhausted: true, + } + antigravityCreditsFailureByAuth.Store(authID, state) +} + +func antigravityHasExplicitCreditsBalanceExhaustedReason(body []byte) bool { + if len(body) == 0 { + return false + } + details := gjson.GetBytes(body, "error.details") + if !details.Exists() || !details.IsArray() { + return false + } + for _, detail := range details.Array() { + if detail.Get("@type").String() != 
"type.googleapis.com/google.rpc.ErrorInfo" { + continue + } + reason := strings.TrimSpace(detail.Get("reason").String()) + if strings.EqualFold(reason, "INSUFFICIENT_G1_CREDITS_BALANCE") { + return true + } + } + return false } func antigravityPreferCreditsKey(auth *cliproxyauth.Auth, modelName string) string { @@ -386,7 +504,7 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e if strings.Contains(lowerBody, keyword) { if keyword == "resource has been exhausted" && statusCode == http.StatusTooManyRequests && - classifyAntigravity429(body) == antigravity429Unknown && + decideAntigravity429(body).kind == antigravity429DecisionSoftRetry && !antigravityHasQuotaResetDelayOrModelInfo(body) { return false } @@ -421,11 +539,23 @@ func (e *AntigravityExecutor) attemptCreditsFallback( if !antigravityCreditsRetryEnabled(e.cfg) { return nil, false } - if classifyAntigravity429(originalBody) != antigravity429QuotaExhausted { + if decideAntigravity429(originalBody).kind != antigravity429DecisionFullQuotaExhausted { return nil, false } now := time.Now() - if antigravityCreditsExhausted(auth, now) { + if shouldForcePermanentDisableCredits(originalBody) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsPermanentlyDisabled(auth) + return nil, false + } + + if antigravityHasExplicitCreditsBalanceExhaustedReason(originalBody) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsPermanentlyDisabled(auth) + return nil, false + } + + if antigravityCreditsDisabled(auth, now) { return nil, false } creditsPayload := injectEnabledCreditTypes(payload) @@ -436,17 +566,21 @@ func (e *AntigravityExecutor) attemptCreditsFallback( httpReq, errReq := e.buildRequest(ctx, auth, token, modelName, creditsPayload, stream, alt, baseURL) if errReq != nil { helps.RecordAPIResponseError(ctx, e.cfg, errReq) + clearAntigravityPreferCredits(auth, modelName) + recordAntigravityCreditsFailure(auth, now) return nil, true } httpResp, 
errDo := httpClient.Do(httpReq) if errDo != nil { helps.RecordAPIResponseError(ctx, e.cfg, errDo) + clearAntigravityPreferCredits(auth, modelName) + recordAntigravityCreditsFailure(auth, now) return nil, true } if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { retryAfter, _ := parseRetryDelay(originalBody) markAntigravityPreferCredits(auth, modelName, now, retryAfter) - clearAntigravityCreditsExhausted(auth) + clearAntigravityCreditsFailureState(auth) return httpResp, true } @@ -457,24 +591,75 @@ func (e *AntigravityExecutor) attemptCreditsFallback( } if errRead != nil { helps.RecordAPIResponseError(ctx, e.cfg, errRead) + clearAntigravityPreferCredits(auth, modelName) + recordAntigravityCreditsFailure(auth, now) return nil, true } helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) - if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { + if shouldForcePermanentDisableCredits(bodyBytes) { clearAntigravityPreferCredits(auth, modelName) - markAntigravityCreditsExhausted(auth, now) + markAntigravityCreditsPermanentlyDisabled(auth) + return nil, true } + + if antigravityHasExplicitCreditsBalanceExhaustedReason(bodyBytes) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsPermanentlyDisabled(auth) + return nil, true + } + + clearAntigravityPreferCredits(auth, modelName) + recordAntigravityCreditsFailure(auth, now) return nil, true } +func (e *AntigravityExecutor) handleDirectCreditsFailure(ctx context.Context, auth *cliproxyauth.Auth, modelName string, reqErr error) { + if reqErr != nil { + if shouldForcePermanentDisableCredits(reqErrBody(reqErr)) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsPermanentlyDisabled(auth) + return + } + + if antigravityHasExplicitCreditsBalanceExhaustedReason(reqErrBody(reqErr)) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsPermanentlyDisabled(auth) + return + } + + 
helps.RecordAPIResponseError(ctx, e.cfg, reqErr) + } + clearAntigravityPreferCredits(auth, modelName) + recordAntigravityCreditsFailure(auth, time.Now()) +} +func reqErrBody(reqErr error) []byte { + if reqErr == nil { + return nil + } + msg := reqErr.Error() + if strings.TrimSpace(msg) == "" { + return nil + } + return []byte(msg) +} + +func shouldForcePermanentDisableCredits(body []byte) bool { + return antigravityHasExplicitCreditsBalanceExhaustedReason(body) +} + // Execute performs a non-streaming request to the Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { if opts.Alt == "responses/compact" { return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } baseModel := thinking.ParseSuffix(req.Model).ModelName - isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown { + log.Debugf("antigravity executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining) + d := remaining + return resp, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d} + } + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") if isClaude || strings.Contains(baseModel, "gemini-3-pro") || strings.Contains(baseModel, "gemini-3.1-flash-image") { return e.executeClaudeNonStream(ctx, auth, req, opts) } @@ -511,7 +696,6 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0) - attempts := antigravityRetryAttempts(auth, e.cfg) attemptLoop: @@ -529,6 +713,7 @@ attemptLoop: usedCreditsDirect = true } } + 
httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, false, opts.Alt, baseURL) if errReq != nil { err = errReq @@ -565,31 +750,50 @@ attemptLoop: helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { - if usedCreditsDirect { - if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { - clearAntigravityPreferCredits(auth, baseModel) - markAntigravityCreditsExhausted(auth, time.Now()) + decision := decideAntigravity429(bodyBytes) + switch decision.kind { + case antigravity429DecisionInstantRetrySameAuth: + if attempt+1 < attempts { + if decision.retryAfter != nil && *decision.retryAfter > 0 { + wait := antigravityInstantRetryDelay(*decision.retryAfter) + log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait) + if errWait := antigravityWait(ctx, wait); errWait != nil { + + return resp, errWait + } + } + continue attemptLoop } - } else { - creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) - if creditsResp != nil { - helps.RecordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) - creditsBody, errCreditsRead := io.ReadAll(creditsResp.Body) - if errClose := creditsResp.Body.Close(); errClose != nil { - log.Errorf("antigravity executor: close credits success response body error: %v", errClose) + case antigravity429DecisionShortCooldownSwitchAuth: + if decision.retryAfter != nil && *decision.retryAfter > 0 { + markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter) + log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel) + } + case antigravity429DecisionFullQuotaExhausted: + if usedCreditsDirect { + clearAntigravityPreferCredits(auth, baseModel) + recordAntigravityCreditsFailure(auth, time.Now()) + } 
else { + creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + helps.RecordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) + creditsBody, errCreditsRead := io.ReadAll(creditsResp.Body) + if errClose := creditsResp.Body.Close(); errClose != nil { + log.Errorf("antigravity executor: close credits success response body error: %v", errClose) + } + if errCreditsRead != nil { + helps.RecordAPIResponseError(ctx, e.cfg, errCreditsRead) + err = errCreditsRead + return resp, err + } + helps.AppendAPIResponseChunk(ctx, e.cfg, creditsBody) + reporter.Publish(ctx, helps.ParseAntigravityUsage(creditsBody)) + var param any + converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, creditsBody, &param) + resp = cliproxyexecutor.Response{Payload: converted, Headers: creditsResp.Header.Clone()} + reporter.EnsurePublished(ctx) + return resp, nil } - if errCreditsRead != nil { - helps.RecordAPIResponseError(ctx, e.cfg, errCreditsRead) - err = errCreditsRead - return resp, err - } - helps.AppendAPIResponseChunk(ctx, e.cfg, creditsBody) - reporter.Publish(ctx, helps.ParseAntigravityUsage(creditsBody)) - var param any - converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, creditsBody, &param) - resp = cliproxyexecutor.Response{Payload: converted, Headers: creditsResp.Header.Clone()} - reporter.EnsurePublished(ctx) - return resp, nil } } } @@ -625,6 +829,16 @@ attemptLoop: continue attemptLoop } } + if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) { + if attempt+1 < attempts { + delay := antigravitySoftRateLimitDelay(attempt) + log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return
resp, errWait + } + continue attemptLoop + } + } err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return resp, err } @@ -654,6 +868,11 @@ attemptLoop: // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API. func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName + if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown { + log.Debugf("antigravity executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining) + d := remaining + return resp, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d} + } token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -755,19 +974,40 @@ attemptLoop: } helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { - if usedCreditsDirect { - if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { - clearAntigravityPreferCredits(auth, baseModel) - markAntigravityCreditsExhausted(auth, time.Now()) + decision := decideAntigravity429(bodyBytes) + + switch decision.kind { + case antigravity429DecisionInstantRetrySameAuth: + if attempt+1 < attempts { + if decision.retryAfter != nil && *decision.retryAfter > 0 { + wait := antigravityInstantRetryDelay(*decision.retryAfter) + log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait) + if errWait := antigravityWait(ctx, wait); errWait != nil { + + return resp, errWait + } + } + continue attemptLoop } - } else { - creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, 
bodyBytes) - if creditsResp != nil { - httpResp = creditsResp - helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + case antigravity429DecisionShortCooldownSwitchAuth: + if decision.retryAfter != nil && *decision.retryAfter > 0 { + markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter) + log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel) + } + case antigravity429DecisionFullQuotaExhausted: + if usedCreditsDirect { + clearAntigravityPreferCredits(auth, baseModel) + recordAntigravityCreditsFailure(auth, time.Now()) + } else { + creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + httpResp = creditsResp + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + } } } } + if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { goto streamSuccessClaudeNonStream } @@ -800,6 +1040,16 @@ attemptLoop: continue attemptLoop } } + if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) { + if attempt+1 < attempts { + delay := antigravitySoftRateLimitDelay(attempt) + log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return resp, errWait + } + continue attemptLoop + } + } err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return resp, err } @@ -1079,6 +1329,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya baseModel := thinking.ParseSuffix(req.Model).ModelName ctx = context.WithValue(ctx, "alt", "") + if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown { + log.Debugf("antigravity 
executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining) + d := remaining + return nil, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d} + } token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -1179,19 +1434,40 @@ attemptLoop: } helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { - if usedCreditsDirect { - if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { - clearAntigravityPreferCredits(auth, baseModel) - markAntigravityCreditsExhausted(auth, time.Now()) + decision := decideAntigravity429(bodyBytes) + + switch decision.kind { + case antigravity429DecisionInstantRetrySameAuth: + if attempt+1 < attempts { + if decision.retryAfter != nil && *decision.retryAfter > 0 { + wait := antigravityInstantRetryDelay(*decision.retryAfter) + log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait) + if errWait := antigravityWait(ctx, wait); errWait != nil { + + return nil, errWait + } + } + continue attemptLoop } - } else { - creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) - if creditsResp != nil { - httpResp = creditsResp - helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + case antigravity429DecisionShortCooldownSwitchAuth: + if decision.retryAfter != nil && *decision.retryAfter > 0 { + markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter) + log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel) + } + case antigravity429DecisionFullQuotaExhausted: + if usedCreditsDirect { + clearAntigravityPreferCredits(auth, baseModel) + 
recordAntigravityCreditsFailure(auth, time.Now()) + } else { + creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + httpResp = creditsResp + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + } } } } + if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { goto streamSuccessExecuteStream } @@ -1224,6 +1500,16 @@ attemptLoop: continue attemptLoop } } + if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) { + if attempt+1 < attempts { + delay := antigravitySoftRateLimitDelay(attempt) + log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return nil, errWait + } + continue attemptLoop + } + } err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return nil, err } @@ -1844,6 +2130,66 @@ func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body [] return strings.Contains(msg, "resource has been exhausted") } +func antigravityShouldRetrySoftRateLimit(statusCode int, body []byte) bool { + if statusCode != http.StatusTooManyRequests { + return false + } + return decideAntigravity429(body).kind == antigravity429DecisionSoftRetry +} + +func antigravitySoftRateLimitDelay(attempt int) time.Duration { + if attempt < 0 { + attempt = 0 + } + base := time.Duration(attempt+1) * 500 * time.Millisecond + if base > 3*time.Second { + base = 3 * time.Second + } + return base +} + +func antigravityShortCooldownKey(auth *cliproxyauth.Auth, modelName string) string { + if auth == nil { + return "" + } + authID := strings.TrimSpace(auth.ID) + modelName = strings.TrimSpace(modelName) + if authID == "" || modelName == "" { + return "" + } + return authID + "|" + modelName + "|sc" +} + +func 
antigravityIsInShortCooldown(auth *cliproxyauth.Auth, modelName string, now time.Time) (bool, time.Duration) { + key := antigravityShortCooldownKey(auth, modelName) + if key == "" { + return false, 0 + } + value, ok := antigravityShortCooldownByAuth.Load(key) + if !ok { + return false, 0 + } + until, ok := value.(time.Time) + if !ok || until.IsZero() { + antigravityShortCooldownByAuth.Delete(key) + return false, 0 + } + remaining := until.Sub(now) + if remaining <= 0 { + antigravityShortCooldownByAuth.Delete(key) + return false, 0 + } + return true, remaining +} + +func markAntigravityShortCooldown(auth *cliproxyauth.Auth, modelName string, now time.Time, duration time.Duration) { + key := antigravityShortCooldownKey(auth, modelName) + if key == "" { + return + } + antigravityShortCooldownByAuth.Store(key, now.Add(duration)) +} + func antigravityNoCapacityRetryDelay(attempt int) time.Duration { if attempt < 0 { attempt = 0 @@ -1866,6 +2212,13 @@ func antigravityTransient429RetryDelay(attempt int) time.Duration { return delay } +func antigravityInstantRetryDelay(wait time.Duration) time.Duration { + if wait <= 0 { + return 0 + } + return wait + 800*time.Millisecond +} + func antigravityWait(ctx context.Context, wait time.Duration) error { if wait <= 0 { return nil diff --git a/internal/runtime/executor/antigravity_executor_credits_test.go b/internal/runtime/executor/antigravity_executor_credits_test.go index 852dc778..cf968ac7 100644 --- a/internal/runtime/executor/antigravity_executor_credits_test.go +++ b/internal/runtime/executor/antigravity_executor_credits_test.go @@ -17,8 +17,9 @@ import ( ) func resetAntigravityCreditsRetryState() { - antigravityCreditsExhaustedByAuth = sync.Map{} + antigravityCreditsFailureByAuth = sync.Map{} antigravityPreferCreditsByModel = sync.Map{} + antigravityShortCooldownByAuth = sync.Map{} } func TestClassifyAntigravity429(t *testing.T) { @@ -58,10 +59,10 @@ func TestClassifyAntigravity429(t *testing.T) { } }) - t.Run("unknown", 
func(t *testing.T) { + t.Run("unstructured 429 defaults to soft rate limit", func(t *testing.T) { body := []byte(`{"error":{"message":"too many requests"}}`) - if got := classifyAntigravity429(body); got != antigravity429Unknown { - t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429Unknown) + if got := classifyAntigravity429(body); got != antigravity429SoftRateLimit { + t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429SoftRateLimit) } }) } @@ -255,7 +256,7 @@ func TestAntigravityExecute_SkipsCreditsRetryWhenAlreadyExhausted(t *testing.T) "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), }, } - markAntigravityCreditsExhausted(auth, time.Now()) + recordAntigravityCreditsFailure(auth, time.Now()) _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ Model: "gemini-2.5-flash",